From 9ca4a157305e4e23a892ba9bafc9eee0f66954ce Mon Sep 17 00:00:00 2001
From: Ido Barnea
Date: Sun, 5 Feb 2017 15:21:19 +0200
Subject: dpdk1702-rc2 upstream files unchanged + mlx5 driver rc3

Signed-off-by: Ido Barnea
---
 src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c | 160 +-
 src/dpdk/drivers/net/bnx2x/bnx2x.c | 43 +-
 src/dpdk/drivers/net/bnx2x/bnx2x.h | 131 +-
 src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c | 74 +-
 src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c | 16 +-
 src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h | 6 -
 src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c | 350 ++-
 src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h | 17 +-
 src/dpdk/drivers/net/bnx2x/debug.c | 96 -
 src/dpdk/drivers/net/bnx2x/elink.c | 394 +--
 src/dpdk/drivers/net/bnx2x/elink.h | 4 -
 src/dpdk/drivers/net/bonding/rte_eth_bond_api.c | 101 +-
 src/dpdk/drivers/net/bonding/rte_eth_bond_args.c | 12 +-
 src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c | 99 +-
 .../drivers/net/bonding/rte_eth_bond_private.h | 6 +-
 src/dpdk/drivers/net/cxgbe/base/adapter.h | 34 +-
 src/dpdk/drivers/net/cxgbe/base/t4_hw.c | 2 +-
 src/dpdk/drivers/net/cxgbe/cxgbe_compat.h | 8 +-
 src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c | 34 +-
 src/dpdk/drivers/net/cxgbe/cxgbe_main.c | 10 +-
 src/dpdk/drivers/net/cxgbe/sge.c | 23 +-
 src/dpdk/drivers/net/e1000/base/e1000_82575.c | 1 -
 src/dpdk/drivers/net/e1000/base/e1000_82575.h | 1 +
 src/dpdk/drivers/net/e1000/base/e1000_api.c | 24 +-
 src/dpdk/drivers/net/e1000/base/e1000_defines.h | 9 +
 src/dpdk/drivers/net/e1000/base/e1000_hw.h | 21 +-
 src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c | 865 +++++-
 src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h | 21 +-
 src/dpdk/drivers/net/e1000/base/e1000_mbx.c | 36 +-
 src/dpdk/drivers/net/e1000/base/e1000_nvm.c | 1 +
 src/dpdk/drivers/net/e1000/base/e1000_osdep.h | 18 +-
 src/dpdk/drivers/net/e1000/base/e1000_regs.h | 7 +
 src/dpdk/drivers/net/e1000/e1000_ethdev.h | 13 +
 src/dpdk/drivers/net/e1000/em_ethdev.c | 132 +-
 src/dpdk/drivers/net/e1000/em_rxtx.c | 51 +-
 src/dpdk/drivers/net/e1000/igb_ethdev.c | 297 +-
 src/dpdk/drivers/net/e1000/igb_pf.c | 4 +-
 src/dpdk/drivers/net/e1000/igb_rxtx.c | 62 +-
 src/dpdk/drivers/net/enic/base/vnic_dev.c | 37 +-
 src/dpdk/drivers/net/enic/base/vnic_rq.c | 6 +-
 src/dpdk/drivers/net/enic/base/vnic_rq.h | 2 +
 src/dpdk/drivers/net/enic/enic.h | 41 +-
 src/dpdk/drivers/net/enic/enic_clsf.c | 66 +-
 src/dpdk/drivers/net/enic/enic_compat.h | 27 +-
 src/dpdk/drivers/net/enic/enic_ethdev.c | 103 +-
 src/dpdk/drivers/net/enic/enic_main.c | 279 +-
 src/dpdk/drivers/net/enic/enic_res.c | 5 +-
 src/dpdk/drivers/net/enic/enic_res.h | 6 +-
 src/dpdk/drivers/net/enic/enic_rxtx.c | 198 +-
 src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h | 17 +-
 src/dpdk/drivers/net/fm10k/fm10k.h | 6 +
 src/dpdk/drivers/net/fm10k/fm10k_ethdev.c | 133 +-
 src/dpdk/drivers/net/fm10k/fm10k_rxtx.c | 64 +-
 src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c | 29 +-
 src/dpdk/drivers/net/i40e/base/i40e_adminq.c | 4 +-
 src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h | 159 +-
 src/dpdk/drivers/net/i40e/base/i40e_common.c | 501 +++-
 src/dpdk/drivers/net/i40e/base/i40e_devids.h | 4 -
 src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c | 5 -
 src/dpdk/drivers/net/i40e/base/i40e_nvm.c | 52 +-
 src/dpdk/drivers/net/i40e/base/i40e_osdep.h | 10 +-
 src/dpdk/drivers/net/i40e/base/i40e_prototype.h | 33 +-
 src/dpdk/drivers/net/i40e/base/i40e_register.h | 2 -
 src/dpdk/drivers/net/i40e/base/i40e_type.h | 377 ++-
 src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h | 5 +
 src/dpdk/drivers/net/i40e/i40e_ethdev.c | 2755 +++++++++++++++----
 src/dpdk/drivers/net/i40e/i40e_ethdev.h | 239 +-
 src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c | 346 +--
 src/dpdk/drivers/net/i40e/i40e_fdir.c | 215 +-
 src/dpdk/drivers/net/i40e/i40e_flow.c | 1849 +++++++++++++
 src/dpdk/drivers/net/i40e/i40e_pf.c | 449 ++-
 src/dpdk/drivers/net/i40e/i40e_pf.h | 12 +-
 src/dpdk/drivers/net/i40e/i40e_rxtx.c | 833 ++----
 src/dpdk/drivers/net/i40e/i40e_rxtx.h | 571 ++++
 src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c | 761 ------
 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h | 251 ++
 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c | 614 +++++
 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c | 633 +++++
 src/dpdk/drivers/net/i40e/rte_pmd_i40e.h | 335 +++
 src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c | 6 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c | 16 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c | 19 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h | 4 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c | 317 ++-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h | 10 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c | 240 ++
 src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h | 41 +
 src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h | 7 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h | 16 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c | 272 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h | 5 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h | 180 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c | 119 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h | 6 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c | 31 +-
 src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c | 1601 +++++++----
 src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h | 65 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c | 2339 ++++++++++++----
 src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h | 258 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c | 431 ++-
 src/dpdk/drivers/net/ixgbe/ixgbe_flow.c | 2878 ++++++++++++++++++++
 src/dpdk/drivers/net/ixgbe/ixgbe_pf.c | 72 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_regs.h | 42 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c | 133 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h | 4 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 24 +-
 src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 6 +
 src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c | 73 +-
 src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h | 412 +++
 src/dpdk/drivers/net/mlx4/mlx4.c | 120 +-
 src/dpdk/drivers/net/mlx4/mlx4.h | 2 +-
 src/dpdk/drivers/net/mlx5/mlx5.c | 586 +---
 src/dpdk/drivers/net/mlx5/mlx5.h | 65 +-
 src/dpdk/drivers/net/mlx5/mlx5_defs.h | 41 +-
 src/dpdk/drivers/net/mlx5/mlx5_ethdev.c | 151 +-
 src/dpdk/drivers/net/mlx5/mlx5_fdir.c | 106 +-
 src/dpdk/drivers/net/mlx5/mlx5_flow.c | 1247 +++++++++
 src/dpdk/drivers/net/mlx5/mlx5_prm.h | 141 +-
 src/dpdk/drivers/net/mlx5/mlx5_rxq.c | 4 +-
 src/dpdk/drivers/net/mlx5/mlx5_rxtx.c | 497 ++--
 src/dpdk/drivers/net/mlx5/mlx5_rxtx.h | 19 +-
 src/dpdk/drivers/net/mlx5/mlx5_stats.c | 623 +++--
 src/dpdk/drivers/net/mlx5/mlx5_trigger.c | 3 +
 src/dpdk/drivers/net/mlx5/mlx5_txq.c | 12 +-
 src/dpdk/drivers/net/null/rte_eth_null.c | 29 +-
 src/dpdk/drivers/net/ring/rte_eth_ring.c | 61 +-
 src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c | 108 +-
 src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h | 60 +-
 src/dpdk/drivers/net/virtio/virtio_ethdev.c | 748 +++--
 src/dpdk/drivers/net/virtio/virtio_ethdev.h | 44 +-
 src/dpdk/drivers/net/virtio/virtio_pci.c | 213 +-
 src/dpdk/drivers/net/virtio/virtio_pci.h | 43 +-
 src/dpdk/drivers/net/virtio/virtio_rxtx.c | 588 ++--
 src/dpdk/drivers/net/virtio/virtio_rxtx.h | 3 +-
 src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c | 272 +-
 src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h | 136 +
 .../drivers/net/virtio/virtio_rxtx_simple_neon.c | 235 ++
 .../drivers/net/virtio/virtio_rxtx_simple_sse.c | 222 ++
 src/dpdk/drivers/net/virtio/virtio_user/vhost.h | 123 +
 .../drivers/net/virtio/virtio_user/vhost_kernel.c | 403 +++
 .../net/virtio/virtio_user/vhost_kernel_tap.c | 133 +
 .../net/virtio/virtio_user/vhost_kernel_tap.h | 67 +
 .../drivers/net/virtio/virtio_user/vhost_user.c | 467 ++++
 .../net/virtio/virtio_user/virtio_user_dev.c | 414 +++
 .../net/virtio/virtio_user/virtio_user_dev.h | 75 +
 src/dpdk/drivers/net/virtio/virtio_user_ethdev.c | 99 +-
 src/dpdk/drivers/net/virtio/virtqueue.c | 11 -
 src/dpdk/drivers/net/virtio/virtqueue.h | 27 +-
 src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c | 166 +-
 src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h | 42 +-
 src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h | 22 +-
 src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c | 189 +-
 src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c | 29 +-
 src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h | 1 -
 src/dpdk/lib/librte_acl/acl.h | 4 +
 src/dpdk/lib/librte_acl/acl_run.h | 6 +-
 src/dpdk/lib/librte_acl/acl_run_altivec.c | 47 +
 src/dpdk/lib/librte_acl/acl_run_altivec.h | 329 +++
 src/dpdk/lib/librte_acl/rte_acl.c | 16 +-
 src/dpdk/lib/librte_acl/rte_acl.h | 5 +-
 src/dpdk/lib/librte_cfgfile/rte_cfgfile.h | 33 +-
 .../lib/librte_eal/bsdapp/contigmem/contigmem.c | 8 +-
 src/dpdk/lib/librte_eal/bsdapp/eal/eal.c | 21 +-
 .../lib/librte_eal/bsdapp/eal/eal_interrupts.c | 24 +-
 src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c | 57 -
 src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c | 63 +-
 src/dpdk/lib/librte_eal/common/eal_common_bus.c | 133 +
 .../lib/librte_eal/common/eal_common_cpuflags.c | 6 -
 src/dpdk/lib/librte_eal/common/eal_common_dev.c | 95 +-
 .../lib/librte_eal/common/eal_common_devargs.c | 1 +
 src/dpdk/lib/librte_eal/common/eal_common_log.c | 54 +-
 .../lib/librte_eal/common/eal_common_memzone.c | 12 -
 .../lib/librte_eal/common/eal_common_options.c | 34 +-
 src/dpdk/lib/librte_eal/common/eal_common_pci.c | 70 +-
 src/dpdk/lib/librte_eal/common/eal_common_timer.c | 17 +-
 src/dpdk/lib/librte_eal/common/eal_common_vdev.c | 124 +
 src/dpdk/lib/librte_eal/common/eal_filesystem.h | 11 -
 src/dpdk/lib/librte_eal/common/eal_hugepages.h | 3 -
 src/dpdk/lib/librte_eal/common/eal_private.h | 50 +-
 .../common/include/arch/x86/rte_atomic.h | 8 +
 .../common/include/arch/x86/rte_atomic_32.h | 9 +
 .../common/include/arch/x86/rte_atomic_64.h | 8 +
 .../common/include/arch/x86/rte_byteorder.h | 2 +
 .../common/include/arch/x86/rte_byteorder_32.h | 7 +
 .../common/include/arch/x86/rte_byteorder_64.h | 7 +
 .../common/include/arch/x86/rte_cycles.h | 2 +
 .../librte_eal/common/include/arch/x86/rte_io.h | 47 +
 .../common/include/arch/x86/rte_memcpy.h | 85 +-
 .../common/include/arch/x86/rte_prefetch.h | 1 +
 .../librte_eal/common/include/arch/x86/rte_rtm.h | 1 +
 .../librte_eal/common/include/arch/x86/rte_vect.h | 15 +-
 .../librte_eal/common/include/generic/rte_atomic.h | 28 +
 .../common/include/generic/rte_byteorder.h | 2 +
 .../common/include/generic/rte_cpuflags.h | 3 +
 .../librte_eal/common/include/generic/rte_cycles.h | 24 +-
 .../lib/librte_eal/common/include/generic/rte_io.h | 381 +++
 .../librte_eal/common/include/generic/rte_memcpy.h | 4 +
 .../librte_eal/common/include/generic/rte_vect.h | 214 ++
 src/dpdk/lib/librte_eal/common/include/rte_bus.h | 158 ++
 .../lib/librte_eal/common/include/rte_common.h | 42 +-
 src/dpdk/lib/librte_eal/common/include/rte_dev.h | 113 +-
 .../lib/librte_eal/common/include/rte_devargs.h | 9 +-
 src/dpdk/lib/librte_eal/common/include/rte_eal.h | 17 +-
 .../lib/librte_eal/common/include/rte_interrupts.h | 10 +-
 src/dpdk/lib/librte_eal/common/include/rte_log.h | 100 +-
 .../lib/librte_eal/common/include/rte_malloc.h | 2 +-
 .../lib/librte_eal/common/include/rte_memory.h | 9 +-
 .../lib/librte_eal/common/include/rte_memzone.h | 11 +-
 src/dpdk/lib/librte_eal/common/include/rte_pci.h | 72 +-
 .../librte_eal/common/include/rte_pci_dev_ids.h | 326 ---
 src/dpdk/lib/librte_eal/common/include/rte_tailq.h | 6 +-
 src/dpdk/lib/librte_eal/common/include/rte_time.h | 8 +
 src/dpdk/lib/librte_eal/common/include/rte_vdev.h | 102 +
 .../lib/librte_eal/common/include/rte_version.h | 9 +-
 .../lib/librte_eal/common/include/rte_warnings.h | 84 -
 src/dpdk/lib/librte_eal/common/malloc_heap.c | 8 -
 src/dpdk/lib/librte_eal/linuxapp/eal/eal.c | 52 +-
 .../lib/librte_eal/linuxapp/eal/eal_interrupts.c | 84 +-
 src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c | 954 -------
 src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c | 40 +-
 src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c | 338 +--
 src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c | 88 +-
 src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 2 +-
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 1 +
 .../linuxapp/eal/include/exec-env/rte_kni_common.h | 10 +-
 src/dpdk/lib/librte_eal/linuxapp/kni/compat.h | 31 +-
 .../linuxapp/kni/ethtool/igb/e1000_82575.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_82575.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_api.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_api.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_defines.h | 2 +-
 .../librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_i210.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_i210.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_mac.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_mac.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_manage.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_manage.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_mbx.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_mbx.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_nvm.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_nvm.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_osdep.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_phy.c | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_phy.h | 2 +-
 .../linuxapp/kni/ethtool/igb/e1000_regs.h | 2 +-
 .../lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h | 2 +-
 .../linuxapp/kni/ethtool/igb/igb_debugfs.c | 28 -
 .../linuxapp/kni/ethtool/igb/igb_ethtool.c | 2 +-
 .../linuxapp/kni/ethtool/igb/igb_hwmon.c | 260 --
 .../librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 24 +-
 .../linuxapp/kni/ethtool/igb/igb_param.c | 2 +-
 .../linuxapp/kni/ethtool/igb/igb_procfs.c | 363 ---
 .../librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c | 944 -------
 .../linuxapp/kni/ethtool/igb/igb_regtest.h | 2 +-
 .../librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c | 2 +-
 .../librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h | 2 +-
 .../librte_eal/linuxapp/kni/ethtool/igb/kcompat.c | 1482 ----------
 .../librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 26 +-
 .../linuxapp/kni/ethtool/igb/kcompat_ethtool.c | 1171 --------
 .../librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c | 4 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h | 73 -
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/kcompat.c | 2 +-
 .../linuxapp/kni/ethtool/ixgbe/kcompat.h | 14 +-
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h | 59 +-
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c | 39 +-
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h | 30 +-
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c | 536 ++--
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c | 471 ++--
 src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c | 199 +-
 src/dpdk/lib/librte_ether/rte_dev_info.h | 2 +
 src/dpdk/lib/librte_ether/rte_eth_ctrl.h | 12 +-
 src/dpdk/lib/librte_ether/rte_ethdev.c | 712 ++---
 src/dpdk/lib/librte_ether/rte_ethdev.h | 605 ++--
 src/dpdk/lib/librte_ether/rte_ether.h | 416 ---
 src/dpdk/lib/librte_ether/rte_flow.c | 159 ++
 src/dpdk/lib/librte_ether/rte_flow.h | 1090 ++++++++
 src/dpdk/lib/librte_ether/rte_flow_driver.h | 182 ++
 src/dpdk/lib/librte_hash/rte_cuckoo_hash.c | 486 ++--
 src/dpdk/lib/librte_hash/rte_cuckoo_hash.h | 72 +-
 src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h | 28 +-
 src/dpdk/lib/librte_hash/rte_fbk_hash.h | 2 +-
 src/dpdk/lib/librte_hash/rte_thash.h | 3 +
 src/dpdk/lib/librte_kvargs/rte_kvargs.c | 8 +-
 src/dpdk/lib/librte_kvargs/rte_kvargs.h | 3 +-
 src/dpdk/lib/librte_mbuf/rte_mbuf.c | 165 +-
 src/dpdk/lib/librte_mbuf/rte_mbuf.h | 798 ++----
 src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c | 227 ++
 src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h | 668 +++++
 src/dpdk/lib/librte_mempool/rte_mempool.c | 15 +-
 src/dpdk/lib/librte_mempool/rte_mempool.h | 48 +-
 src/dpdk/lib/librte_mempool/rte_mempool_stack.c | 2 +-
 src/dpdk/lib/librte_net/rte_ether.h | 417 +++
 src/dpdk/lib/librte_net/rte_gre.h | 71 +
 src/dpdk/lib/librte_net/rte_ip.h | 71 +
 src/dpdk/lib/librte_net/rte_net.c | 517 ++++
 src/dpdk/lib/librte_net/rte_net.h | 204 ++
 src/dpdk/lib/librte_pipeline/rte_pipeline.h | 4 +-
 src/dpdk/lib/librte_port/rte_port_fd.c | 552 ++++
 src/dpdk/lib/librte_port/rte_port_fd.h | 105 +
 src/dpdk/lib/librte_port/rte_port_source_sink.h | 4 +-
 src/dpdk/lib/librte_ring/rte_ring.h | 6 +-
 src/dpdk/lib/librte_table/rte_table_acl.c | 2 +-
 src/dpdk/lib/librte_table/rte_table_hash.h | 31 +-
 src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c | 382 +++
 src/dpdk/lib/librte_table/rte_table_hash_key16.c | 4 +-
 src/dpdk/lib/librte_table/rte_table_hash_key32.c | 4 +-
 src/dpdk/lib/librte_table/rte_table_hash_key8.c | 4 +-
 327 files changed, 34595 insertions(+), 17549 deletions(-)
 delete mode 100644 src/dpdk/drivers/net/bnx2x/debug.c
 create mode 100644 src/dpdk/drivers/net/i40e/i40e_flow.c
 delete mode 100644 src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c
 create mode 100644 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h
 create mode 100644 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c
 create mode 100644 src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c
 create mode 100644 src/dpdk/drivers/net/i40e/rte_pmd_i40e.h
 create mode 100644 src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c
 create mode 100644 src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h
 create mode 100644 src/dpdk/drivers/net/ixgbe/ixgbe_flow.c
 create mode 100644 src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h
 create mode 100644 src/dpdk/drivers/net/mlx5/mlx5_flow.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/vhost.h
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c
 create mode 100644 src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h
 create mode 100644 src/dpdk/lib/librte_acl/acl_run_altivec.c
 create mode 100644 src/dpdk/lib/librte_acl/acl_run_altivec.h
 delete mode 100644 src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c
 create mode 100644 src/dpdk/lib/librte_eal/common/eal_common_bus.c
 create mode 100644 src/dpdk/lib/librte_eal/common/eal_common_vdev.c
 create mode 100644 src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h
 create mode 100644 src/dpdk/lib/librte_eal/common/include/generic/rte_io.h
 create mode 100644 src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h
 create mode 100644 src/dpdk/lib/librte_eal/common/include/rte_bus.h
 delete mode 100644 src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h
 create mode 100644 src/dpdk/lib/librte_eal/common/include/rte_vdev.h
 delete mode 100644 src/dpdk/lib/librte_eal/common/include/rte_warnings.h
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
 delete mode 100644 src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
 delete mode 100644 src/dpdk/lib/librte_ether/rte_ether.h
 create mode 100644 src/dpdk/lib/librte_ether/rte_flow.c
 create mode 100644 src/dpdk/lib/librte_ether/rte_flow.h
 create mode 100644 src/dpdk/lib/librte_ether/rte_flow_driver.h
 create mode 100644 src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c
 create mode 100644 src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h
 create mode 100644 src/dpdk/lib/librte_net/rte_ether.h
 create mode 100644 src/dpdk/lib/librte_net/rte_gre.h
 create mode 100644 src/dpdk/lib/librte_net/rte_net.c
 create mode 100644 src/dpdk/lib/librte_net/rte_net.h
 create mode 100644 src/dpdk/lib/librte_port/rte_port_fd.c
 create mode 100644 src/dpdk/lib/librte_port/rte_port_fd.h
 create mode 100644 src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c

(limited to 'src')

diff --git a/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c b/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c
index f7955662..2f875539 100644
--- a/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c
@@ -40,7 +40,7 @@
#include #include #include -#include +#include #include #include @@ -83,6 +83,7 @@ struct pkt_rx_queue { struct pkt_tx_queue { int sockfd; + unsigned int frame_data_size; struct iovec *rd; uint8_t *map; @@ -98,6 +99,7 @@ struct pmd_internals { unsigned nb_queues; int if_index; + char *if_name; struct ether_addr eth_addr; struct tpacket_req req; @@ -115,8 +117,6 @@ static const char *valid_arguments[] = { NULL }; -static const char *drivername = "AF_PACKET PMD"; - static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -161,6 +161,12 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) pbuf = (uint8_t *) ppd + ppd->tp_mac; memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf)); + /* check for vlan info */ + if (ppd->tp_status & TP_STATUS_VLAN_VALID) { + mbuf->vlan_tci = ppd->tp_vlan_tci; + mbuf->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED); + } + /* release incoming frame and advance ring buffer */ ppd->tp_status = TP_STATUS_KERNEL; if (++framenum >= framecount) @@ -206,13 +212,28 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) framenum = pkt_q->framenum; ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; for (i = 0; i < nb_pkts; i++) { + mbuf = *bufs++; + + /* drop oversized packets */ + if (rte_pktmbuf_data_len(mbuf) > pkt_q->frame_data_size) { + rte_pktmbuf_free(mbuf); + continue; + } + + /* insert vlan info if necessary */ + if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { + if (rte_vlan_insert(&mbuf)) { + rte_pktmbuf_free(mbuf); + continue; + } + } + /* point at the next incoming frame */ if ((ppd->tp_status != TP_STATUS_AVAILABLE) && (poll(&pfd, 1, -1) < 0)) - continue; + break; /* copy the tx frame data */ - mbuf = bufs[num_tx]; pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); memcpy(pbuf, rte_pktmbuf_mtod(mbuf, void*), rte_pktmbuf_data_len(mbuf)); @@ -231,13 +252,13 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) /* kick-off transmits */ if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1) - return 0; /* error sending -- no packets transmitted */ + num_tx = 0; /* error sending -- no packets transmitted */ pkt_q->framenum = framenum; pkt_q->tx_pkts += num_tx; - pkt_q->err_pkts += nb_pkts - num_tx; + pkt_q->err_pkts += i - num_tx; pkt_q->tx_bytes += num_tx_bytes; - return num_tx; + return i; } static int @@ -261,9 +282,16 @@ eth_dev_stop(struct rte_eth_dev *dev) sockfd = internals->rx_queue[i].sockfd; if (sockfd != -1) close(sockfd); - sockfd = internals->tx_queue[i].sockfd; - if (sockfd != -1) - close(sockfd); + + /* Prevent use after free in case tx fd == rx fd */ + if (sockfd != internals->tx_queue[i].sockfd) { + sockfd = internals->tx_queue[i].sockfd; + if (sockfd != -1) + close(sockfd); + } + + internals->rx_queue[i].sockfd = -1; + internals->tx_queue[i].sockfd = -1; } dev->data->dev_link.link_status = ETH_LINK_DOWN; @@ -280,14 +308,12 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; - dev_info->driver_name = drivername; dev_info->if_index = internals->if_index; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; dev_info->max_rx_queues = (uint16_t)internals->nb_queues; dev_info->max_tx_queues = (uint16_t)internals->nb_queues; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -370,18 +396,20 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, { struct 
pmd_internals *internals = dev->data->dev_private; struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id]; - uint16_t buf_size; + unsigned int buf_size, data_size; pkt_q->mb_pool = mb_pool; /* Now get the space available for data in the mbuf */ - buf_size = (uint16_t)(rte_pktmbuf_data_room_size(pkt_q->mb_pool) - - RTE_PKTMBUF_HEADROOM); + buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) - + RTE_PKTMBUF_HEADROOM; + data_size = internals->req.tp_frame_size; + data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); - if (ETH_FRAME_LEN > buf_size) { + if (data_size > buf_size) { RTE_LOG(ERR, PMD, "%s: %d bytes will not fit in mbuf (%d bytes)\n", - dev->data->name, ETH_FRAME_LEN, buf_size); + dev->data->name, data_size, buf_size); return -ENOMEM; } @@ -405,12 +433,80 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, return 0; } +static int +eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct pmd_internals *internals = dev->data->dev_private; + struct ifreq ifr = { .ifr_mtu = mtu }; + int ret; + int s; + unsigned int data_size = internals->req.tp_frame_size - + TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll); + + if (mtu > data_size) + return -EINVAL; + + s = socket(PF_INET, SOCK_DGRAM, 0); + if (s < 0) + return -EINVAL; + + strncpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); + ret = ioctl(s, SIOCSIFMTU, &ifr); + close(s); + + if (ret < 0) + return -EINVAL; + + return 0; +} + +static void +eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask) +{ + struct ifreq ifr; + int s; + + s = socket(PF_INET, SOCK_DGRAM, 0); + if (s < 0) + return; + + strncpy(ifr.ifr_name, if_name, IFNAMSIZ); + if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) + goto out; + ifr.ifr_flags &= mask; + ifr.ifr_flags |= flags; + if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) + goto out; +out: + close(s); +} + +static void +eth_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct pmd_internals *internals = dev->data->dev_private; + + eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0); +} + +static void +eth_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct pmd_internals *internals = dev->data->dev_private; + + eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC); +} + static const struct eth_dev_ops ops = { .dev_start = eth_dev_start, .dev_stop = eth_dev_stop, .dev_close = eth_dev_close, .dev_configure = eth_dev_configure, .dev_infos_get = eth_dev_info, + .mtu_set = eth_dev_mtu_set, + .promiscuous_enable = eth_dev_promiscuous_enable, + .promiscuous_disable = eth_dev_promiscuous_disable, .rx_queue_setup = eth_rx_queue_setup, .tx_queue_setup = eth_tx_queue_setup, .rx_queue_release = eth_queue_release, @@ -440,6 +536,8 @@ open_packet_iface(const char *key __rte_unused, return 0; } +static struct rte_vdev_driver pmd_af_packet_drv; + static int rte_pmd_init_internals(const char *name, const int sockfd, @@ -524,6 +622,7 @@ rte_pmd_init_internals(const char *name, name); goto error_early; } + (*internals)->if_name = strdup(pair->value); (*internals)->if_index = ifr.ifr_ifindex; if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { @@ -633,6 +732,9 @@ rte_pmd_init_internals(const char *name, tx_queue = &((*internals)->tx_queue[q]); tx_queue->framecount = req->tp_frame_nr; + tx_queue->frame_data_size = req->tp_frame_size; + tx_queue->frame_data_size -= TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll); tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr; @@ -666,7 +768,7 @@ rte_pmd_init_internals(const char *name, } /* reserve an ethdev entry */ - *eth_dev = 
rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + *eth_dev = rte_eth_dev_allocate(name); if (*eth_dev == NULL) goto error; @@ -693,7 +795,7 @@ rte_pmd_init_internals(const char *name, (*eth_dev)->dev_ops = &ops; (*eth_dev)->driver = NULL; (*eth_dev)->data->dev_flags = RTE_ETH_DEV_DETACHABLE; - (*eth_dev)->data->drv_name = drivername; + (*eth_dev)->data->drv_name = pmd_af_packet_drv.driver.name; (*eth_dev)->data->kdrv = RTE_KDRV_NONE; (*eth_dev)->data->numa_node = numa_node; @@ -712,6 +814,7 @@ error: ((*internals)->rx_queue[q].sockfd != qsockfd)) close((*internals)->rx_queue[q].sockfd); } + free((*internals)->if_name); rte_free(*internals); error_early: rte_free(data); @@ -820,7 +923,7 @@ rte_eth_from_packet(const char *name, } static int -rte_pmd_af_packet_devinit(const char *name, const char *params) +rte_pmd_af_packet_probe(const char *name, const char *params) { unsigned numa_node; int ret = 0; @@ -858,7 +961,7 @@ exit: } static int -rte_pmd_af_packet_devuninit(const char *name) +rte_pmd_af_packet_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals; @@ -880,6 +983,7 @@ rte_pmd_af_packet_devuninit(const char *name) rte_free(internals->rx_queue[q].rd); rte_free(internals->tx_queue[q].rd); } + free(internals->if_name); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data); @@ -889,14 +993,14 @@ rte_pmd_af_packet_devuninit(const char *name) return 0; } -static struct rte_driver pmd_af_packet_drv = { - .type = PMD_VDEV, - .init = rte_pmd_af_packet_devinit, - .uninit = rte_pmd_af_packet_devuninit, +static struct rte_vdev_driver pmd_af_packet_drv = { + .probe = rte_pmd_af_packet_probe, + .remove = rte_pmd_af_packet_remove, }; -PMD_REGISTER_DRIVER(pmd_af_packet_drv, eth_af_packet); -DRIVER_REGISTER_PARAM_STRING(eth_af_packet, +RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv); +RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet); +RTE_PMD_REGISTER_PARAM_STRING(net_af_packet, "iface= " "qpairs= " "blocksz= " diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.c b/src/dpdk/drivers/net/bnx2x/bnx2x.c index a49a07fb..cc380bd5 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x.c @@ -178,7 +178,7 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma, /* Caller must take care that strlen(mz_name) < RTE_MEMZONE_NAMESIZE */ z = rte_memzone_reserve_aligned(mz_name, (uint64_t) (size), - rte_lcore_to_socket_id(rte_lcore_id()), + SOCKET_ID_ANY, 0, align); if (z == NULL) { PMD_DRV_LOG(ERR, "DMA alloc failed for %s", msg); @@ -1397,10 +1397,10 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj, return rc; } -int +static int bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, - unsigned long *rx_accept_flags, - unsigned long *tx_accept_flags) + unsigned long *rx_accept_flags, + unsigned long *tx_accept_flags) { /* Clear the flags first */ *rx_accept_flags = 0; @@ -1438,6 +1438,7 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, break; + case BNX2X_RX_MODE_ALLMULTI_PROMISC: case BNX2X_RX_MODE_PROMISC: /* * According to deffinition of SI mode, iface in promisc mode @@ -2219,7 +2220,7 @@ int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0) } PMD_TX_LOG(DEBUG, - "start bd: nbytes %d flags %x vlan %x\n", + "start bd: nbytes %d flags %x vlan %x", tx_start_bd->nbytes, tx_start_bd->bd_flags.as_bitfield, tx_start_bd->vlan_or_ethertype); @@ -7016,34 +7017,6 @@ static int bnx2x_initial_phy_init(struct bnx2x_softc *sc, int load_mode) 
bnx2x_set_requested_fc(sc); - if (CHIP_REV_IS_SLOW(sc)) { - uint32_t bond = CHIP_BOND_ID(sc); - uint32_t feat = 0; - - if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; - } else if (bond & 0x4) { - if (CHIP_IS_E3(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC; - } else { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; - } - } else if (bond & 0x8) { - if (CHIP_IS_E3(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC; - } else { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; - } - } - -/* disable EMAC for E3 and above */ - if (bond & 0x2) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; - } - - sc->link_params.feature_config_flags |= feat; - } - if (load_mode == LOAD_DIAG) { lp->loopback_mode = ELINK_LOOPBACK_XGXS; /* Prefer doing PHY loopback at 10G speed, if possible */ @@ -9556,8 +9529,8 @@ static void bnx2x_init_rte(struct bnx2x_softc *sc) sc->max_rx_queues = min(BNX2X_VF_MAX_QUEUES_PER_VF, sc->igu_sb_cnt); } else { - sc->max_tx_queues = 128; - sc->max_rx_queues = 128; + sc->max_rx_queues = BNX2X_MAX_RSS_COUNT(sc); + sc->max_tx_queues = sc->max_rx_queues; } } diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.h b/src/dpdk/drivers/net/bnx2x/bnx2x.h index 78757a8d..b3cd5fcc 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x.h @@ -17,6 +17,8 @@ #define __BNX2X_H__ #include +#include +#include #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN #ifndef __LITTLE_ENDIAN @@ -304,10 +306,7 @@ struct bnx2x_device_type { /* TCP with Timestamp Option (32) + IPv6 (40) */ /* max supported alignment is 256 (8 shift) */ -#define BNX2X_RX_ALIGN_SHIFT 8 -/* FW uses 2 cache lines alignment for start packet and size */ -#define BNX2X_FW_RX_ALIGN_START (1 << BNX2X_RX_ALIGN_SHIFT) -#define BNX2X_FW_RX_ALIGN_END (1 << BNX2X_RX_ALIGN_SHIFT) +#define BNX2X_RX_ALIGN_SHIFT RTE_MAX(6, min(8, RTE_CACHE_LINE_SIZE_LOG2)) #define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5) @@ -1031,12 +1030,13 @@ struct bnx2x_softc { struct bnx2x_mac_ops mac_ops; /* structures for VF mbox/response/bulletin */ - struct bnx2x_vf_mbx_msg *vf2pf_mbox; - struct bnx2x_dma vf2pf_mbox_mapping; - struct vf_acquire_resp_tlv acquire_resp; + struct bnx2x_vf_mbx_msg *vf2pf_mbox; + struct bnx2x_dma vf2pf_mbox_mapping; + struct vf_acquire_resp_tlv acquire_resp; struct bnx2x_vf_bulletin *pf2vf_bulletin; - struct bnx2x_dma pf2vf_bulletin_mapping; - struct bnx2x_vf_bulletin old_bulletin; + struct bnx2x_dma pf2vf_bulletin_mapping; + struct bnx2x_vf_bulletin old_bulletin; + rte_spinlock_t vf2pf_lock; int media; @@ -1147,11 +1147,12 @@ struct bnx2x_softc { #define BNX2X_RECOVERY_NIC_LOADING 5 uint32_t rx_mode; -#define BNX2X_RX_MODE_NONE 0 -#define BNX2X_RX_MODE_NORMAL 1 -#define BNX2X_RX_MODE_ALLMULTI 2 -#define BNX2X_RX_MODE_PROMISC 3 -#define BNX2X_MAX_MULTICAST 64 +#define BNX2X_RX_MODE_NONE 0 +#define BNX2X_RX_MODE_NORMAL 1 +#define BNX2X_RX_MODE_ALLMULTI 2 +#define BNX2X_RX_MODE_ALLMULTI_PROMISC 3 +#define BNX2X_RX_MODE_PROMISC 4 +#define BNX2X_MAX_MULTICAST 64 struct bnx2x_port port; @@ -1415,34 +1416,90 @@ struct bnx2x_func_init_params { #define BAR1 2 #define BAR2 4 +static inline void +bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val) +{ + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", + (unsigned long)offset, val); + rte_write8(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); +} + +static inline void +bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val) +{ #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC 
-uint8_t bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset); -uint16_t bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset); -uint32_t bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset); + if ((offset % 2) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx", + (unsigned long)offset); +#endif + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x", + (unsigned long)offset, val); + rte_write16(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); -void bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val); -void bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val); -void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val); -#else -#define bnx2x_reg_write8(sc, offset, val)\ - *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val +} -#define bnx2x_reg_write16(sc, offset, val)\ - *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val +static inline void +bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val) +{ +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 4) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx", + (unsigned long)offset); +#endif -#define bnx2x_reg_write32(sc, offset, val)\ - *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + rte_write32(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); +} -#define bnx2x_reg_read8(sc, offset)\ - (*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) +static inline uint8_t +bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset) +{ + uint8_t val; -#define bnx2x_reg_read16(sc, offset)\ - (*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) + val = rte_read8((uint8_t *)sc->bar[BAR0].base_addr + offset); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", + (unsigned long)offset, val); -#define bnx2x_reg_read32(sc, offset)\ - (*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) + return val; +} + +static inline uint16_t +bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset) +{ + uint16_t val; + +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 2) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx", + (unsigned long)offset); +#endif + + val = rte_read16(((uint8_t *)sc->bar[BAR0].base_addr + offset)); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + + return val; +} + +static inline uint32_t +bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset) +{ + uint32_t val; + +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 4) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx", + (unsigned long)offset); #endif + val = rte_read32(((uint8_t *)sc->bar[BAR0].base_addr + offset)); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + + return val; +} + #define REG_ADDR(sc, offset) (((uint64_t)sc->bar[BAR0].base_addr) + (offset)) #define REG_RD8(sc, offset) bnx2x_reg_read8(sc, (offset)) @@ -1500,11 +1557,9 @@ void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val); #define DPM_TRIGGER_TYPE 0x40 /* Doorbell macro */ -#define BNX2X_DB_WRITE(db_bar, val) \ - *((volatile uint32_t *)(db_bar)) = (val) +#define BNX2X_DB_WRITE(db_bar, val) rte_write32_relaxed((val), (db_bar)) -#define BNX2X_DB_READ(db_bar) \ - *((volatile uint32_t *)(db_bar)) +#define BNX2X_DB_READ(db_bar) rte_read32_relaxed(db_bar) 
#define DOORBELL_ADDR(sc, offset) \ (volatile uint32_t *)(((char *)(sc)->bar[BAR1].base_addr + (offset))) @@ -1883,8 +1938,6 @@ int bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int leading); void bnx2x_free_hsi_mem(struct bnx2x_softc *sc); int bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc); -int bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, - unsigned long *rx_accept_flags, unsigned long *tx_accept_flags); int bnx2x_check_bull(struct bnx2x_softc *sc); //#define BNX2X_PULSE diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c b/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c index f3ab3550..a0b0dfab 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c @@ -17,7 +17,7 @@ * The set of PCI devices this driver supports */ #define BROADCOM_PCI_VENDOR_ID 0x14E4 -static struct rte_pci_id pci_id_bnx2x_map[] = { +static const struct rte_pci_id pci_id_bnx2x_map[] = { { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57711) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810) }, @@ -33,7 +33,7 @@ static struct rte_pci_id pci_id_bnx2x_map[] = { { .vendor_id = 0, } }; -static struct rte_pci_id pci_id_bnx2xvf_map[] = { +static const struct rte_pci_id pci_id_bnx2xvf_map[] = { { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800_VF) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810_VF) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57811_VF) }, @@ -119,12 +119,12 @@ bnx2x_interrupt_action(struct rte_eth_dev *dev) } static __rte_unused void -bnx2x_interrupt_handler(__rte_unused struct rte_intr_handle *handle, void *param) +bnx2x_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; bnx2x_interrupt_action(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /* @@ -187,10 +187,10 @@ bnx2x_dev_start(struct rte_eth_dev *dev) } if (IS_PF(sc)) { - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(&sc->pci_dev->intr_handle, bnx2x_interrupt_handler, (void *)dev); - if(rte_intr_enable(&(dev->pci_dev->intr_handle))) + if (rte_intr_enable(&sc->pci_dev->intr_handle)) PMD_DRV_LOG(ERR, "rte_intr_enable failed"); } @@ -203,8 +203,6 @@ bnx2x_dev_start(struct rte_eth_dev *dev) /* Print important adapter info for the user. 
*/ bnx2x_print_adapter_info(sc); - DELAY_MS(2500); - return ret; } @@ -217,8 +215,8 @@ bnx2x_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); if (IS_PF(sc)) { - rte_intr_disable(&(dev->pci_dev->intr_handle)); - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_disable(&sc->pci_dev->intr_handle); + rte_intr_callback_unregister(&sc->pci_dev->intr_handle, bnx2x_interrupt_handler, (void *)dev); } @@ -258,6 +256,8 @@ bnx2x_promisc_enable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_PROMISC; + if (rte_eth_allmulticast_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC; bnx2x_set_rx_mode(sc); } @@ -268,6 +268,8 @@ bnx2x_promisc_disable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_NORMAL; + if (rte_eth_allmulticast_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI; bnx2x_set_rx_mode(sc); } @@ -278,6 +280,8 @@ bnx2x_dev_allmulticast_enable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_ALLMULTI; + if (rte_eth_promiscuous_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC; bnx2x_set_rx_mode(sc); } @@ -288,6 +292,8 @@ bnx2x_dev_allmulticast_disable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_NORMAL; + if (rte_eth_promiscuous_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_PROMISC; bnx2x_set_rx_mode(sc); } @@ -424,6 +430,7 @@ bnx2x_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[num].value = *(uint64_t *)((char *)&sc->eth_stats + bnx2x_xstats_strings[num].offset_lo); + xstats[num].id = num; } return num; @@ -433,6 +440,7 @@ static void bnx2x_dev_infos_get(struct rte_eth_dev *dev, __rte_unused struct rte_eth_dev_info *dev_info) { struct bnx2x_softc *sc = dev->data->dev_private; + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->max_rx_queues = sc->max_rx_queues; dev_info->max_tx_queues = sc->max_tx_queues; dev_info->min_rx_bufsize = BNX2X_MIN_RX_BUF_SIZE; @@ -518,7 +526,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) PMD_INIT_FUNC_TRACE(); eth_dev->dev_ops = is_vf ? 
&bnx2xvf_eth_dev_ops : &bnx2x_eth_dev_ops; - pci_dev = eth_dev->pci_dev; + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); rte_eth_copy_pci_info(eth_dev, pci_dev); @@ -577,6 +585,8 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); if (IS_VF(sc)) { + rte_spinlock_init(&sc->vf2pf_lock); + if (bnx2x_dma_alloc(sc, sizeof(struct bnx2x_vf_mbx_msg), &sc->vf2pf_mbox_mapping, "vf2pf_mbox", RTE_CACHE_LINE_SIZE) != 0) @@ -618,9 +628,10 @@ eth_bnx2xvf_dev_init(struct rte_eth_dev *eth_dev) static struct eth_driver rte_bnx2x_pmd = { .pci_drv = { - .name = "rte_bnx2x_pmd", .id_table = pci_id_bnx2x_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_bnx2x_dev_init, .dev_private_size = sizeof(struct bnx2x_softc), @@ -631,41 +642,18 @@ static struct eth_driver rte_bnx2x_pmd = { */ static struct eth_driver rte_bnx2xvf_pmd = { .pci_drv = { - .name = "rte_bnx2xvf_pmd", .id_table = pci_id_bnx2xvf_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_bnx2xvf_dev_init, .dev_private_size = sizeof(struct bnx2x_softc), }; -static int rte_bnx2x_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_bnx2x_pmd); - - return 0; -} - -static int rte_bnx2xvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_bnx2xvf_pmd); - - return 0; -} - -static struct rte_driver rte_bnx2x_driver = { - .type = PMD_PDEV, - .init = rte_bnx2x_pmd_init, -}; - -static struct rte_driver rte_bnx2xvf_driver = { - .type = PMD_PDEV, - .init = rte_bnx2xvf_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_bnx2x_driver, bnx2x); -DRIVER_REGISTER_PCI_TABLE(bnx2x, pci_id_bnx2x_map); -PMD_REGISTER_DRIVER(rte_bnx2xvf_driver, bnx2xvf); -DRIVER_REGISTER_PCI_TABLE(bnx2xvf, pci_id_bnx2xvf_map); +RTE_PMD_REGISTER_PCI(net_bnx2x, rte_bnx2x_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_bnx2x, pci_id_bnx2x_map); +RTE_PMD_REGISTER_KMOD_DEP(net_bnx2x, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_bnx2xvf, rte_bnx2xvf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_bnx2xvf, pci_id_bnx2xvf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_bnx2xvf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c index 0ec4f899..170e48fb 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c @@ -19,7 +19,8 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, dev->data->port_id, queue_id); + dev->driver->pci_drv.driver.name, ring_name, + dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); if (mz) @@ -59,7 +60,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, + __rte_unused const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { uint16_t j, idx; @@ -84,7 +85,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, rxq->mb_pool = mp; rxq->queue_id = queue_idx; rxq->port_id = dev->data->port_id; - rxq->crc_len = (uint8_t)((dev->data->dev_conf.rxmode.hw_strip_crc) ? 
0 : ETHER_CRC_LEN); rxq->nb_rx_pages = 1; while (USABLE_RX_BD(rxq) < nb_desc) @@ -94,13 +94,9 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, sc->rx_ring_size = USABLE_RX_BD(rxq); rxq->nb_cq_pages = RCQ_BD_PAGES(rxq); - rxq->rx_free_thresh = rx_conf->rx_free_thresh ? - rx_conf->rx_free_thresh : DEFAULT_RX_FREE_THRESH; - - PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, thresh=%u, usable_bd=%lu, " + PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, usable_bd=%lu, " "total_bd=%lu, rx_pages=%u, cq_pages=%u", - queue_idx, nb_desc, rxq->rx_free_thresh, - (unsigned long)USABLE_RX_BD(rxq), + queue_idx, nb_desc, (unsigned long)USABLE_RX_BD(rxq), (unsigned long)TOTAL_RX_BD(rxq), rxq->nb_rx_pages, rxq->nb_cq_pages); @@ -135,7 +131,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, } /* Initialize software ring entries */ - rxq->rx_mbuf_alloc = 0; for (idx = 0; idx < rxq->nb_rx_desc; idx = NEXT_RX_BD(idx)) { mbuf = rte_mbuf_raw_alloc(mp); if (NULL == mbuf) { @@ -146,7 +141,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, } rxq->sw_ring[idx] = mbuf; rxq->rx_ring[idx] = mbuf->buf_physaddr; - rxq->rx_mbuf_alloc++; } rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h index ccb22fc1..dd251aaf 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h @@ -11,8 +11,6 @@ #ifndef _BNX2X_RXTX_H_ #define _BNX2X_RXTX_H_ - -#define DEFAULT_RX_FREE_THRESH 0 #define DEFAULT_TX_FREE_THRESH 512 #define RTE_PMD_BNX2X_TX_MAX_BURST 1 @@ -42,13 +40,9 @@ struct bnx2x_rx_queue { uint16_t rx_bd_tail; /**< Index of last rx bd. */ uint16_t rx_cq_head; /**< Index of current rcq bd. */ uint16_t rx_cq_tail; /**< Index of last rcq bd. */ - uint16_t nb_rx_hold; /**< number of held free RX desc. */ - uint16_t rx_free_thresh; /**< max free RX desc to hold. */ uint16_t queue_id; /**< RX queue index. */ uint8_t port_id; /**< Device port identifier. */ - uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ struct bnx2x_softc *sc; /**< Ptr to dev_private data. */ - uint64_t rx_mbuf_alloc; /**< Number of allocated mbufs. 
*/ }; /** diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c index 1c895f88..0ca0df87 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c @@ -64,25 +64,46 @@ bnx2x_check_bull(struct bnx2x_softc *sc) return TRUE; } -/* add tlv to a buffer */ -#define BNX2X_TLV_APPEND(_tlvs, _offset, _type, _length) \ - ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->type = _type; \ - ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->length = _length +/* place a given tlv on the tlv buffer at a given offset */ +static void +bnx2x_add_tlv(__rte_unused struct bnx2x_softc *sc, void *tlvs_list, + uint16_t offset, uint16_t type, uint16_t length) +{ + struct channel_tlv *tl = (struct channel_tlv *) + ((unsigned long)tlvs_list + offset); + + tl->type = type; + tl->length = length; +} /* Initiliaze header of the first tlv and clear mailbox*/ static void -bnx2x_init_first_tlv(struct bnx2x_softc *sc, struct vf_first_tlv *tlv, - uint16_t type, uint16_t len) +bnx2x_vf_prep(struct bnx2x_softc *sc, struct vf_first_tlv *first_tlv, + uint16_t type, uint16_t length) { struct bnx2x_vf_mbx_msg *mbox = sc->vf2pf_mbox; + + rte_spinlock_lock(&sc->vf2pf_lock); + PMD_DRV_LOG(DEBUG, "Preparing %d tlv for sending", type); memset(mbox, 0, sizeof(struct bnx2x_vf_mbx_msg)); - BNX2X_TLV_APPEND(tlv, 0, type, len); + bnx2x_add_tlv(sc, &first_tlv->tl, 0, type, length); /* Initialize header of the first tlv */ - tlv->reply_offset = sizeof(mbox->query); + first_tlv->reply_offset = sizeof(mbox->query); +} + +/* releases the mailbox */ +static void +bnx2x_vf_finalize(struct bnx2x_softc *sc, + __rte_unused struct vf_first_tlv *first_tlv) +{ + PMD_DRV_LOG(DEBUG, "done sending [%d] tlv over vf pf channel", + first_tlv->tl.type); + + rte_spinlock_unlock(&sc->vf2pf_lock); } #define BNX2X_VF_CMD_ADDR_LO PXP_VF_ADDR_CSDM_GLOBAL_START @@ -97,39 +118,36 @@ bnx2x_do_req4pf(struct bnx2x_softc *sc, phys_addr_t phys_addr) uint8_t *status = &sc->vf2pf_mbox->resp.common_reply.status; uint8_t i; - if (!*status) { - bnx2x_check_bull(sc); - if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) { - PMD_DRV_LOG(ERR, "channel is down. Aborting message sending"); - *status = BNX2X_VF_STATUS_SUCCESS; - return 0; - } + if (*status) { + PMD_DRV_LOG(ERR, "status should be zero before message" + " to pf was sent"); + return -EINVAL; + } - REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr)); - REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr)); + bnx2x_check_bull(sc); + if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) { + PMD_DRV_LOG(ERR, "channel is down. Aborting message sending"); + return -EINVAL; + } - /* memory barrier to ensure that FW can read phys_addr */ - wmb(); + REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr)); + REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr)); - REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1); + /* memory barrier to ensure that FW can read phys_addr */ + wmb(); - /* Do several attempts until PF completes - * "." 
is used to show progress - */ - for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) { - DELAY_MS(BNX2X_VF_CHANNEL_DELAY); - if (*status) - break; - } + REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1); - if (!*status) { - PMD_DRV_LOG(ERR, "Response from PF timed out"); - return -EAGAIN; - } - } else { - PMD_DRV_LOG(ERR, "status should be zero before message" - "to pf was sent"); - return -EINVAL; + /* Do several attempts until PF completes */ + for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) { + DELAY_MS(BNX2X_VF_CHANNEL_DELAY); + if (*status) + break; + } + + if (!*status) { + PMD_DRV_LOG(ERR, "Response from PF timed out"); + return -EAGAIN; } PMD_DRV_LOG(DEBUG, "Response from PF was received"); @@ -168,31 +186,23 @@ static inline int bnx2x_read_vf_id(struct bnx2x_softc *sc) #define BNX2X_VF_OBTAIN_MAC_FILTERS 1 #define BNX2X_VF_OBTAIN_MC_FILTERS 10 -struct bnx2x_obtain_status { - int success; - int err_code; -}; - static -struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) +int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) { - int tries = 0; struct vf_acquire_resp_tlv *resp = &sc->vf2pf_mbox->resp.acquire_resp, - *sc_resp = &sc->acquire_resp; - struct vf_resource_query *res_query; - struct vf_resc *resc; - struct bnx2x_obtain_status status; + *sc_resp = &sc->acquire_resp; + struct vf_resource_query *res_query; + struct vf_resc *resc; int res_obtained = false; + int tries = 0; + int rc; do { PMD_DRV_LOG(DEBUG, "trying to get resources"); - if (bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr)) { - /* timeout */ - status.success = 0; - status.err_code = -EAGAIN; - return status; - } + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + return rc; memcpy(sc_resp, resp, sizeof(sc->acquire_resp)); @@ -203,12 +213,12 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) PMD_DRV_LOG(DEBUG, "resources obtained successfully"); res_obtained = true; } else if (sc_resp->status == BNX2X_VF_STATUS_NO_RESOURCES && - tries < BNX2X_VF_OBTAIN_MAX_TRIES) { + tries < BNX2X_VF_OBTAIN_MAX_TRIES) { PMD_DRV_LOG(DEBUG, "PF cannot allocate requested amount of resources"); res_query = &sc->vf2pf_mbox->query[0].acquire.res_query; - resc = &sc_resp->resc; + resc = &sc_resp->resc; /* PF refused our request. Try to decrease request params */ res_query->num_txqs = min(res_query->num_txqs, resc->num_txqs); @@ -220,30 +230,30 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) memset(&sc->vf2pf_mbox->resp, 0, sizeof(union resp_tlvs)); } else { - PMD_DRV_LOG(ERR, "Resources cannot be obtained. Status of handling: %d. 
Aborting", - sc_resp->status); - status.success = 0; - status.err_code = -EAGAIN; - return status; + PMD_DRV_LOG(ERR, "Failed to get the requested " + "amount of resources: %d.", + sc_resp->status); + return -EINVAL; } } while (!res_obtained); - status.success = 1; - return status; + return 0; } int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_count) { struct vf_acquire_tlv *acq = &sc->vf2pf_mbox->query[0].acquire; int vf_id; - struct bnx2x_obtain_status obtain_status; + int rc; bnx2x_vf_close(sc); - bnx2x_init_first_tlv(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq)); + bnx2x_vf_prep(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq)); vf_id = bnx2x_read_vf_id(sc); - if (vf_id < 0) - return -EAGAIN; + if (vf_id < 0) { + rc = -EAGAIN; + goto out; + } acq->vf_id = vf_id; @@ -256,19 +266,19 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_ acq->bulletin_addr = sc->pf2vf_bulletin_mapping.paddr; /* Request physical port identifier */ - BNX2X_TLV_APPEND(acq, acq->first_tlv.length, - BNX2X_VF_TLV_PHYS_PORT_ID, - sizeof(struct channel_tlv)); + bnx2x_add_tlv(sc, acq, acq->first_tlv.tl.length, + BNX2X_VF_TLV_PHYS_PORT_ID, + sizeof(struct channel_tlv)); - BNX2X_TLV_APPEND(acq, - (acq->first_tlv.length + sizeof(struct channel_tlv)), - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, acq, + (acq->first_tlv.tl.length + sizeof(struct channel_tlv)), + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); /* requesting the resources in loop */ - obtain_status = bnx2x_loop_obtain_resources(sc); - if (!obtain_status.success) - return obtain_status.err_code; + rc = bnx2x_loop_obtain_resources(sc); + if (rc) + goto out; struct vf_acquire_resp_tlv sc_resp = sc->acquire_resp; @@ -299,7 +309,10 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_ else eth_random_addr(sc->link_params.mac_addr); - return 0; +out: + bnx2x_vf_finalize(sc, &acq->first_tlv); + + return rc; } /* Ask PF to release VF's resources */ @@ -309,19 +322,23 @@ bnx2x_vf_close(struct bnx2x_softc *sc) struct vf_release_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; int vf_id = bnx2x_read_vf_id(sc); + int rc; if (vf_id >= 0) { query = &sc->vf2pf_mbox->query[0].release; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE, + sizeof(*query)); query->vf_id = vf_id; - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Failed to release VF"); + + bnx2x_vf_finalize(sc, &query->first_tlv); } } @@ -331,11 +348,11 @@ bnx2x_vf_init(struct bnx2x_softc *sc) { struct vf_init_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; - int i; + int i, rc; query = &sc->vf2pf_mbox->query[0].init; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_INIT, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_INIT, + sizeof(*query)); FOR_EACH_QUEUE(sc, i) { query->sb_addr[i] = (unsigned long)(sc->fp[i].sb_dma.paddr); @@ -345,17 +362,23 
@@ bnx2x_vf_init(struct bnx2x_softc *sc) query->stats_addr = sc->fw_stats_data_mapping + offsetof(struct bnx2x_fw_stats_data, queue_stats); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to init VF"); - return -EINVAL; + rc = -EINVAL; + goto out; } PMD_DRV_LOG(DEBUG, "VF was initialized"); - return 0; +out: + bnx2x_vf_finalize(sc, &query->first_tlv); + return rc; } void @@ -364,44 +387,49 @@ bnx2x_vf_unload(struct bnx2x_softc *sc) struct vf_close_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; struct vf_q_op_tlv *query_op; - int i, vf_id; + int i, vf_id, rc; vf_id = bnx2x_read_vf_id(sc); if (vf_id > 0) { FOR_EACH_QUEUE(sc, i) { query_op = &sc->vf2pf_mbox->query[0].q_op; - bnx2x_init_first_tlv(sc, &query_op->first_tlv, - BNX2X_VF_TLV_TEARDOWN_Q, - sizeof(*query_op)); + bnx2x_vf_prep(sc, &query_op->first_tlv, + BNX2X_VF_TLV_TEARDOWN_Q, + sizeof(*query_op)); query_op->vf_qid = i; - BNX2X_TLV_APPEND(query_op, query_op->first_tlv.length, - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query_op, + query_op->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Bad reply for vf_q %d teardown", i); + + bnx2x_vf_finalize(sc, &query_op->first_tlv); } bnx2x_vf_set_mac(sc, false); query = &sc->vf2pf_mbox->query[0].close; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE, + sizeof(*query)); query->vf_id = vf_id; - BNX2X_TLV_APPEND(query, query->first_tlv.length, - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Bad reply from PF for close message"); + + bnx2x_vf_finalize(sc, &query->first_tlv); } } @@ -466,10 +494,11 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead struct vf_setup_q_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; uint16_t flags = bnx2x_vf_q_flags(leading); + int rc; query = &sc->vf2pf_mbox->query[0].setup_q; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q, + sizeof(*query)); query->vf_qid = fp->index; query->param_valid = VF_RXQ_VALID | VF_TXQ_VALID; @@ -477,17 +506,22 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead bnx2x_vf_rx_q_prep(sc, fp, &query->rxq, flags); bnx2x_vf_tx_q_prep(sc, fp, &query->txq, flags); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct 
channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to setup VF queue[%d]", fp->index); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -495,9 +529,10 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) { struct vf_set_q_filters_tlv *query; struct vf_common_reply_tlv *reply; + int rc; query = &sc->vf2pf_mbox->query[0].set_q_filters; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, sizeof(*query)); query->vf_qid = sc->fp->index; @@ -511,10 +546,13 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) rte_memcpy(query->filters[0].mac, sc->link_params.mac_addr, ETH_ALEN); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; reply = &sc->vf2pf_mbox->resp.common_reply; while (BNX2X_VF_STATUS_FAILURE == reply->status && @@ -525,16 +563,20 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) rte_memcpy(query->filters[0].mac, sc->pf2vf_bulletin->mac, ETH_ALEN); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; } if (BNX2X_VF_STATUS_SUCCESS != reply->status) { PMD_DRV_LOG(ERR, "Bad reply from PF for SET MAC message: %d", reply->status); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -543,15 +585,17 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc, { struct vf_rss_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; + int rc; query = &sc->vf2pf_mbox->query[0].update_rss; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS, sizeof(*query)); /* add list termination tlv */ - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); rte_memcpy(query->rss_key, params->rss_key, sizeof(params->rss_key)); query->rss_key_size = T_ETH_RSS_KEY; @@ -562,13 +606,18 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc, query->rss_result_mask = params->rss_result_mask; query->rss_flags = params->rss_flags; - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; + if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to configure RSS"); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -576,27 +625,56 @@ bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc) { struct vf_set_q_filters_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; - unsigned long tx_mask; + int rc; query = &sc->vf2pf_mbox->query[0].set_q_filters; - bnx2x_init_first_tlv(sc, 
&query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, sizeof(*query)); query->vf_qid = 0; query->flags = BNX2X_VF_RX_MASK_CHANGED; - if (bnx2x_fill_accept_flags(sc, sc->rx_mode, &query->rx_mask, &tx_mask)) { - return -EINVAL; + switch (sc->rx_mode) { + case BNX2X_RX_MODE_NONE: /* no Rx */ + query->rx_mask = VFPF_RX_MASK_ACCEPT_NONE; + break; + case BNX2X_RX_MODE_NORMAL: + query->rx_mask = VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + case BNX2X_RX_MODE_ALLMULTI: + query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + case BNX2X_RX_MODE_ALLMULTI_PROMISC: + case BNX2X_RX_MODE_PROMISC: + query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_ALL_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + default: + PMD_DRV_LOG(ERR, "BAD rx mode (%d)", sc->rx_mode); + rc = -EINVAL; + goto out; } - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to set RX mode"); - return -EINVAL; + rc = -EINVAL; } - return 0; +out: + bnx2x_vf_finalize(sc, &query->first_tlv); + + return rc; } diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h index f854d81b..955ea982 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h @@ -40,6 +40,13 @@ struct vf_resource_query { #define TLV_BUFFER_SIZE 1024 +#define VFPF_RX_MASK_ACCEPT_NONE 0x00000000 +#define VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST 0x00000001 +#define VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST 0x00000002 +#define VFPF_RX_MASK_ACCEPT_ALL_UNICAST 0x00000004 +#define VFPF_RX_MASK_ACCEPT_ALL_MULTICAST 0x00000008 +#define VFPF_RX_MASK_ACCEPT_BROADCAST 0x00000010 + /* general tlv header (used for both vf->pf request and pf->vf response) */ struct channel_tlv { uint16_t type; @@ -47,8 +54,7 @@ struct channel_tlv { }; struct vf_first_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint32_t reply_offset; }; @@ -58,16 +64,14 @@ struct tlv_buffer_size { /* tlv struct for all PF replies except acquire */ struct vf_common_reply_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint8_t status; uint8_t pad[3]; }; /* used to terminate and pad a tlv list */ struct channel_list_end_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint32_t pad; }; @@ -327,7 +331,6 @@ struct bnx2x_vf_mbx_msg { union resp_tlvs resp; }; -void bnx2x_add_tlv(void *tlvs_list, uint16_t offset, uint16_t type, uint16_t length); int bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set); int bnx2x_vf_config_rss(struct bnx2x_softc *sc, struct ecore_config_rss_params *params); diff --git a/src/dpdk/drivers/net/bnx2x/debug.c b/src/dpdk/drivers/net/bnx2x/debug.c deleted file mode 100644 index cc50845c..00000000 --- a/src/dpdk/drivers/net/bnx2x/debug.c +++ /dev/null @@ -1,96 +0,0 @@ -/*- - * Copyright (c) 2007-2013 QLogic Corporation. All rights reserved. 
- * - * Eric Davis - * David Christensen - * Gary Zambrano - * - * Copyright (c) 2013-2015 Brocade Communications Systems, Inc. - * Copyright (c) 2015 QLogic Corporation. - * All rights reserved. - * www.qlogic.com - * - * See LICENSE.bnx2x_pmd for copyright and licensing details. - */ - -#include "bnx2x.h" - - -/* - * Debug versions of the 8/16/32 bit OS register read/write functions to - * capture/display values read/written from/to the controller. - */ -void -bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val) -{ - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val); - *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -void -bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val) -{ - if ((offset % 2) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx", - (unsigned long)offset); - } - - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x", (unsigned long)offset, val); - *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -void -bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val) -{ - if ((offset % 4) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx", - (unsigned long)offset); - } - - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -uint8_t -bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset) -{ - uint8_t val; - - val = (uint8_t)(*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val); - - return val; -} - -uint16_t -bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset) -{ - uint16_t val; - - if ((offset % 2) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx", - (unsigned long)offset); - } - - val = (uint16_t)(*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - - return val; -} - -uint32_t -bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset) -{ - uint32_t val; - - if ((offset % 4) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx", - (unsigned long)offset); - return 0; - } - - val = (uint32_t)(*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - - return val; -} diff --git a/src/dpdk/drivers/net/bnx2x/elink.c b/src/dpdk/drivers/net/bnx2x/elink.c index 149cc975..53293962 100644 --- a/src/dpdk/drivers/net/bnx2x/elink.c +++ b/src/dpdk/drivers/net/bnx2x/elink.c @@ -1586,26 +1586,6 @@ static elink_status_t elink_emac_enable(struct elink_params *params, /* enable emac and not bmac */ REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 1); -#ifdef ELINK_INCLUDE_EMUL - /* for paladium */ - if (CHIP_REV_IS_EMUL(sc)) { - /* Use lane 1 (of lanes 0-3) */ - REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1); - REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 1); - } - /* for fpga */ - else -#endif -#ifdef ELINK_INCLUDE_FPGA - if (CHIP_REV_IS_FPGA(sc)) { - /* Use lane 1 (of lanes 0-3) */ - PMD_DRV_LOG(DEBUG, "elink_emac_enable: Setting FPGA"); - - REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1); - REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 0); - } else -#endif - /* ASIC */ if (vars->phy_flags & PHY_XGXS_FLAG) { uint32_t ser_lane = 
((params->lane_config & PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >> @@ -1628,39 +1608,28 @@ static elink_status_t elink_emac_enable(struct elink_params *params, elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, EMAC_TX_MODE_RESET); -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (CHIP_REV_IS_SLOW(sc)) { - /* config GMII mode */ - val = REG_RD(sc, emac_base + EMAC_REG_EMAC_MODE); - elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MODE, - (val | EMAC_MODE_PORT_GMII)); - } else { /* ASIC */ -#endif - /* pause enable/disable */ - elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); + /* pause enable/disable */ + elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); - elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_PFC_ENABLED)) { - if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX) - elink_bits_en(sc, emac_base + - EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); - - if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX) - elink_bits_en(sc, emac_base + - EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - } else - elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, - EMAC_TX_MODE_FLOW_EN); -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - } -#endif + elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + if (!(params->feature_config_flags & + ELINK_FEATURE_CONFIG_PFC_ENABLED)) { + if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX) + elink_bits_en(sc, emac_base + + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); + + if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX) + elink_bits_en(sc, emac_base + + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + } else + elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, + EMAC_TX_MODE_FLOW_EN); /* KEEP_VLAN_TAG, promiscuous */ val = REG_RD(sc, emac_base + EMAC_REG_EMAC_RX_MODE); @@ -1727,17 +1696,7 @@ static elink_status_t elink_emac_enable(struct elink_params *params, REG_WR(sc, NIG_REG_EMAC0_PAUSE_OUT_EN + port * 4, val); REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0x1); -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) { - /* Take the BigMac out of reset */ - REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, - (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); - - /* Enable access for bmac registers */ - REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x1); - } else -#endif - REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0); + REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0); vars->mac_type = ELINK_MAC_TYPE_EMAC; return ELINK_STATUS_OK; @@ -2137,15 +2096,6 @@ static elink_status_t elink_bmac1_enable(struct elink_params *params, wb_data[1] = 0; REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS, wb_data, 2); -#ifdef ELINK_INCLUDE_EMUL - /* Fix for emulation */ - if (CHIP_REV_IS_EMUL(sc)) { - wb_data[0] = 0xf000; - wb_data[1] = 0; - REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_TX_PAUSE_THRESHOLD, - wb_data, 2); - } -#endif return ELINK_STATUS_OK; } @@ -5922,11 +5872,6 @@ elink_status_t elink_set_led(struct elink_params *params, params, mode); } } -#ifdef ELINK_INCLUDE_EMUL - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC) - return rc; -#endif switch (mode) { case ELINK_LED_MODE_FRONT_PANEL_OFF: @@ -6645,7 +6590,7 @@ static elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc, 
uint8_t port) { uint32_t count = 0; - uint16_t fw_ver1, fw_msgout; + uint16_t fw_ver1 = 0, fw_msgout; elink_status_t rc = ELINK_STATUS_OK; /* Boot port from external ROM */ @@ -11671,10 +11616,7 @@ elink_status_t elink_phy_probe(struct elink_params * params) struct elink_phy *phy; params->num_phys = 0; PMD_DRV_LOG(DEBUG, "Begin phy probe"); -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) - return ELINK_STATUS_OK; -#endif + phy_config_swapped = params->multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED; @@ -11739,182 +11681,6 @@ elink_status_t elink_phy_probe(struct elink_params * params) return ELINK_STATUS_OK; } -#ifdef ELINK_INCLUDE_EMUL -static elink_status_t elink_init_e3_emul_mac(struct elink_params *params, - struct elink_vars *vars) -{ - struct bnx2x_softc *sc = params->sc; - vars->line_speed = params->req_line_speed[0]; - /* In case link speed is auto, set speed the highest as possible */ - if (params->req_line_speed[0] == ELINK_SPEED_AUTO_NEG) { - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) - vars->line_speed = ELINK_SPEED_2500; - else if (elink_is_4_port_mode(sc)) - vars->line_speed = ELINK_SPEED_10000; - else - vars->line_speed = ELINK_SPEED_20000; - } - if (vars->line_speed < ELINK_SPEED_10000) { - if ((params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC)) { - PMD_DRV_LOG(DEBUG, "Invalid line speed %d while UMAC is" - " disabled!", params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - switch (vars->line_speed) { - case ELINK_SPEED_10: - vars->link_status = ELINK_LINK_10TFD; - break; - case ELINK_SPEED_100: - vars->link_status = ELINK_LINK_100TXFD; - break; - case ELINK_SPEED_1000: - vars->link_status = ELINK_LINK_1000TFD; - break; - case ELINK_SPEED_2500: - vars->link_status = ELINK_LINK_2500TFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid line speed %d for UMAC", - vars->line_speed); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - - if (params->loopback_mode == ELINK_LOOPBACK_UMAC) - elink_umac_enable(params, vars, 1); - else - elink_umac_enable(params, vars, 0); - } else { - /* Link speed >= 10000 requires XMAC enabled */ - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) { - PMD_DRV_LOG(DEBUG, "Invalid line speed %d while XMAC is" - " disabled!", params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - /* Check link speed */ - switch (vars->line_speed) { - case ELINK_SPEED_10000: - vars->link_status = ELINK_LINK_10GTFD; - break; - case ELINK_SPEED_20000: - vars->link_status = ELINK_LINK_20GTFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid line speed %d for XMAC", - vars->line_speed); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - if (params->loopback_mode == ELINK_LOOPBACK_XMAC) - elink_xmac_enable(params, vars, 1); - else - elink_xmac_enable(params, vars, 0); - } - return ELINK_STATUS_OK; -} - -static elink_status_t elink_init_emul(struct elink_params *params, - struct elink_vars *vars) -{ - struct bnx2x_softc *sc = params->sc; - if (CHIP_IS_E3(sc)) { - if (elink_init_e3_emul_mac(params, vars) != ELINK_STATUS_OK) - return ELINK_STATUS_ERROR; - } else { - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC) { - vars->line_speed = ELINK_SPEED_1000; - vars->link_status = (LINK_STATUS_LINK_UP | - ELINK_LINK_1000XFD); - if (params->loopback_mode == ELINK_LOOPBACK_EMAC) - elink_emac_enable(params, vars, 1); - else - elink_emac_enable(params, vars, 0); - } else { - vars->line_speed = 
ELINK_SPEED_10000; - vars->link_status = (LINK_STATUS_LINK_UP | - ELINK_LINK_10GTFD); - if (params->loopback_mode == ELINK_LOOPBACK_BMAC) - elink_bmac_enable(params, vars, 1, 1); - else - elink_bmac_enable(params, vars, 0, 1); - } - } - vars->link_up = 1; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = ELINK_FLOW_CTRL_NONE; - - if (CHIP_IS_E1x(sc)) - elink_pbf_update(params, vars->flow_ctrl, vars->line_speed); - /* Disable drain */ - REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); - - /* update shared memory */ - elink_update_mng(params, vars->link_status); - return ELINK_STATUS_OK; -} -#endif -#ifdef ELINK_INCLUDE_FPGA -static elink_status_t elink_init_fpga(struct elink_params *params, - struct elink_vars *vars) -{ - /* Enable on E1.5 FPGA */ - struct bnx2x_softc *sc = params->sc; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = ELINK_FLOW_CTRL_NONE; - vars->flow_ctrl = (ELINK_FLOW_CTRL_TX | ELINK_FLOW_CTRL_RX); - vars->link_status |= (LINK_STATUS_TX_FLOW_CONTROL_ENABLED | - LINK_STATUS_RX_FLOW_CONTROL_ENABLED); - if (CHIP_IS_E3(sc)) { - vars->line_speed = params->req_line_speed[0]; - switch (vars->line_speed) { - case ELINK_SPEED_AUTO_NEG: - vars->line_speed = ELINK_SPEED_2500; - case ELINK_SPEED_2500: - vars->link_status = ELINK_LINK_2500TFD; - break; - case ELINK_SPEED_1000: - vars->link_status = ELINK_LINK_1000XFD; - break; - case ELINK_SPEED_100: - vars->link_status = ELINK_LINK_100TXFD; - break; - case ELINK_SPEED_10: - vars->link_status = ELINK_LINK_10TFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid link speed %d", - params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - if (params->loopback_mode == ELINK_LOOPBACK_UMAC) - elink_umac_enable(params, vars, 1); - else - elink_umac_enable(params, vars, 0); - } else { - vars->line_speed = ELINK_SPEED_10000; - vars->link_status = (LINK_STATUS_LINK_UP | ELINK_LINK_10GTFD); - if (params->loopback_mode == ELINK_LOOPBACK_EMAC) - elink_emac_enable(params, vars, 1); - else - elink_emac_enable(params, vars, 0); - } - vars->link_up = 1; - - if (CHIP_IS_E1x(sc)) - elink_pbf_update(params, vars->flow_ctrl, vars->line_speed); - /* Disable drain */ - REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); - - /* Update shared memory */ - elink_update_mng(params, vars->link_status); - return ELINK_STATUS_OK; -} -#endif static void elink_init_bmac_loopback(struct elink_params *params, struct elink_vars *vars) { @@ -12236,12 +12002,8 @@ elink_status_t elink_phy_init(struct elink_params *params, ELINK_NIG_MASK_XGXS0_LINK10G | ELINK_NIG_MASK_SERDES0_LINK_STATUS | ELINK_NIG_MASK_MI_INT)); -#ifdef ELINK_INCLUDE_EMUL - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC)) -#endif - elink_emac_init(params); + elink_emac_init(params); if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED) vars->link_status |= LINK_STATUS_PFC_ENABLED; @@ -12253,45 +12015,36 @@ elink_status_t elink_phy_init(struct elink_params *params, set_phy_vars(params, vars); PMD_DRV_LOG(DEBUG, "Num of phys on board: %d", params->num_phys); -#ifdef ELINK_INCLUDE_FPGA - if (CHIP_REV_IS_FPGA(sc)) { - return elink_init_fpga(params, vars); - } else -#endif -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) { - return elink_init_emul(params, vars); - } else -#endif - switch (params->loopback_mode) { - case ELINK_LOOPBACK_BMAC: - elink_init_bmac_loopback(params, vars); - break; - case ELINK_LOOPBACK_EMAC: - elink_init_emac_loopback(params, vars); - break; - case 
ELINK_LOOPBACK_XMAC: - elink_init_xmac_loopback(params, vars); - break; - case ELINK_LOOPBACK_UMAC: - elink_init_umac_loopback(params, vars); - break; - case ELINK_LOOPBACK_XGXS: - case ELINK_LOOPBACK_EXT_PHY: - elink_init_xgxs_loopback(params, vars); - break; - default: - if (!CHIP_IS_E3(sc)) { - if (params->switch_cfg == ELINK_SWITCH_CFG_10G) - elink_xgxs_deassert(params); - else - elink_serdes_deassert(sc, params->port); - } - elink_link_initialize(params, vars); - DELAY(1000 * 30); - elink_link_int_enable(params); - break; + + switch (params->loopback_mode) { + case ELINK_LOOPBACK_BMAC: + elink_init_bmac_loopback(params, vars); + break; + case ELINK_LOOPBACK_EMAC: + elink_init_emac_loopback(params, vars); + break; + case ELINK_LOOPBACK_XMAC: + elink_init_xmac_loopback(params, vars); + break; + case ELINK_LOOPBACK_UMAC: + elink_init_umac_loopback(params, vars); + break; + case ELINK_LOOPBACK_XGXS: + case ELINK_LOOPBACK_EXT_PHY: + elink_init_xgxs_loopback(params, vars); + break; + default: + if (!CHIP_IS_E3(sc)) { + if (params->switch_cfg == ELINK_SWITCH_CFG_10G) + elink_xgxs_deassert(params); + else + elink_serdes_deassert(sc, params->port); } + elink_link_initialize(params, vars); + DELAY(1000 * 30); + elink_link_int_enable(params); + break; + } elink_update_mng(params, vars->link_status); elink_update_mng_eee(params, vars->eee_status); @@ -12325,22 +12078,12 @@ static elink_status_t elink_link_reset(struct elink_params *params, REG_WR(sc, NIG_REG_BMAC0_OUT_EN + port * 4, 0); REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0); } -#ifdef ELINK_INCLUDE_EMUL - /* Stop BigMac rx */ - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC)) -#endif - if (!CHIP_IS_E3(sc)) - elink_set_bmac_rx(sc, port, 0); -#ifdef ELINK_INCLUDE_EMUL - /* Stop XMAC/UMAC rx */ - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC)) -#endif - if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) { - elink_set_xmac_rxtx(params, 0); - elink_set_umac_rxtx(params, 0); - } + if (!CHIP_IS_E3(sc)) + elink_set_bmac_rx(sc, port, 0); + if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) { + elink_set_xmac_rxtx(params, 0); + elink_set_umac_rxtx(params, 0); + } /* Disable emac */ if (!CHIP_IS_E3(sc)) REG_WR(sc, NIG_REG_NIG_EMAC0_EN + port * 4, 0); @@ -12376,14 +12119,11 @@ static elink_status_t elink_link_reset(struct elink_params *params, elink_bits_dis(sc, NIG_REG_LATCH_BC_0 + port * 4, 1 << ELINK_NIG_LATCH_BC_ENABLE_MI_INT); } -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (!CHIP_REV_IS_SLOW(sc)) -#endif - if (params->phy[ELINK_INT_PHY].link_reset) - params->phy[ELINK_INT_PHY].link_reset(&params-> - phy - [ELINK_INT_PHY], - params); + if (params->phy[ELINK_INT_PHY].link_reset) + params->phy[ELINK_INT_PHY].link_reset(&params-> + phy + [ELINK_INT_PHY], + params); /* Disable nig ingress interface */ if (!CHIP_IS_E3(sc)) { @@ -12868,10 +12608,6 @@ elink_status_t elink_common_init_phy(struct bnx2x_softc * sc, uint32_t phy_ver, val; uint8_t phy_index = 0; uint32_t ext_phy_type, ext_phy_config; -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (CHIP_REV_IS_EMUL(sc) || CHIP_REV_IS_FPGA(sc)) - return ELINK_STATUS_OK; -#endif elink_set_mdio_clk(sc, GRCBASE_EMAC0); elink_set_mdio_clk(sc, GRCBASE_EMAC1); diff --git a/src/dpdk/drivers/net/bnx2x/elink.h b/src/dpdk/drivers/net/bnx2x/elink.h index c4f886a7..9401b7cd 100644 --- a/src/dpdk/drivers/net/bnx2x/elink.h +++ b/src/dpdk/drivers/net/bnx2x/elink.h @@ -359,10 +359,6 @@ struct elink_params { #define
ELINK_FEATURE_CONFIG_PFC_ENABLED (1<<1) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY (1<<2) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_DUAL_PHY_OPT_MDL_VRFY (1<<3) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC (1<<4) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC (1<<5) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC (1<<6) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC (1<<7) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX (1<<8) #define ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED (1<<9) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED (1<<10) diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c index 203ebe9e..f552d969 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" @@ -44,8 +45,6 @@ #define DEFAULT_POLLING_INTERVAL_10_MS (10) -const char pmd_bond_driver_name[] = "rte_bond_pmd"; - int check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) { @@ -54,7 +53,7 @@ check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) return -1; /* return 0 if driver name matches */ - return eth_dev->data->drv_name != pmd_bond_driver_name; + return eth_dev->data->drv_name != pmd_bond_drv.driver.name; } int @@ -166,6 +165,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) { struct bond_dev_private *internals = NULL; struct rte_eth_dev *eth_dev = NULL; + uint32_t vlan_filter_bmp_size; /* now do all data allocation - for eth_dev structure, dummy pci driver * and internal (private) data @@ -189,7 +189,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) } /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); goto err; @@ -199,10 +199,6 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) eth_dev->data->nb_rx_queues = (uint16_t)1; eth_dev->data->nb_tx_queues = (uint16_t)1; - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; - eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, socket_id); if (eth_dev->data->mac_addrs == NULL) { @@ -210,17 +206,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) goto err; } - eth_dev->data->dev_started = 0; - eth_dev->data->promiscuous = 0; - eth_dev->data->scattered_rx = 0; - eth_dev->data->all_multicast = 0; - eth_dev->dev_ops = &default_dev_ops; eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_DETACHABLE; eth_dev->driver = NULL; eth_dev->data->kdrv = RTE_KDRV_NONE; - eth_dev->data->drv_name = pmd_bond_driver_name; + eth_dev->data->drv_name = pmd_bond_drv.driver.name; eth_dev->data->numa_node = socket_id; rte_spinlock_init(&internals->lock); @@ -260,6 +251,27 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) goto err; } + vlan_filter_bmp_size = + rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1); + internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, + RTE_CACHE_LINE_SIZE); + if (internals->vlan_filter_bmpmem == NULL) { + RTE_BOND_LOG(ERR, + "Failed to allocate vlan bitmap for bonded device %u\n", + eth_dev->data->port_id); + goto err; + } + + internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1, + internals->vlan_filter_bmpmem, 
vlan_filter_bmp_size); + if (internals->vlan_filter_bmp == NULL) { + RTE_BOND_LOG(ERR, + "Failed to init vlan bitmap for bonded device %u\n", + eth_dev->data->port_id); + rte_free(internals->vlan_filter_bmpmem); + goto err; + } + return eth_dev->data->port_id; err: @@ -299,6 +311,9 @@ rte_eth_bond_free(const char *name) eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + internals = eth_dev->data->dev_private; + rte_bitmap_free(internals->vlan_filter_bmp); + rte_free(internals->vlan_filter_bmpmem); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data->mac_addrs); @@ -307,6 +322,46 @@ rte_eth_bond_free(const char *name) return 0; } +static int +slave_vlan_filter_set(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + int found; + int res = 0; + uint64_t slab = 0; + uint32_t pos = 0; + uint16_t first; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + if (bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter == 0) + return 0; + + internals = bonded_eth_dev->data->dev_private; + found = rte_bitmap_scan(internals->vlan_filter_bmp, &pos, &slab); + first = pos; + + if (!found) + return 0; + + do { + uint32_t i; + uint64_t mask; + + for (i = 0, mask = 1; + i < RTE_BITMAP_SLAB_BIT_SIZE; + i ++, mask <<= 1) { + if (unlikely(slab & mask)) + res = rte_eth_dev_vlan_filter(slave_port_id, + (uint16_t)pos, 1); + } + found = rte_bitmap_scan(internals->vlan_filter_bmp, + &pos, &slab); + } while (found && first != pos && res == 0); + + return res; +} + static int __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) { @@ -373,21 +428,6 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen; } else { - /* Check slave link properties are supported if props are set, - * all slaves must be the same */ - if (internals->link_props_set) { - if (link_properties_valid(&(bonded_eth_dev->data->dev_link), - &(slave_eth_dev->data->dev_link))) { - slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE); - RTE_BOND_LOG(ERR, - "Slave port %d link speed/duplex not supported", - slave_port_id); - return -1; - } - } else { - link_properties_set(bonded_eth_dev, - &(slave_eth_dev->data->dev_link)); - } internals->rx_offload_capa &= dev_info.rx_offload_capa; internals->tx_offload_capa &= dev_info.tx_offload_capa; internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads; @@ -442,6 +482,9 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) activate_slave(bonded_eth_dev, slave_port_id); } } + + slave_vlan_filter_set(bonded_port_id, slave_port_id); + return 0; } diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c index 02ecde64..3dca273d 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c @@ -54,15 +54,23 @@ const char *pmd_bond_init_valid_arguments[] = { static inline int find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) { + struct rte_pci_device *pci_dev; struct rte_pci_addr *eth_pci_addr; unsigned i; for (i = 0; i < rte_eth_dev_count(); i++) { - if (rte_eth_devices[i].pci_dev == NULL) + /* Currently populated by rte_eth_copy_pci_info(). + * + * TODO: Once the PCI bus has arrived we should have a better + * way to test for being a PCI device or not. 
+ */ + if (rte_eth_devices[i].data->kdrv == RTE_KDRV_UNKNOWN || + rte_eth_devices[i].data->kdrv == RTE_KDRV_NONE) continue; - eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr); + pci_dev = RTE_DEV_TO_PCI(rte_eth_devices[i].device); + eth_pci_addr = &pci_dev->addr; if (pci_addr->bus == eth_pci_addr->bus && pci_addr->devid == eth_pci_addr->devid && diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c index b20a2729..f3ac9e27 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include @@ -122,6 +122,15 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, bd_rx_q->queue_id, bufs, nb_pkts); } +static inline uint8_t +is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci) +{ + const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); + + return !vlan_tci && (ethertype == ether_type_slow_be && + (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); +} + static uint16_t bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) @@ -141,6 +150,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint8_t collecting; /* current slave collecting status */ const uint8_t promisc = internals->promiscuous_en; uint8_t i, j, k; + uint8_t subtype; rte_eth_macaddr_get(internals->port_id, &bond_mac); /* Copy slave list to protect against slave up/down changes during tx @@ -166,10 +176,12 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); + subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; + /* Remove packet from array if it is slow packet or slave is not * in collecting state or bondign interface is not in promiscus * mode and packet address does not match. 
*/ - if (unlikely(hdr->ether_type == ether_type_slow_be || + if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) || !collecting || (!promisc && !is_multicast_ether_addr(&hdr->d_addr) && !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) { @@ -888,7 +900,6 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) } num_tx_total += num_send; - num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; } return num_tx_total; @@ -1305,8 +1316,6 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, struct bond_rx_queue *bd_rx_q; struct bond_tx_queue *bd_tx_q; - uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues; - uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues; int errval; uint16_t q_id; @@ -1335,6 +1344,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, bonded_eth_dev->data->dev_conf.rxmode.mq_mode; } + slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter = + bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter; + /* Configure device */ errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, bonded_eth_dev->data->nb_rx_queues, @@ -1347,9 +1359,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, } /* Setup Rx Queues */ - /* Use existing queues, if any */ - for (q_id = old_nb_rx_queues; - q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { + for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, @@ -1365,9 +1375,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, } /* Setup Tx Queues */ - /* Use existing queues, if any */ - for (q_id = old_nb_tx_queues; - q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { + for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, @@ -1439,6 +1447,9 @@ slave_remove(struct bond_dev_private *internals, (internals->slave_count - i - 1)); internals->slave_count--; + + /* force reconfiguration of slave interfaces */ + _rte_eth_dev_reset(slave_eth_dev); } static void @@ -1637,7 +1648,10 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) void bond_ethdev_close(struct rte_eth_dev *dev) { + struct bond_dev_private *internals = dev->data->dev_private; + bond_ethdev_free_queues(dev); + rte_bitmap_reset(internals->vlan_filter_bmp); } /* forward declaration */ @@ -1657,7 +1671,6 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_tx_queues = (uint16_t)512; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; dev_info->rx_offload_capa = internals->rx_offload_capa; dev_info->tx_offload_capa = internals->tx_offload_capa; @@ -1666,6 +1679,35 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->reta_size = internals->reta_size; } +static int +bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + int res; + uint8_t i; + struct bond_dev_private *internals = dev->data->dev_private; + + /* don't do this while a slave is being added */ + rte_spinlock_lock(&internals->lock); + + if (on) + rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); + else + rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); + + for (i = 0; i < internals->slave_count; i++) { + uint8_t port_id = internals->slaves[i].port_id; + + res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); + if (res == ENOTSUP) + 
RTE_LOG(WARNING, PMD, + "Setting VLAN filter on slave port %u not supported.\n", + port_id); + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + static int bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, @@ -1923,7 +1965,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg) return; _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } void @@ -1985,6 +2027,16 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, /* Inherit eth dev link properties from first active slave */ link_properties_set(bonded_eth_dev, &(slave_eth_dev->data->dev_link)); + } else { + if (link_properties_valid( + &bonded_eth_dev->data->dev_link, &link) != 0) { + slave_eth_dev->data->dev_flags &= + (~RTE_ETH_DEV_BONDED_SLAVE); + RTE_LOG(ERR, PMD, + "port %u invalid speed/duplex\n", + port_id); + return; + } } activate_slave(bonded_eth_dev, port_id); @@ -2034,7 +2086,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, (void *)bonded_eth_dev); else _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } else { if (internals->link_down_delay_ms > 0) @@ -2043,7 +2095,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, (void *)bonded_eth_dev); else _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } } } @@ -2161,6 +2213,7 @@ const struct eth_dev_ops default_dev_ops = { .dev_close = bond_ethdev_close, .dev_configure = bond_ethdev_configure, .dev_infos_get = bond_ethdev_info, + .vlan_filter_set = bond_ethdev_vlan_filter_set, .rx_queue_setup = bond_ethdev_rx_queue_setup, .tx_queue_setup = bond_ethdev_tx_queue_setup, .rx_queue_release = bond_ethdev_rx_queue_release, @@ -2177,7 +2230,7 @@ const struct eth_dev_ops default_dev_ops = { }; static int -bond_init(const char *name, const char *params) +bond_probe(const char *name, const char *params) { struct bond_dev_private *internals; struct rte_kvargs *kvlist; @@ -2244,7 +2297,7 @@ parse_error: } static int -bond_uninit(const char *name) +bond_remove(const char *name) { int ret; @@ -2508,15 +2561,15 @@ bond_ethdev_configure(struct rte_eth_dev *dev) return 0; } -static struct rte_driver bond_drv = { - .type = PMD_VDEV, - .init = bond_init, - .uninit = bond_uninit, +struct rte_vdev_driver pmd_bond_drv = { + .probe = bond_probe, + .remove = bond_remove, }; -PMD_REGISTER_DRIVER(bond_drv, eth_bond); +RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv); +RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond); -DRIVER_REGISTER_PARAM_STRING(eth_bond, +RTE_PMD_REGISTER_PARAM_STRING(net_bonding, "slave= " "primary= " "mode=[0-6] " diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h b/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h index 2bdc9efa..5a411e22 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h @@ -36,6 +36,7 @@ #include #include +#include #include "rte_eth_bond.h" #include "rte_eth_bond_8023ad_private.h" @@ -62,7 +63,7 @@ extern const char *pmd_bond_init_valid_arguments[]; -extern const char pmd_bond_driver_name[]; +extern struct rte_vdev_driver pmd_bond_drv; /** Port Queue Mapping Structure */ struct bond_rx_queue { @@ -172,6 +173,9 @@ struct bond_dev_private { uint32_t candidate_max_rx_pktlen; uint32_t max_rx_pktlen; + + void *vlan_filter_bmpmem; /* 
enabled vlan filter bitmap */ + struct rte_bitmap *vlan_filter_bmp; }; extern const struct eth_dev_ops default_dev_ops; diff --git a/src/dpdk/drivers/net/cxgbe/base/adapter.h b/src/dpdk/drivers/net/cxgbe/base/adapter.h index 5e3bd509..beb1e3e6 100644 --- a/src/dpdk/drivers/net/cxgbe/base/adapter.h +++ b/src/dpdk/drivers/net/cxgbe/base/adapter.h @@ -37,6 +37,7 @@ #define __T4_ADAPTER_H__ #include +#include #include "cxgbe_compat.h" #include "t4_regs_values.h" @@ -324,7 +325,7 @@ struct adapter { int use_unpacked_mode; /* unpacked rx mode state */ }; -#define CXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define CXGBE_PCI_REG(reg) rte_read32(reg) static inline uint64_t cxgbe_read_addr64(volatile void *addr) { @@ -350,16 +351,21 @@ static inline uint32_t cxgbe_read_addr(volatile void *addr) #define CXGBE_READ_REG64(adap, reg) \ cxgbe_read_addr64(CXGBE_PCI_REG_ADDR((adap), (reg))) -#define CXGBE_PCI_REG_WRITE(reg, value) ({ \ - CXGBE_PCI_REG((reg)) = (value); }) +#define CXGBE_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) + +#define CXGBE_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((value), (reg)) #define CXGBE_WRITE_REG(adap, reg, value) \ CXGBE_PCI_REG_WRITE(CXGBE_PCI_REG_ADDR((adap), (reg)), (value)) +#define CXGBE_WRITE_REG_RELAXED(adap, reg, value) \ + CXGBE_PCI_REG_WRITE_RELAXED(CXGBE_PCI_REG_ADDR((adap), (reg)), (value)) + static inline uint64_t cxgbe_write_addr64(volatile void *addr, uint64_t val) { - CXGBE_PCI_REG(addr) = val; - CXGBE_PCI_REG(((volatile uint8_t *)(addr) + 4)) = (val >> 32); + CXGBE_PCI_REG_WRITE(addr, val); + CXGBE_PCI_REG_WRITE(((volatile uint8_t *)(addr) + 4), (val >> 32)); return val; } @@ -383,7 +389,7 @@ static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr) } /** - * t4_write_reg - write a HW register + * t4_write_reg - write a HW register with barrier * @adapter: the adapter * @reg_addr: the register address * @val: the value to write @@ -397,6 +403,22 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val) CXGBE_WRITE_REG(adapter, reg_addr, val); } +/** + * t4_write_reg_relaxed - write a HW register with no barrier + * @adapter: the adapter + * @reg_addr: the register address + * @val: the value to write + * + * Write a 32-bit value into the given HW register. + */ +static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr, + u32 val) +{ + CXGBE_DEBUG_REG(adapter, "setting register 0x%x to 0x%x\n", reg_addr, + val); + CXGBE_WRITE_REG_RELAXED(adapter, reg_addr, val); +} + /** * t4_read_reg64 - read a 64-bit HW register * @adapter: the adapter diff --git a/src/dpdk/drivers/net/cxgbe/base/t4_hw.c b/src/dpdk/drivers/net/cxgbe/base/t4_hw.c index 7e79adf6..c089b068 100644 --- a/src/dpdk/drivers/net/cxgbe/base/t4_hw.c +++ b/src/dpdk/drivers/net/cxgbe/base/t4_hw.c @@ -1532,7 +1532,7 @@ int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data) { unsigned int base = adapter->params.pci.vpd_cap_addr; int ret; - u32 stats_reg; + u32 stats_reg = 0; int max_poll; /* VPD Accesses must alway be 4-byte aligned! diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h b/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h index e68f8f59..1551cbf5 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h @@ -45,6 +45,7 @@ #include #include #include +#include #define dev_printf(level, fmt, args...) 
\ RTE_LOG(level, PMD, "rte_cxgbe_pmd: " fmt, ## args) @@ -254,7 +255,7 @@ static inline unsigned long ilog2(unsigned long n) static inline void writel(unsigned int val, volatile void __iomem *addr) { - *(volatile unsigned int *)addr = val; + rte_write32(val, addr); } static inline void writeq(u64 val, volatile void __iomem *addr) @@ -263,4 +264,9 @@ static inline void writeq(u64 val, volatile void __iomem *addr) writel(val >> 32, (void *)((uintptr_t)addr + 4)); } +static inline void writel_relaxed(unsigned int val, volatile void __iomem *addr) +{ + rte_write32_relaxed(val, addr); +} + #endif /* _CXGBE_COMPAT_H_ */ diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c b/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c index 9208a615..4d543a7f 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c @@ -68,7 +68,7 @@ * Macros needed to support the PCI Device ID Table ... */ #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ - static struct rte_pci_id cxgb4_pci_tbl[] = { + static const struct rte_pci_id cxgb4_pci_tbl[] = { #define CH_PCI_DEVICE_ID_FUNCTION 0x4 #define PCI_VENDOR_ID_CHELSIO 0x1425 @@ -147,6 +147,8 @@ static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev, .nb_align = 1, }; + device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + device_info->min_rx_bufsize = CXGBE_MIN_RX_BUFSIZE; device_info->max_rx_pktlen = CXGBE_MAX_RX_PKTLEN; device_info->max_rx_queues = max_queues; @@ -1005,7 +1007,7 @@ static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - pci_dev = eth_dev->pci_dev; + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); snprintf(name, sizeof(name), "cxgbeadapter%d", eth_dev->data->port_id); adapter = rte_zmalloc(name, sizeof(*adapter), 0); @@ -1039,33 +1041,15 @@ out_free_adapter: static struct eth_driver rte_cxgbe_pmd = { .pci_drv = { - .name = "rte_cxgbe_pmd", .id_table = cxgb4_pci_tbl, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_cxgbe_dev_init, .dev_private_size = sizeof(struct port_info), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI CXGBE devices. 
- */ -static int rte_cxgbe_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - CXGBE_FUNC_TRACE(); - - rte_eth_driver_register(&rte_cxgbe_pmd); - return 0; -} - -static struct rte_driver rte_cxgbe_driver = { - .type = PMD_PDEV, - .init = rte_cxgbe_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_cxgbe_driver, cxgb4); -DRIVER_REGISTER_PCI_TABLE(cxgb4, cxgb4_pci_tbl); - +RTE_PMD_REGISTER_PCI(net_cxgbe, rte_cxgbe_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_cxgbe, cxgb4_pci_tbl); +RTE_PMD_REGISTER_KMOD_DEP(net_cxgbe, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_main.c b/src/dpdk/drivers/net/cxgbe/cxgbe_main.c index ceaf5ab2..541fc40e 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_main.c +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_main.c @@ -959,7 +959,7 @@ int setup_rss(struct port_info *pi) dev_debug(adapter, "%s: pi->rss_size = %u; pi->n_rx_qsets = %u\n", __func__, pi->rss_size, pi->n_rx_qsets); - if (!pi->flags & PORT_RSS_DONE) { + if (!(pi->flags & PORT_RSS_DONE)) { if (adapter->flags & FULL_INIT_DONE) { /* Fill default values with equal distribution */ for (j = 0; j < pi->rss_size; j++) @@ -1150,7 +1150,7 @@ int cxgbe_probe(struct adapter *adapter) */ /* reserve an ethdev entry */ - pi->eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + pi->eth_dev = rte_eth_dev_allocate(name); if (!pi->eth_dev) goto out_free; @@ -1163,16 +1163,14 @@ int cxgbe_probe(struct adapter *adapter) pi->eth_dev->data = data; allocate_mac: - pi->eth_dev->pci_dev = adapter->pdev; + pi->eth_dev->device = &adapter->pdev->device; pi->eth_dev->data->dev_private = pi; pi->eth_dev->driver = adapter->eth_dev->driver; pi->eth_dev->dev_ops = adapter->eth_dev->dev_ops; pi->eth_dev->tx_pkt_burst = adapter->eth_dev->tx_pkt_burst; pi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst; - rte_eth_copy_pci_info(pi->eth_dev, pi->eth_dev->pci_dev); - - TAILQ_INIT(&pi->eth_dev->link_intr_cbs); + rte_eth_copy_pci_info(pi->eth_dev, adapter->pdev); pi->eth_dev->data->mac_addrs = rte_zmalloc(name, ETHER_ADDR_LEN, 0); diff --git a/src/dpdk/drivers/net/cxgbe/sge.c b/src/dpdk/drivers/net/cxgbe/sge.c index ab5a842a..37b60904 100644 --- a/src/dpdk/drivers/net/cxgbe/sge.c +++ b/src/dpdk/drivers/net/cxgbe/sge.c @@ -338,12 +338,12 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) * mechanism. */ if (unlikely(!q->bar2_addr)) { - t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), - val | V_QID(q->cntxt_id)); + t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + val | V_QID(q->cntxt_id)); } else { - writel(val | V_QID(q->bar2_qid), - (void *)((uintptr_t)q->bar2_addr + - SGE_UDB_KDOORBELL)); + writel_relaxed(val | V_QID(q->bar2_qid), + (void *)((uintptr_t)q->bar2_addr + + SGE_UDB_KDOORBELL)); /* * This Write memory Barrier will force the write to @@ -890,15 +890,11 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, struct sge_txq *q = &txq->q; unsigned int flits, ndesc; unsigned char type = 0; - int credits, hw_cidx = ntohs(q->stat->cidx); - int in_use = q->pidx - hw_cidx + flits_to_desc(q->coalesce.flits); + int credits; /* use coal WR type 1 when no frags are present */ type = (mbuf->nb_segs == 1) ? 
1 : 0; - if (in_use < 0) - in_use += q->size; - if (unlikely(type != q->coalesce.type && q->coalesce.idx)) ship_tx_pkt_coalesce_wr(adap, txq); @@ -1645,7 +1641,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->size = cxgbe_roundup(iq->size, 16); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, fwevtq ? "fwq_ring" : "rx_ring", + eth_dev->driver->pci_drv.driver.name, + fwevtq ? "fwq_ring" : "rx_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); @@ -1697,7 +1694,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->size = cxgbe_roundup(fl->size, 8); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, + eth_dev->driver->pci_drv.driver.name, fwevtq ? "fwq_ring" : "fl_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); @@ -1893,7 +1890,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, "tx_ring", + eth_dev->driver->pci_drv.driver.name, "tx_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_82575.c b/src/dpdk/drivers/net/e1000/base/e1000_82575.c index 723885d7..c6400bde 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_82575.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_82575.c @@ -100,7 +100,6 @@ STATIC s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); STATIC s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); STATIC s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); -STATIC void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); STATIC void e1000_clear_vfta_i350(struct e1000_hw *hw); STATIC void e1000_i2c_start(struct e1000_hw *hw); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_82575.h b/src/dpdk/drivers/net/e1000/base/e1000_82575.h index c4986841..4133cdd8 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_82575.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_82575.h @@ -492,6 +492,7 @@ enum e1000_promisc_type { void e1000_vfta_set_vf(struct e1000_hw *, u16, bool); void e1000_rlpml_set_vf(struct e1000_hw *, u16); s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type); +void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); u16 e1000_rxpbs_adjust_82580(u32 data); s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_api.c b/src/dpdk/drivers/net/e1000/base/e1000_api.c index 22b96fd1..f7cf83b6 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_api.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_api.c @@ -298,6 +298,23 @@ s32 e1000_set_mac_type(struct e1000_hw *hw) case E1000_DEV_ID_PCH_I218_V3: mac->type = e1000_pch_lpt; break; + case E1000_DEV_ID_PCH_SPT_I219_LM: + case E1000_DEV_ID_PCH_SPT_I219_V: + case E1000_DEV_ID_PCH_SPT_I219_LM2: + case E1000_DEV_ID_PCH_SPT_I219_V2: + case E1000_DEV_ID_PCH_LBG_I219_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM4: + case E1000_DEV_ID_PCH_SPT_I219_V4: + case E1000_DEV_ID_PCH_SPT_I219_LM5: + case E1000_DEV_ID_PCH_SPT_I219_V5: + mac->type = e1000_pch_spt; + break; + case E1000_DEV_ID_PCH_CNP_I219_LM6: + case E1000_DEV_ID_PCH_CNP_I219_V6: + 
case E1000_DEV_ID_PCH_CNP_I219_LM7: + case E1000_DEV_ID_PCH_CNP_I219_V7: + mac->type = e1000_pch_cnp; + break; case E1000_DEV_ID_82575EB_COPPER: case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82575GB_QUAD_COPPER: @@ -448,6 +465,8 @@ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: e1000_init_function_pointers_ich8lan(hw); break; case e1000_82575: @@ -632,8 +651,6 @@ s32 e1000_reset_hw(struct e1000_hw *hw) return -E1000_ERR_CONFIG; } -//TREX_PATCH -extern int eal_err_read_from_file_is_error; /** * e1000_init_hw - Initialize hardware * @hw: pointer to the HW structure @@ -643,9 +660,6 @@ extern int eal_err_read_from_file_is_error; **/ s32 e1000_init_hw(struct e1000_hw *hw) { - //TREX_PATCH - eal_err_read_from_file_is_error = 0; - if (hw->mac.ops.init_hw) return hw->mac.ops.init_hw(hw); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_defines.h b/src/dpdk/drivers/net/e1000/base/e1000_defines.h index 69aa1f23..dbc2bbbe 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_defines.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_defines.h @@ -198,6 +198,7 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ #define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ #define E1000_RCTL_RDMTS_HEX 0x00010000 +#define E1000_RCTL_RDMTS1_HEX E1000_RCTL_RDMTS_HEX #define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ #define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ #define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ @@ -468,6 +469,8 @@ POSSIBILITY OF SUCH DAMAGE. #define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* The datasheet maximum supported RX size is 9.5KB (9728 bytes) */ +#define MAX_RX_JUMBO_FRAME_SIZE 0x2600 #define E1000_TX_PTR_GAP 0x1F /* Extended Configuration Control and Size */ @@ -751,6 +754,12 @@ POSSIBILITY OF SUCH DAMAGE. 
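/*
 * Editor's note (illustrative, not part of the upstream hunk): the
 * MAX_RX_JUMBO_FRAME_SIZE value of 0x2600 added in the previous hunk works
 * out to 9728 bytes, i.e. exactly the "9.5KB" datasheet limit quoted in its
 * comment (9.5 * 1024 = 9728). On a C11 toolchain the relationship could be
 * pinned down with a build-time check along these lines (assumed example,
 * not present in the patch):
 *
 *   _Static_assert(MAX_RX_JUMBO_FRAME_SIZE == 9728,
 *                  "datasheet RX limit is 9.5KB");
 */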
#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ +/* HH Time Sync */ +#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ +#define E1000_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ +#define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ +#define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ + #define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ #define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ #define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 diff --git a/src/dpdk/drivers/net/e1000/base/e1000_hw.h b/src/dpdk/drivers/net/e1000/base/e1000_hw.h index e4e4f764..d9de9fc1 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_hw.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_hw.h @@ -136,6 +136,19 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_I218_V2 0x15A1 #define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */ +#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 +#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 +#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 +#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 +#define E1000_DEV_ID_PCH_CNP_I219_LM6 0x15BD +#define E1000_DEV_ID_PCH_CNP_I219_V6 0x15BE +#define E1000_DEV_ID_PCH_CNP_I219_LM7 0x15BB +#define E1000_DEV_ID_PCH_CNP_I219_V7 0x15BC #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 @@ -221,6 +234,8 @@ enum e1000_mac_type { e1000_pchlan, e1000_pch2lan, e1000_pch_lpt, + e1000_pch_spt, + e1000_pch_cnp, e1000_82575, e1000_82576, e1000_82580, @@ -950,11 +965,15 @@ struct e1000_dev_spec_ich8lan { E1000_MUTEX nvm_mutex; E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; + bool disable_k1_off; bool eee_disable; u16 eee_lp_ability; #ifdef ULP_SUPPORT enum e1000_ulp_state ulp_state; -#endif /* NAHUM6LP_HW && ULP_SUPPORT */ + bool ulp_capability_disabled; + bool during_suspend_flow; + bool during_dpg_exit; +#endif /* ULP_SUPPORT */ u16 lat_enc; u16 max_ltr_enc; bool smbus_disable; diff --git a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c index 89d07e90..6dd046d2 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c @@ -94,10 +94,13 @@ STATIC s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active); STATIC s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +STATIC s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); STATIC s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); STATIC s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); STATIC s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); +STATIC s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw); STATIC s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data); STATIC s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); @@ -125,6 +128,14 @@ STATIC s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data); STATIC s32 
e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data); +STATIC s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data); +STATIC s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 *data); +STATIC s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, + u32 offset, u32 data); +STATIC s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 dword); STATIC s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data); STATIC s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, @@ -233,7 +244,7 @@ STATIC bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (ret_val) return false; out: - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { @@ -277,7 +288,7 @@ STATIC void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); - usec_delay(10); + msec_delay(1); mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); @@ -334,6 +345,8 @@ STATIC s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) */ switch (hw->mac.type) { case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -481,6 +494,8 @@ STATIC s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) /* fall-through */ case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ @@ -623,36 +638,57 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 gfpreg, sector_base_addr, sector_end_addr; u16 i; + u32 nvm_size; DEBUGFUNC("e1000_init_nvm_params_ich8lan"); - /* Can't read flash registers if the register set isn't mapped. */ nvm->type = e1000_nvm_flash_sw; - if (!hw->flash_address) { - DEBUGOUT("ERROR: Flash registers not mapped\n"); - return -E1000_ERR_CONFIG; - } - gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); + if (hw->mac.type >= e1000_pch_spt) { + /* in SPT, gfpreg doesn't exist. NVM size is taken from the + * STRAP register. This is because in SPT the GbE Flash region + * is no longer accessed through the flash registers. Instead, + * the mechanism has changed, and the Flash region access + * registers are now implemented in GbE memory space. + */ + nvm->flash_base_addr = 0; + nvm_size = + (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1) + * NVM_SIZE_MULTIPLIER; + nvm->flash_bank_size = nvm_size / 2; + /* Adjust to word count */ + nvm->flash_bank_size /= sizeof(u16); + /* Set the base address for flash register access */ + hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR; + } else { + /* Can't read flash registers if register set isn't mapped. */ + if (!hw->flash_address) { + DEBUGOUT("ERROR: Flash registers not mapped\n"); + return -E1000_ERR_CONFIG; + } - /* sector_X_addr is a "sector"-aligned address (4096 bytes) - * Add 1 to sector_end_addr since this sector is included in - * the overall size. 
- */ - sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; - sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; + gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); - /* flash_base_addr is byte-aligned */ - nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; + /* sector_X_addr is a "sector"-aligned address (4096 bytes) + * Add 1 to sector_end_addr since this sector is included in + * the overall size. + */ + sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; + sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; - /* find total size of the NVM, then cut in half since the total - * size represents two separate NVM banks. - */ - nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) - << FLASH_SECTOR_ADDR_SHIFT); - nvm->flash_bank_size /= 2; - /* Adjust to word count */ - nvm->flash_bank_size /= sizeof(u16); + /* flash_base_addr is byte-aligned */ + nvm->flash_base_addr = sector_base_addr + << FLASH_SECTOR_ADDR_SHIFT; + + /* find total size of the NVM, then cut in half since the total + * size represents two separate NVM banks. + */ + nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) + << FLASH_SECTOR_ADDR_SHIFT); + nvm->flash_bank_size /= 2; + /* Adjust to word count */ + nvm->flash_bank_size /= sizeof(u16); + } nvm->word_size = E1000_SHADOW_RAM_WORDS; @@ -668,8 +704,13 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; - nvm->ops.read = e1000_read_nvm_ich8lan; - nvm->ops.update = e1000_update_nvm_checksum_ich8lan; + if (hw->mac.type >= e1000_pch_spt) { + nvm->ops.read = e1000_read_nvm_spt; + nvm->ops.update = e1000_update_nvm_checksum_spt; + } else { + nvm->ops.read = e1000_read_nvm_ich8lan; + nvm->ops.update = e1000_update_nvm_checksum_ich8lan; + } nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan; nvm->ops.validate = e1000_validate_nvm_checksum_ich8lan; nvm->ops.write = e1000_write_nvm_ich8lan; @@ -758,6 +799,8 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) mac->ops.rar_set = e1000_rar_set_pch2lan; /* fall-through */ case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: #ifndef NO_NON_BLOCKING_PHY_MTA_UPDATE_SUPPORT /* multicast address update for pch2 */ mac->ops.update_mc_addr_list = @@ -768,7 +811,13 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) #if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) /* save PCH revision_id */ e1000_read_pci_cfg(hw, E1000_PCI_REVISION_ID_REG, &pci_cfg); - hw->revision_id = (u8)(pci_cfg &= 0x000F); + /* SPT uses full byte for revision ID, + * as opposed to previous generations + */ + if (hw->mac.type >= e1000_pch_spt) + hw->revision_id = (u8)(pci_cfg &= 0x00FF); + else + hw->revision_id = (u8)(pci_cfg &= 0x000F); #endif /* QV_RELEASE || !defined(NO_PCH_LPT_B0_SUPPORT) */ /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -786,7 +835,7 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) break; } - if (mac->type == e1000_pch_lpt) { + if (mac->type >= e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt; @@ -1015,8 +1064,9 @@ release: /* clear FEXTNVM6 bit 8 on link down or 10/100 */ fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK; - if (!link || ((status & E1000_STATUS_SPEED_100) && - (status & E1000_STATUS_FD))) + if ((hw->phy.revision > 5) || 
!link || + ((status & E1000_STATUS_SPEED_100) && + (status & E1000_STATUS_FD))) goto update_fextnvm6; ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, &reg); @@ -1068,6 +1118,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) u32 mac_reg; s32 ret_val = E1000_SUCCESS; u16 phy_reg; + u16 oem_reg = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || @@ -1128,6 +1179,25 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); + /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable + * LPLU and disable Gig speed when entering ULP + */ + if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) { + ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS, + &oem_reg); + if (ret_val) + goto release; + + phy_reg = oem_reg; + phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS; + + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, + phy_reg); + + if (ret_val) + goto release; + } + skip_smbus: if (!to_sx) { /* Change the 'Link Status Change' interrupt to trigger @@ -1184,6 +1254,14 @@ skip_smbus: E1000_WRITE_REG(hw, E1000_TCTL, mac_reg); } + if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) && + to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, + oem_reg); + if (ret_val) + goto release; + } + release: hw->phy.ops.release(hw); out: @@ -1240,10 +1318,10 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } - /* Poll up to 100msec for ME to clear ULP_CFG_DONE */ + /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ while (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 10) { + if (i++ == 30) { ret_val = -E1000_ERR_PHY; goto out; } @@ -1343,6 +1421,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | + I218_ULP_CONFIG1_EN_ULP_LANPHYPC | + I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); @@ -1360,6 +1440,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) if (hw->mac.autoneg) e1000_phy_setup_autoneg(hw); + else + e1000_setup_copper_link_generic(hw); e1000_sw_lcd_config_ich8lan(hw); @@ -1397,6 +1479,8 @@ out: } #endif /* ULP_SUPPORT */ + + /** * e1000_check_for_copper_link_ich8lan - Check for link (Copper) * @hw: pointer to the HW structure @@ -1456,8 +1540,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY.
*/ - if (((hw->mac.type == e1000_pch2lan) || - (hw->mac.type == e1000_pch_lpt)) && link) { + if ((hw->mac.type >= e1000_pch2lan) && link) { u16 speed, duplex; e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); @@ -1468,6 +1551,10 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; + } else if (hw->mac.type >= e1000_pch_spt && + duplex == FULL_DUPLEX && speed != SPEED_1000) { + tipg_reg |= 0xC; + emi_val = 1; } else { /* Roll back the default values */ tipg_reg |= 0x08; @@ -1486,10 +1573,78 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); + + if (hw->mac.type >= e1000_pch_lpt) { + u16 phy_reg; + + hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG, + &phy_reg); + phy_reg &= ~I217_PLL_CLOCK_GATE_MASK; + if (speed == SPEED_100 || speed == SPEED_10) + phy_reg |= 0x3E8; + else + phy_reg |= 0xFA; + hw->phy.ops.write_reg_locked(hw, + I217_PLL_CLOCK_GATE_REG, + phy_reg); + + if (speed == SPEED_1000) { + hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL, + &phy_reg); + + phy_reg |= HV_PM_CTRL_K1_CLK_REQ; + + hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL, + phy_reg); + } + } hw->phy.ops.release(hw); if (ret_val) return ret_val; + + if (hw->mac.type >= e1000_pch_spt) { + u16 data; + u16 ptr_gap; + + if (speed == SPEED_1000) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.read_reg_locked(hw, + PHY_REG(776, 20), + &data); + if (ret_val) { + hw->phy.ops.release(hw); + return ret_val; + } + + ptr_gap = (data & (0x3FF << 2)) >> 2; + if (ptr_gap < 0x18) { + data &= ~(0x3FF << 2); + data |= (0x18 << 2); + ret_val = + hw->phy.ops.write_reg_locked(hw, + PHY_REG(776, 20), data); + } + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + } else { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg_locked(hw, + PHY_REG(776, 20), + 0xC023); + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + + } + } } /* I217 Packet Loss issue: @@ -1497,7 +1652,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * on power up. 
* Set the Beacon Duration for I217 to 8 usec */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); @@ -1519,10 +1674,29 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->dev_spec.ich8lan.eee_lp_ability = 0; /* Configure K0s minimum time */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { e1000_configure_k0s_lpt(hw, K1_ENTRY_LATENCY, K1_MIN_TIME); } + if (hw->mac.type >= e1000_pch_lpt) { + u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); + + if (hw->mac.type == e1000_pch_spt) { + /* FEXTNVM6 K1-off workaround - for SPT only */ + u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); + + if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) + fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; + else + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + } + + if (hw->dev_spec.ich8lan.disable_k1_off == true) + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + + E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); + } + if (!link) return E1000_SUCCESS; /* No link detected */ @@ -1616,6 +1790,8 @@ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: hw->phy.ops.init_params = e1000_init_phy_params_pchlan; break; default: @@ -2081,6 +2257,8 @@ STATIC s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3204,6 +3382,41 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan"); switch (hw->mac.type) { + case e1000_pch_spt: + case e1000_pch_cnp: + bank1_offset = nvm->flash_bank_size; + act_offset = E1000_ICH_NVM_SIG_WORD; + + /* set bank to 0 in case flash read fails */ + *bank = 0; + + /* Check bank 0 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 0; + return E1000_SUCCESS; + } + + /* Check bank 1 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset + + bank1_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 1; + return E1000_SUCCESS; + } + + DEBUGOUT("ERROR: No valid NVM bank present\n"); + return -E1000_ERR_NVM; case e1000_ich8lan: case e1000_ich9lan: eecd = E1000_READ_REG(hw, E1000_EECD); @@ -3250,6 +3463,99 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) } } +/** + * e1000_read_nvm_spt - NVM access for SPT + * @hw: pointer to the HW structure + * @offset: The offset (in bytes) of the word(s) to read. + * @words: Size of data to read in words. + * @data: pointer to the word(s) to read at offset. 
+ * + * Reads a word(s) from the NVM + **/ +STATIC s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; + u32 act_offset; + s32 ret_val = E1000_SUCCESS; + u32 bank = 0; + u32 dword = 0; + u16 offset_to_read; + u16 i; + + DEBUGFUNC("e1000_read_nvm_spt"); + + if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + + nvm->ops.acquire(hw); + + ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); + bank = 0; + } + + act_offset = (bank) ? nvm->flash_bank_size : 0; + act_offset += offset; + + ret_val = E1000_SUCCESS; + + for (i = 0; i < words; i += 2) { + if (words - i == 1) { + if (dev_spec->shadow_ram[offset+i].modified) { + data[i] = dev_spec->shadow_ram[offset+i].value; + } else { + offset_to_read = act_offset + i - + ((act_offset + i) % 2); + ret_val = + e1000_read_flash_dword_ich8lan(hw, + offset_to_read, + &dword); + if (ret_val) + break; + if ((act_offset + i) % 2 == 0) + data[i] = (u16)(dword & 0xFFFF); + else + data[i] = (u16)((dword >> 16) & 0xFFFF); + } + } else { + offset_to_read = act_offset + i; + if (!(dev_spec->shadow_ram[offset+i].modified) || + !(dev_spec->shadow_ram[offset+i+1].modified)) { + ret_val = + e1000_read_flash_dword_ich8lan(hw, + offset_to_read, + &dword); + if (ret_val) + break; + } + if (dev_spec->shadow_ram[offset+i].modified) + data[i] = dev_spec->shadow_ram[offset+i].value; + else + data[i] = (u16) (dword & 0xFFFF); + if (dev_spec->shadow_ram[offset+i].modified) + data[i+1] = + dev_spec->shadow_ram[offset+i+1].value; + else + data[i+1] = (u16) (dword >> 16 & 0xFFFF); + } + } + + nvm->ops.release(hw); + +out: + if (ret_val) + DEBUGOUT1("NVM read error: %d\n", ret_val); + + return ret_val; +} + /** * e1000_read_nvm_ich8lan - Read word(s) from the NVM * @hw: pointer to the HW structure @@ -3337,7 +3643,11 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); /* Either we should have a hardware SPI cycle in progress * bit to check against, in order to start a new cycle or @@ -3353,7 +3663,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, + hsfsts.regval); ret_val = E1000_SUCCESS; } else { s32 i; @@ -3375,8 +3690,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * now set the Flash Cycle Done. 
*/ hsfsts.hsf_status.flcdone = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, - hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, + hsfsts.regval); } else { DEBUGOUT("Flash controller busy, cannot get access\n"); } @@ -3401,10 +3720,17 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) DEBUGFUNC("e1000_flash_cycle_ich8lan"); /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ - hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); /* wait till FDONE bit is set to 1 */ do { @@ -3420,6 +3746,29 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) return -E1000_ERR_NVM; } +/** + * e1000_read_flash_dword_ich8lan - Read dword from flash + * @hw: pointer to the HW structure + * @offset: offset to data location + * @data: pointer to the location for storing the data + * + * Reads the flash dword at offset into data. Offset is converted + * to bytes before read. + **/ +STATIC s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data) +{ + DEBUGFUNC("e1000_read_flash_dword_ich8lan"); + + if (!data) + return -E1000_ERR_NVM; + + /* Must convert word offset into bytes. */ + offset <<= 1; + + return e1000_read_flash_data32_ich8lan(hw, offset, data); +} + /** * e1000_read_flash_word_ich8lan - Read word from flash * @hw: pointer to the HW structure @@ -3457,7 +3806,13 @@ STATIC s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, s32 ret_val; u16 word = 0; - ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); + /* In SPT, only 32 bits access is supported, + * so this function should not be called. + */ + if (hw->mac.type >= e1000_pch_spt) + return -E1000_ERR_NVM; + else + ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); if (ret_val) return ret_val; @@ -3543,6 +3898,83 @@ STATIC s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, return ret_val; } +/** + * e1000_read_flash_data32_ich8lan - Read dword from NVM + * @hw: pointer to the HW structure + * @offset: The offset (in bytes) of the dword to read. + * @data: Pointer to the dword to store the value read. + * + * Reads a byte or word from the NVM using the flash access registers. + **/ +STATIC s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data) +{ + union ich8_hws_flash_status hsfsts; + union ich8_hws_flash_ctrl hsflctl; + u32 flash_linear_addr; + s32 ret_val = -E1000_ERR_NVM; + u8 count = 0; + + DEBUGFUNC("e1000_read_flash_data_ich8lan"); + + if (offset > ICH_FLASH_LINEAR_ADDR_MASK || + hw->mac.type < e1000_pch_spt) + return -E1000_ERR_NVM; + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); + + do { + usec_delay(1); + /* Steps */ + ret_val = e1000_flash_cycle_init_ich8lan(hw); + if (ret_val != E1000_SUCCESS) + break; + /* In SPT, This register is in Lan memory space, not flash. 
+ * Therefore, only 32 bit access is supported + */ + hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + + /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ + hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; + hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; + /* In SPT, This register is in Lan memory space, not flash. + * Therefore, only 32 bit access is supported + */ + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + (u32)hsflctl.regval << 16); + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); + + ret_val = e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_READ_COMMAND_TIMEOUT); + + /* Check if FCERR is set to 1, if set to 1, clear it + * and try the whole sequence a few more times, else + * read in (shift in) the Flash Data0, the order is + * least significant byte first msb to lsb + */ + if (ret_val == E1000_SUCCESS) { + *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); + break; + } else { + /* If we've gotten here, then things are probably + * completely hosed, but if the error condition is + * detected, it won't hurt to give it another try... + * ICH_FLASH_CYCLE_REPEAT_COUNT times. + */ + hsfsts.regval = E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFSTS); + if (hsfsts.hsf_status.flcerr) { + /* Repeat for some time before giving up. */ + continue; + } else if (!hsfsts.hsf_status.flcdone) { + DEBUGOUT("Timeout error - flash cycle did not complete.\n"); + break; + } + } + } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); + + return ret_val; +} /** * e1000_write_nvm_ich8lan - Write word(s) to the NVM @@ -3580,6 +4012,175 @@ STATIC s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, return E1000_SUCCESS; } +/** + * e1000_update_nvm_checksum_spt - Update the checksum for NVM + * @hw: pointer to the HW structure + * + * The NVM checksum is updated by calling the generic update_nvm_checksum, + * which writes the checksum to the shadow ram. The changes in the shadow + * ram are then committed to the EEPROM by processing each bank at a time + * checking for the modified bit and writing only the pending changes. + * After a successful commit, the shadow ram is cleared and is ready for + * future writes. + **/ +STATIC s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; + u32 i, act_offset, new_bank_offset, old_bank_offset, bank; + s32 ret_val; + u32 dword = 0; + + DEBUGFUNC("e1000_update_nvm_checksum_spt"); + + ret_val = e1000_update_nvm_checksum_generic(hw); + if (ret_val) + goto out; + + if (nvm->type != e1000_nvm_flash_sw) + goto out; + + nvm->ops.acquire(hw); + + /* We're writing to the opposite bank so if we're on bank 1, + * write to bank 0 etc. 
We also need to erase the segment that + * is going to be written + */ + ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); + bank = 0; + } + + if (bank == 0) { + new_bank_offset = nvm->flash_bank_size; + old_bank_offset = 0; + ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); + if (ret_val) + goto release; + } else { + old_bank_offset = nvm->flash_bank_size; + new_bank_offset = 0; + ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); + if (ret_val) + goto release; + } + for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) { + /* Determine whether to write the value stored + * in the other NVM bank or a modified value stored + * in the shadow RAM + */ + ret_val = e1000_read_flash_dword_ich8lan(hw, + i + old_bank_offset, + &dword); + + if (dev_spec->shadow_ram[i].modified) { + dword &= 0xffff0000; + dword |= (dev_spec->shadow_ram[i].value & 0xffff); + } + if (dev_spec->shadow_ram[i + 1].modified) { + dword &= 0x0000ffff; + dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff) + << 16); + } + if (ret_val) + break; + + /* If the word is 0x13, then make sure the signature bits + * (15:14) are 11b until the commit has completed. + * This will allow us to write 10b which indicates the + * signature is valid. We want to do this after the write + * has completed so that we don't mark the segment valid + * while the write is still in progress + */ + if (i == E1000_ICH_NVM_SIG_WORD - 1) + dword |= E1000_ICH_NVM_SIG_MASK << 16; + + /* Convert offset to bytes. */ + act_offset = (i + new_bank_offset) << 1; + + usec_delay(100); + + /* Write the data to the new bank. Offset in words*/ + act_offset = i + new_bank_offset; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, + dword); + if (ret_val) + break; + } + + /* Don't bother writing the segment valid bits if sector + * programming failed. + */ + if (ret_val) { + DEBUGOUT("Flash commit failed.\n"); + goto release; + } + + /* Finally validate the new segment by setting bit 15:14 + * to 10b in word 0x13 , this can be done without an + * erase as well since these bits are 11 to start with + * and we need to change bit 14 to 0b + */ + act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; + + /*offset in words but we read dword*/ + --act_offset; + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); + + if (ret_val) + goto release; + + dword &= 0xBFFFFFFF; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); + + if (ret_val) + goto release; + + /* And invalidate the previously valid segment by setting + * its signature word (0x13) high_byte to 0b. This can be + * done without an erase because flash erase sets all bits + * to 1's. We can write 1's to 0's without an erase + */ + act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; + + /* offset in words but we read dword*/ + act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1; + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); + + if (ret_val) + goto release; + + dword &= 0x00FFFFFF; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); + + if (ret_val) + goto release; + + /* Great! Everything worked, we can now clear the cached entries. */ + for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { + dev_spec->shadow_ram[i].modified = false; + dev_spec->shadow_ram[i].value = 0xFFFF; + } + +release: + nvm->ops.release(hw); + + /* Reload the EEPROM, or else modifications will not appear + * until after the next adapter reset. 
+ */ + if (!ret_val) { + nvm->ops.reload(hw); + msec_delay(10); + } + +out: + if (ret_val) + DEBUGOUT1("NVM update error: %d\n", ret_val); + + return ret_val; +} + /** * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM * @hw: pointer to the HW structure @@ -3757,6 +4358,8 @@ STATIC s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) */ switch (hw->mac.type) { case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; @@ -3804,8 +4407,13 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, DEBUGFUNC("e1000_write_ich8_data"); - if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) - return -E1000_ERR_NVM; + if (hw->mac.type >= e1000_pch_spt) { + if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } else { + if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); @@ -3816,12 +4424,29 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; - hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + /* In SPT, This register is in Lan memory space, not + * flash. Therefore, only 32 bit access is supported + */ + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = + E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = + E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); + /* In SPT, This register is in Lan memory space, + * not flash. Therefore, only 32 bit access is + * supported + */ + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); @@ -3859,6 +4484,94 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, return ret_val; } +/** +* e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM +* @hw: pointer to the HW structure +* @offset: The offset (in bytes) of the dwords to read. +* @data: The 4 bytes to write to the NVM. +* +* Writes one/two/four bytes to the NVM using the flash access registers. +**/ +STATIC s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 data) +{ + union ich8_hws_flash_status hsfsts; + union ich8_hws_flash_ctrl hsflctl; + u32 flash_linear_addr; + s32 ret_val; + u8 count = 0; + + DEBUGFUNC("e1000_write_flash_data32_ich8lan"); + + if (hw->mac.type >= e1000_pch_spt) { + if (offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); + do { + usec_delay(1); + /* Steps */ + ret_val = e1000_flash_cycle_init_ich8lan(hw); + if (ret_val != E1000_SUCCESS) + break; + + /* In SPT, This register is in Lan memory space, not + * flash. 
Therefore, only 32 bit access is supported + */ + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = E1000_READ_FLASH_REG(hw, + ICH_FLASH_HSFSTS) + >> 16; + else + hsflctl.regval = E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFCTL); + + hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; + hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; + + /* In SPT, This register is in Lan memory space, + * not flash. Therefore, only 32 bit access is + * supported + */ + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); + + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); + + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data); + + /* check if FCERR is set to 1 , if set to 1, clear it + * and try the whole sequence a few more times else done + */ + ret_val = e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_WRITE_COMMAND_TIMEOUT); + + if (ret_val == E1000_SUCCESS) + break; + + /* If we're here, then things are most likely + * completely hosed, but if the error condition + * is detected, it won't hurt to give it another + * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. + */ + hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); + + if (hsfsts.hsf_status.flcerr) + /* Repeat for some time before giving up. */ + continue; + if (!hsfsts.hsf_status.flcdone) { + DEBUGOUT("Timeout error - flash cycle did not complete.\n"); + break; + } + } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); + + return ret_val; +} /** * e1000_write_flash_byte_ich8lan - Write a single byte to NVM @@ -3878,7 +4591,42 @@ STATIC s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, return e1000_write_flash_data_ich8lan(hw, offset, 1, word); } +/** +* e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM +* @hw: pointer to the HW structure +* @offset: The offset of the word to write. +* @dword: The dword to write to the NVM. +* +* Writes a single dword to the NVM using the flash access registers. +* Goes through a retry algorithm before giving up. +**/ +STATIC s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 dword) +{ + s32 ret_val; + u16 program_retries; + DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan"); + + /* Must convert word offset into bytes. 
*/ + offset <<= 1; + + ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); + + if (!ret_val) + return ret_val; + for (program_retries = 0; program_retries < 100; program_retries++) { + DEBUGOUT2("Retrying Byte %8.8X at offset %u\n", dword, offset); + usec_delay(100); + ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); + if (ret_val == E1000_SUCCESS) + break; + } + if (program_retries == 100) + return -E1000_ERR_NVM; + + return E1000_SUCCESS; +} /** * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM @@ -3988,12 +4736,22 @@ STATIC s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ - hsflctl.regval = - E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = + E1000_READ_FLASH_REG(hw, + ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = + E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, - hsflctl.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); /* Write the last 24 bits of an index within the * block into Flash Linear address field in Flash @@ -4426,7 +5184,7 @@ STATIC void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) E1000_WRITE_REG(hw, E1000_RFCTL, reg); /* Enable ECC on Lynxpoint */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { reg = E1000_READ_REG(hw, E1000_PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; E1000_WRITE_REG(hw, E1000_PBECCSTS, reg); @@ -4858,7 +5616,8 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || - (device_id == E1000_DEV_ID_PCH_I218_V3)) { + (device_id == E1000_DEV_ID_PCH_I218_V3) || + (hw->mac.type >= e1000_pch_spt)) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); E1000_WRITE_REG(hw, E1000_FEXTNVM6, diff --git a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h index 33e77fb8..bc4ed1dd 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h @@ -121,6 +121,18 @@ POSSIBILITY OF SUCH DAMAGE. #if !defined(EXTERNAL_RELEASE) || defined(ULP_SUPPORT) #define E1000_FEXTNVM7_DISABLE_SMB_PERST 0x00000020 #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */ +#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS 0x00000800 +#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS 0x00001000 +#define E1000_FEXTNVM11_DISABLE_PB_READ 0x00000200 +#define E1000_FEXTNVM11_DISABLE_MULR_FIX 0x00002000 + +/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */ +#define E1000_RXDCTL_THRESH_UNIT_DESC 0x01000000 + +#define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field*/ +#define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs*/ +#define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */ +#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29) #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 @@ -198,6 +210,10 @@ POSSIBILITY OF SUCH DAMAGE. 
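/*
 * Editor's note (illustrative, not part of the upstream hunk): with the
 * NVM_SIZE_MULTIPLIER of 4096 defined in the previous hunk, the SPT/CNP NVM
 * geometry that e1000_init_nvm_params_ich8lan() derives earlier in this
 * patch is
 *
 *   nvm_size = (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1)
 *              * NVM_SIZE_MULTIPLIER;
 *
 * so an assumed STRAP "NVMS" field of 7, for example, gives 8 * 4096 =
 * 32768 bytes in total, a flash_bank_size of 16384 bytes, and 8192 words
 * per bank after the divide-by-sizeof(u16) adjustment. The example field
 * value is hypothetical; only the formula comes from the patch.
 */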
#define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */ #define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */ #define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */ +/* enable ULP even if when phy powered down via lanphypc */ +#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC 0x0400 +/* disable clear of sticky ULP on PERST */ +#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST 0x0800 #define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */ #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */ @@ -234,9 +250,12 @@ POSSIBILITY OF SUCH DAMAGE. /* PHY Power Management Control */ #define HV_PM_CTRL PHY_REG(770, 17) -#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 +#define HV_PM_CTRL_K1_CLK_REQ 0x200 #define HV_PM_CTRL_K1_ENABLE 0x4000 +#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28) +#define I217_PLL_CLOCK_GATE_MASK 0x07FF + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ diff --git a/src/dpdk/drivers/net/e1000/base/e1000_mbx.c b/src/dpdk/drivers/net/e1000/base/e1000_mbx.c index 6daf16b0..a92fd22e 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_mbx.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_mbx.c @@ -430,15 +430,21 @@ STATIC s32 e1000_check_for_rst_vf(struct e1000_hw *hw, STATIC s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_MBX; + int count = 10; DEBUGFUNC("e1000_obtain_mbx_lock_vf"); - /* Take ownership of the buffer */ - E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); + do { + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); - /* reserve mailbox for vf use */ - if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) - ret_val = E1000_SUCCESS; + /* reserve mailbox for vf use */ + if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(1000); + } while (count-- > 0); return ret_val; } @@ -645,18 +651,26 @@ STATIC s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) { s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; + int count = 10; DEBUGFUNC("e1000_obtain_mbx_lock_pf"); - /* Take ownership of the buffer */ - E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + do { + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), + E1000_P2VMAILBOX_PFU); - /* reserve mailbox for vf use */ - p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = E1000_SUCCESS; + /* reserve mailbox for pf use */ + p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(1000); + } while (count-- > 0); return ret_val; + } /** diff --git a/src/dpdk/drivers/net/e1000/base/e1000_nvm.c b/src/dpdk/drivers/net/e1000/base/e1000_nvm.c index 762acd16..75c22827 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_nvm.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_nvm.c @@ -1295,6 +1295,7 @@ void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) case e1000_82575: case e1000_82576: case e1000_82580: + case e1000_i354: hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); /* Use this format, unless EETRACK ID exists, * then use alternate format diff --git a/src/dpdk/drivers/net/e1000/base/e1000_osdep.h b/src/dpdk/drivers/net/e1000/base/e1000_osdep.h index 47a19481..b8868049 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_osdep.h +++ 
b/src/dpdk/drivers/net/e1000/base/e1000_osdep.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "../e1000_logs.h" @@ -94,17 +95,18 @@ typedef int bool; #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) -#define E1000_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define E1000_PCI_REG(reg) rte_read32(reg) -#define E1000_PCI_REG16(reg) (*((volatile uint16_t *)(reg))) +#define E1000_PCI_REG16(reg) rte_read16(reg) -#define E1000_PCI_REG_WRITE(reg, value) do { \ - E1000_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \ -} while (0) +#define E1000_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) -#define E1000_PCI_REG_WRITE16(reg, value) do { \ - E1000_PCI_REG16((reg)) = (rte_cpu_to_le_16(value)); \ -} while (0) +#define E1000_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) + +#define E1000_PCI_REG_WRITE16(reg, value) \ + rte_write16((rte_cpu_to_le_16(value)), reg) #define E1000_PCI_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg))) diff --git a/src/dpdk/drivers/net/e1000/base/e1000_regs.h b/src/dpdk/drivers/net/e1000/base/e1000_regs.h index 84531a99..364a7261 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_regs.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_regs.h @@ -66,6 +66,8 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */ #define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */ #define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */ +#define E1000_FEXTNVM9 0x5BB4 /* Future Extended NVM 9 - RW */ +#define E1000_FEXTNVM11 0x5BBC /* Future Extended NVM 11 - RW */ #define E1000_PCIEANACFG 0x00F18 /* PCIE Analog Config */ #define E1000_FCT 0x00030 /* Flow Control Type - RW */ #define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ @@ -109,6 +111,7 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_PBECCSTS 0x0100C /* Packet Buffer ECC Status - RW */ +#define E1000_IOSFPC 0x00F28 /* TX corrupted data */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ #define E1000_EEMNGCTL_I210 0x01010 /* i210 MNG EEprom Mode Control */ #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ @@ -591,6 +594,10 @@ POSSIBILITY OF SUCH DAMAGE. 
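/*
 * Editor's note (illustrative, not part of the upstream hunk): the
 * e1000_osdep.h change above replaces raw volatile dereferences with the
 * rte_io accessors added in DPDK 17.02, roughly:
 *
 *   E1000_PCI_REG_WRITE(reg, value)         -> rte_write32(rte_cpu_to_le_32(value), reg)
 *   E1000_PCI_REG_WRITE_RELAXED(reg, value) -> rte_write32_relaxed(rte_cpu_to_le_32(value), reg)
 *
 * rte_write32() orders the MMIO store against earlier memory accesses,
 * while the _relaxed variant is a plain store that leaves ordering to the
 * caller. The same split shows up in the cxgbe doorbell path earlier in
 * this patch, where ring_fl_db() moves to t4_write_reg_relaxed()/
 * writel_relaxed() and relies on the explicit write barrier that follows.
 */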
#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ #define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ #define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */ +#define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */ +#define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */ +#define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */ #define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ #define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ #define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ diff --git a/src/dpdk/drivers/net/e1000/e1000_ethdev.h b/src/dpdk/drivers/net/e1000/e1000_ethdev.h index 6c25c8da..81a6dbb7 100644 --- a/src/dpdk/drivers/net/e1000/e1000_ethdev.h +++ b/src/dpdk/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -286,6 +291,8 @@ struct e1000_adapter { #define E1000_DEV_PRIVATE_TO_FILTER_INFO(adapter) \ (&((struct e1000_adapter *)adapter)->filter) +#define E1000_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) /* * RX/TX IGB function prototypes */ @@ -315,6 +322,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +386,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/src/dpdk/drivers/net/e1000/em_ethdev.c b/src/dpdk/drivers/net/e1000/em_ethdev.c index ad104ed7..d778785d 100644 --- a/src/dpdk/drivers/net/e1000/em_ethdev.c +++ b/src/dpdk/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -83,7 +83,8 @@ static int eth_em_flow_ctrl_set(struct rte_eth_dev *dev, static int eth_em_interrupt_setup(struct rte_eth_dev *dev); static int eth_em_rxq_interrupt_setup(struct rte_eth_dev *dev); static int eth_em_interrupt_get_status(struct rte_eth_dev *dev); -static int eth_em_interrupt_action(struct rte_eth_dev *dev); +static int eth_em_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void eth_em_interrupt_handler(struct rte_intr_handle *handle, void *param); @@ -168,6 +169,19 @@ static const struct rte_pci_id pci_id_em_map[] = { { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V2) }, { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_LM3) }, { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V3) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_LBG_I219_LM3) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM5) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V5) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM6) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V6) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM7) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V7) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -278,6 +292,19 @@ eth_em_dev_is_ich8(struct e1000_hw *hw) case E1000_DEV_ID_PCH_I218_LM2: case E1000_DEV_ID_PCH_I218_V3: case E1000_DEV_ID_PCH_I218_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM: + case E1000_DEV_ID_PCH_SPT_I219_V: + case E1000_DEV_ID_PCH_SPT_I219_LM2: + case E1000_DEV_ID_PCH_SPT_I219_V2: + case E1000_DEV_ID_PCH_LBG_I219_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM4: + case E1000_DEV_ID_PCH_SPT_I219_V4: + case E1000_DEV_ID_PCH_SPT_I219_LM5: + case E1000_DEV_ID_PCH_SPT_I219_V5: + case E1000_DEV_ID_PCH_CNP_I219_LM6: + case E1000_DEV_ID_PCH_CNP_I219_V6: + case E1000_DEV_ID_PCH_CNP_I219_LM7: + case E1000_DEV_ID_PCH_CNP_I219_V7: return 1; default: return 0; @@ -287,7 +314,8 @@ eth_em_dev_is_ich8(struct e1000_hw *hw) static int eth_em_dev_init(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); struct e1000_hw *hw = @@ -295,11 +323,10 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) struct e1000_vfta * shadow_vfta = E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = &eth_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts; + eth_dev->tx_pkt_prepare = (eth_tx_prep_t)&eth_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work.
Only check we don't need a different @@ -312,6 +339,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) } rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->device_id = pci_dev->id.device_id; @@ -351,8 +379,8 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - rte_intr_callback_register(&(pci_dev->intr_handle), - eth_em_interrupt_handler, (void *)eth_dev); + rte_intr_callback_register(intr_handle, + eth_em_interrupt_handler, eth_dev); return 0; } @@ -360,17 +388,16 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) static int eth_em_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() != RTE_PROC_PRIMARY) return -EPERM; - pci_dev = eth_dev->pci_dev; - if (adapter->stopped == 0) eth_em_close(eth_dev); @@ -382,32 +409,25 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->data->mac_addrs = NULL; /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - eth_em_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + eth_em_interrupt_handler, eth_dev); return 0; } static struct eth_driver rte_em_pmd = { .pci_drv = { - .name = "rte_em_pmd", .id_table = pci_id_em_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_em_dev_init, .eth_dev_uninit = eth_em_dev_uninit, .dev_private_size = sizeof(struct e1000_adapter), }; -static int -rte_em_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - rte_eth_driver_register(&rte_em_pmd); - return 0; -} - static int em_hw_init(struct e1000_hw *hw) { @@ -546,6 +566,8 @@ em_set_pba(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: pba = E1000_PBA_26K; break; default: @@ -562,7 +584,9 @@ eth_em_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE(dev->data->dev_private); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = + E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret, mask; uint32_t intr_vector = 0; uint32_t *speeds; @@ -615,7 +639,7 @@ eth_em_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } @@ -645,6 +669,7 @@ eth_em_start(struct rte_eth_dev *dev) speeds = &dev->data->dev_conf.link_speeds; if (*speeds == ETH_LINK_SPEED_AUTONEG) { hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX; + hw->mac.autoneg = 1; } else { num_speeds = 0; autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; @@ -680,6 +705,17 @@ eth_em_start(struct rte_eth_dev *dev) } if (num_speeds == 0 || (!autoneg && (num_speeds > 1))) goto 
error_invalid_config; + + /* Set/reset the mac.autoneg based on the link speed, + * fixed or not + */ + if (!autoneg) { + hw->mac.autoneg = 0; + hw->mac.forced_speed_duplex = + hw->phy.autoneg_advertised; + } else { + hw->mac.autoneg = 1; + } } e1000_setup_link(hw); @@ -700,7 +736,7 @@ eth_em_start(struct rte_eth_dev *dev) (void *)dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplex"); } /* check if rxq interrupt is enabled */ if (dev->data->dev_conf.intr_conf.rxq != 0) @@ -732,7 +768,8 @@ eth_em_stop(struct rte_eth_dev *dev) { struct rte_eth_link link; struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_disable(hw); em_lsc_intr_disable(hw); @@ -847,7 +884,9 @@ em_hardware_init(struct e1000_hw *hw) hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; - } else if (hw->mac.type == e1000_pch_lpt) { + } else if (hw->mac.type == e1000_pch_lpt || + hw->mac.type == e1000_pch_spt || + hw->mac.type == e1000_pch_cnp) { hw->fc.requested_mode = e1000_fc_full; } @@ -993,9 +1032,11 @@ static int eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -1020,6 +1061,8 @@ em_get_max_pktlen(const struct e1000_hw *hw) case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: case e1000_82574: case e1000_80003es2lan: /* 9K Jumbo Frame size */ case e1000_82583: @@ -1039,6 +1082,7 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = em_get_max_pktlen(hw); dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -1073,6 +1117,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | @@ -1530,8 +1576,10 @@ eth_em_interrupt_get_status(struct rte_eth_dev *dev) * - On failure, a negative value.
*/ static int -eth_em_interrupt_action(struct rte_eth_dev *dev) +eth_em_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_interrupt *intr = @@ -1544,7 +1592,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; @@ -1565,8 +1613,8 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) PMD_INIT_LOG(INFO, " Port %d: Link Down", dev->data->port_id); } PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, dev->pci_dev->addr.function); + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); tctl = E1000_READ_REG(hw, E1000_TCTL); rctl = E1000_READ_REG(hw, E1000_RCTL); @@ -1598,14 +1646,14 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) * void */ static void -eth_em_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) +eth_em_interrupt_handler(struct rte_intr_handle *handle, + void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_em_interrupt_get_status(dev); - eth_em_interrupt_action(dev); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + eth_em_interrupt_action(dev, handle); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } static int @@ -1799,10 +1847,6 @@ eth_em_set_mc_addr_list(struct rte_eth_dev *dev, return 0; } -struct rte_driver em_pmd_drv = { - .type = PMD_PDEV, - .init = rte_em_pmd_init, -}; - -PMD_REGISTER_DRIVER(em_pmd_drv, em); -DRIVER_REGISTER_PCI_TABLE(em, pci_id_em_map); +RTE_PMD_REGISTER_PCI(net_e1000_em, rte_em_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_em, pci_id_em_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_em, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/e1000/em_rxtx.c b/src/dpdk/drivers/net/e1000/em_rxtx.c index 6d8750a8..d099d6a2 100644 --- a/src/dpdk/drivers/net/e1000/em_rxtx.c +++ b/src/dpdk/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -67,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -78,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -611,12 +619,49 @@ end_of_tx: PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); txq->tx_tail = tx_id; return nb_tx; } +/********************************************************************* + * + * TX prep functions + * + **********************************************************************/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + /********************************************************************* * * RX functions diff --git a/src/dpdk/drivers/net/e1000/igb_ethdev.c b/src/dpdk/drivers/net/e1000/igb_ethdev.c index fbf4d090..d9397744 100644 --- a/src/dpdk/drivers/net/e1000/igb_ethdev.c +++ b/src/dpdk/drivers/net/e1000/igb_ethdev.c @@ -120,6 +120,8 @@ static int eth_igb_xstats_get_names(struct rte_eth_dev *dev, unsigned limit); static void eth_igb_stats_reset(struct rte_eth_dev *dev); static void eth_igb_xstats_reset(struct rte_eth_dev *dev); +static int eth_igb_fw_version_get(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); static void eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static const uint32_t *eth_igb_supported_ptypes_get(struct rte_eth_dev *dev); @@ -132,7 +134,8 @@ static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev); static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev); static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev); -static int eth_igb_interrupt_action(struct rte_eth_dev *dev); +static int eth_igb_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void eth_igb_interrupt_handler(struct rte_intr_handle *handle, void *param); static int igb_hardware_init(struct e1000_hw *hw); @@ -306,22 +309,57 @@ static enum e1000_fc_mode igb_fc_setting = e1000_fc_full; * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_igb_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{0}, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD) }, + + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER) }, + + { 
RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER) }, + + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_DA4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP) }, + { .vendor_id = 0, /* sentinel */ }, }; /* * The set of PCI devices this driver supports (for 82576&I350 VF) */ static const struct rte_pci_id pci_id_igbvf_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{0}, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF_HV) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF_HV) }, + { .vendor_id = 0, /* sentinel */ }, }; static const struct rte_eth_desc_lim rx_desc_lim = { @@ -334,6 +372,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = IGB_RXD_ALIGN, + .nb_seg_max = IGB_TX_MAX_SEG, + .nb_mtu_seg_max = IGB_TX_MAX_MTU_SEG, }; static const struct eth_dev_ops eth_igb_ops = { @@ -353,6 +393,7 @@ static const struct eth_dev_ops eth_igb_ops = { .xstats_get_names = eth_igb_xstats_get_names, .stats_reset = eth_igb_stats_reset, .xstats_reset = eth_igb_xstats_reset, + .fw_version_get = eth_igb_fw_version_get, .dev_infos_get = eth_igb_infos_get, .dev_supported_ptypes_get = eth_igb_supported_ptypes_get, .mtu_set = eth_igb_mtu_set, @@ -633,15 +674,16 @@ igb_pf_reset_hw(struct e1000_hw *hw) } static void -igb_identify_hardware(struct rte_eth_dev *dev) +igb_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - hw->vendor_id = dev->pci_dev->id.vendor_id; - hw->device_id = dev->pci_dev->id.device_id; - hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id; - 
hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id; + + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; e1000_set_mac_type(hw); @@ -708,7 +750,7 @@ static int eth_igb_dev_init(struct rte_eth_dev *eth_dev) { int error = 0; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct e1000_vfta * shadow_vfta = @@ -720,11 +762,10 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev) uint32_t ctrl_ext; - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = &eth_igb_ops; eth_dev->rx_pkt_burst = &eth_igb_recv_pkts; eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts; + eth_dev->tx_pkt_prepare = &eth_igb_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -736,10 +777,11 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev) } rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->hw_addr= (void *)pci_dev->mem_resource[0].addr; - igb_identify_hardware(eth_dev); + igb_identify_hardware(eth_dev, pci_dev); if (e1000_setup_init_funcs(hw, FALSE) != E1000_SUCCESS) { error = -EIO; goto err_late; @@ -873,6 +915,7 @@ static int eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct e1000_hw *hw; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); @@ -883,7 +926,8 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) return -EPERM; hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; + pci_dev = E1000_DEV_TO_PCI(eth_dev); + intr_handle = &pci_dev->intr_handle; if (adapter->stopped == 0) eth_igb_close(eth_dev); @@ -902,9 +946,9 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) igb_pf_host_uninit(eth_dev); /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - eth_igb_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + eth_igb_interrupt_handler, eth_dev); return 0; } @@ -916,6 +960,7 @@ static int eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); struct e1000_hw *hw = @@ -928,6 +973,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &igbvf_eth_dev_ops; eth_dev->rx_pkt_burst = &eth_igb_recv_pkts; eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts; + eth_dev->tx_pkt_prepare = &eth_igb_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work.
Only check we don't need a different @@ -938,9 +984,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; - + pci_dev = E1000_DEV_TO_PCI(eth_dev); rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->device_id = pci_dev->id.device_id; hw->vendor_id = pci_dev->id.vendor_id; @@ -1003,9 +1049,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id, "igb_mac_82576_vf"); - rte_intr_callback_register(&pci_dev->intr_handle, - eth_igbvf_interrupt_handler, - (void *)eth_dev); + intr_handle = &pci_dev->intr_handle; + rte_intr_callback_register(intr_handle, + eth_igbvf_interrupt_handler, eth_dev); return 0; } @@ -1015,7 +1061,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) { struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); - struct rte_pci_device *pci_dev = eth_dev->pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); PMD_INIT_FUNC_TRACE(); @@ -1043,10 +1089,10 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_igb_pmd = { .pci_drv = { - .name = "rte_igb_pmd", .id_table = pci_id_igb_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_igb_dev_init, .eth_dev_uninit = eth_igb_dev_uninit, @@ -1058,22 +1104,16 @@ static struct eth_driver rte_igb_pmd = { */ static struct eth_driver rte_igbvf_pmd = { .pci_drv = { - .name = "rte_igbvf_pmd", .id_table = pci_id_igbvf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_igbvf_dev_init, .eth_dev_uninit = eth_igbvf_dev_uninit, .dev_private_size = sizeof(struct e1000_adapter), }; -static int -rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - rte_eth_driver_register(&rte_igb_pmd); - return 0; -} - static void igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) { @@ -1085,20 +1125,6 @@ igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) E1000_WRITE_REG(hw, E1000_RCTL, rctl); } -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI IGB devices. 
- */ -static int -rte_igbvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_igbvf_pmd); - return 0; -} - static int igb_check_mq_mode(struct rte_eth_dev *dev) { @@ -1201,7 +1227,8 @@ eth_igb_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret, mask; uint32_t intr_vector = 0; uint32_t ctrl_ext; @@ -1265,7 +1292,7 @@ eth_igb_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -1311,6 +1338,7 @@ eth_igb_start(struct rte_eth_dev *dev) speeds = &dev->data->dev_conf.link_speeds; if (*speeds == ETH_LINK_SPEED_AUTONEG) { hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX; + hw->mac.autoneg = 1; } else { num_speeds = 0; autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; @@ -1346,6 +1374,17 @@ eth_igb_start(struct rte_eth_dev *dev) } if (num_speeds == 0 || (!autoneg && (num_speeds > 1))) goto error_invalid_config; + + /* Set/reset the mac.autoneg based on the link speed, + * fixed or not + */ + if (!autoneg) { + hw->mac.autoneg = 0; + hw->mac.forced_speed_duplex = + hw->phy.autoneg_advertised; + } else { + hw->mac.autoneg = 1; + } } e1000_setup_link(hw); @@ -1360,7 +1399,7 @@ eth_igb_start(struct rte_eth_dev *dev) (void *)dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplex"); } /* check if rxq interrupt is enabled */ @@ -1397,11 +1436,12 @@ eth_igb_stop(struct rte_eth_dev *dev) struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_filter_info *filter_info = E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); struct rte_eth_link link; struct e1000_flex_filter *p_flex; struct e1000_5tuple_filter *p_5tuple, *p_5tuple_next; struct e1000_2tuple_filter *p_2tuple, *p_2tuple_next; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; igb_intr_disable(hw); @@ -1501,7 +1541,8 @@ eth_igb_close(struct rte_eth_dev *dev) struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); struct rte_eth_link link; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; eth_igb_stop(dev); adapter->stopped = 1; @@ -1521,10 +1562,9 @@ eth_igb_close(struct rte_eth_dev *dev) igb_dev_free_queues(dev); - pci_dev = dev->pci_dev; - if (pci_dev->intr_handle.intr_vec) { - rte_free(pci_dev->intr_handle.intr_vec); - pci_dev->intr_handle.intr_vec = NULL; + if (intr_handle->intr_vec) { + rte_free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; } memset(&link, 0, sizeof(link)); @@ -1948,11 +1988,64 @@ eth_igbvf_stats_reset(struct rte_eth_dev *dev) offsetof(struct e1000_vf_stats, gprc)); } +static int +eth_igb_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + size_t fw_size) +{ + struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + 
struct e1000_fw_version fw; + int ret; + + e1000_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (!(e1000_get_flash_presence_i210(hw))) { + ret = snprintf(fw_version, fw_size, + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, + fw.invm_img_type); + break; + } + /* fall through */ + default: + /* if option rom is valid, display its version too */ + if (fw.or_valid) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + if (fw.etrack_id != 0X0000) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, + fw.etrack_id); + } else { + ret = snprintf(fw_version, fw_size, + "%d.%d.%d", + fw.eep_major, fw.eep_minor, + fw.eep_build); + } + } + break; + } + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = 0x3FFF; /* See RLPML register. */ dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -2081,6 +2174,7 @@ eth_igbvf_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = 0x3FFF; /* See RLPML register. */ dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -2605,12 +2699,14 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev) * - On failure, a negative value. 
*/ static int -eth_igb_interrupt_action(struct rte_eth_dev *dev) +eth_igb_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_interrupt *intr = E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); uint32_t tctl, rctl; struct rte_eth_link link; int ret; @@ -2621,7 +2717,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) } igb_intr_enable(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2649,10 +2745,10 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) } PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, - dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, - dev->pci_dev->addr.function); + pci_dev->addr.domain, + pci_dev->addr.bus, + pci_dev->addr.devid, + pci_dev->addr.function); tctl = E1000_READ_REG(hw, E1000_TCTL); rctl = E1000_READ_REG(hw, E1000_RCTL); if (link.link_status) { @@ -2667,7 +2763,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) E1000_WRITE_REG(hw, E1000_TCTL, tctl); E1000_WRITE_REG(hw, E1000_RCTL, rctl); E1000_WRITE_FLUSH(hw); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } return 0; @@ -2685,13 +2781,12 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) * void */ static void -eth_igb_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) +eth_igb_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_igb_interrupt_get_status(dev); - eth_igb_interrupt_action(dev); + eth_igb_interrupt_action(dev, handle); } static int @@ -2727,11 +2822,11 @@ void igbvf_mbx_process(struct rte_eth_dev *dev) /* PF reset VF event */ if (in_msg == E1000_PF_CONTROL_MSG) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); } static int -eth_igbvf_interrupt_action(struct rte_eth_dev *dev) +eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr_handle) { struct e1000_interrupt *intr = E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); @@ -2742,19 +2837,19 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev) } igbvf_intr_enable(dev); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } static void -eth_igbvf_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +eth_igbvf_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_igbvf_interrupt_get_status(dev); - eth_igbvf_interrupt_action(dev); + eth_igbvf_interrupt_action(dev, handle); } static int @@ -3027,8 +3122,9 @@ igbvf_dev_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); @@ -3063,7 +3159,7 @@ igbvf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - 
" intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -3082,7 +3178,8 @@ igbvf_dev_start(struct rte_eth_dev *dev) static void igbvf_dev_stop(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -3281,7 +3378,7 @@ eth_igb_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_128) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128); + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); return -EINVAL; } @@ -3322,7 +3419,7 @@ eth_igb_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_128) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128); + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); return -EINVAL; } @@ -3443,7 +3540,7 @@ eth_igb_syn_filter_handle(struct rte_eth_dev *dev, (struct rte_eth_syn_filter *)arg); break; default: - PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -EINVAL; break; } @@ -5049,16 +5146,6 @@ eth_igb_set_eeprom(struct rte_eth_dev *dev, return nvm->ops.write(hw, first, length, data); } -static struct rte_driver pmd_igb_drv = { - .type = PMD_PDEV, - .init = rte_igb_pmd_init, -}; - -static struct rte_driver pmd_igbvf_drv = { - .type = PMD_PDEV, - .init = rte_igbvf_pmd_init, -}; - static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { @@ -5077,6 +5164,8 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask = 1 << queue_id; uint32_t regval; @@ -5084,7 +5173,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -5148,8 +5237,8 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) uint32_t vec = E1000_MISC_VEC_ID; uint32_t base = E1000_MISC_VEC_ID; uint32_t misc_shift = 0; - - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; /* won't configure msix register if no mapping is done * between intr vector and event fd @@ -5220,7 +5309,9 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) E1000_WRITE_FLUSH(hw); } -PMD_REGISTER_DRIVER(pmd_igb_drv, igb); -DRIVER_REGISTER_PCI_TABLE(igb, pci_id_igb_map); -PMD_REGISTER_DRIVER(pmd_igbvf_drv, igbvf); -DRIVER_REGISTER_PCI_TABLE(igbvf, pci_id_igbvf_map); +RTE_PMD_REGISTER_PCI(net_e1000_igb, rte_igb_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb, pci_id_igb_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_e1000_igb_vf, rte_igbvf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb_vf, pci_id_igbvf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb_vf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/e1000/igb_pf.c 
b/src/dpdk/drivers/net/e1000/igb_pf.c index 5845bc22..67da3c24 100644 --- a/src/dpdk/drivers/net/e1000/igb_pf.c +++ b/src/dpdk/drivers/net/e1000/igb_pf.c @@ -57,7 +57,9 @@ static inline uint16_t dev_num_vf(struct rte_eth_dev *eth_dev) { - return eth_dev->pci_dev->max_vfs; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); + + return pci_dev->max_vfs; } static inline diff --git a/src/dpdk/drivers/net/e1000/igb_rxtx.c b/src/dpdk/drivers/net/e1000/igb_rxtx.c index 9d80a0b3..45f3f249 100644 --- a/src/dpdk/drivers/net/e1000/igb_rxtx.c +++ b/src/dpdk/drivers/net/e1000/igb_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -66,6 +65,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -79,6 +79,9 @@ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG) +#define IGB_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. */ @@ -606,7 +609,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, /* * Set the Transmit Descriptor Tail (TDT). */ - E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); @@ -615,6 +618,52 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return nb_tx; } +/********************************************************************* + * + * TX prep functions + * + **********************************************************************/ +uint16_t +eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + /* Check some limitations for TSO in hardware */ + if (m->ol_flags & PKT_TX_TCP_SEG) + if ((m->tso_segsz > IGB_TSO_MAX_MSS) || + (m->l2_len + m->l3_len + m->l4_len > + IGB_TSO_MAX_HDRLEN)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + /********************************************************************* * * RX functions @@ -748,7 +797,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status) */ static uint64_t error_to_pkt_flags_map[4] = { - 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD, PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD }; return error_to_pkt_flags_map[(rx_status >> @@ -1363,6 +1414,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev, igb_reset_tx_queue(txq, dev); dev->tx_pkt_burst = eth_igb_xmit_pkts; + dev->tx_pkt_prepare = &eth_igb_prep_pkts; dev->data->tx_queues[queue_idx] = txq; return 0; @@ -1528,7 +1580,7 @@ eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) desc - rxq->nb_rx_desc]); } - return 0; + return desc; } int diff --git
a/src/dpdk/drivers/net/enic/base/vnic_dev.c b/src/dpdk/drivers/net/enic/base/vnic_dev.c index dddb1dcd..84e4840a 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_dev.c +++ b/src/dpdk/drivers/net/enic/base/vnic_dev.c @@ -266,32 +266,35 @@ void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) memset(ring->descs, 0, ring->size); } -int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size, unsigned int socket_id, + unsigned int desc_count, unsigned int desc_size, + __attribute__((unused)) unsigned int socket_id, char *z_name) { - const struct rte_memzone *rz; + void *alloc_addr = NULL; + dma_addr_t alloc_pa = 0; vnic_dev_desc_ring_size(ring, desc_count, desc_size); - - rz = rte_memzone_reserve_aligned(z_name, - ring->size_unaligned, socket_id, - 0, ENIC_ALIGN); - if (!rz) { + alloc_addr = vdev->alloc_consistent(vdev->priv, + ring->size_unaligned, + &alloc_pa, (u8 *)z_name); + if (!alloc_addr) { pr_err("Failed to allocate ring (size=%d), aborting\n", (int)ring->size); return -ENOMEM; } - - ring->descs_unaligned = rz->addr; - if (!ring->descs_unaligned) { + ring->descs_unaligned = alloc_addr; + if (!alloc_pa) { pr_err("Failed to map allocated ring (size=%d), aborting\n", (int)ring->size); + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + alloc_addr, + alloc_pa); return -ENOMEM; } - - ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr; + ring->base_addr_unaligned = alloc_pa; ring->base_addr = VNIC_ALIGN(ring->base_addr_unaligned, ring->base_align); @@ -308,8 +311,13 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, void vnic_dev_free_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, struct vnic_dev_ring *ring) { - if (ring->descs) + if (ring->descs) { + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); ring->descs = NULL; + } } static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, @@ -668,7 +676,6 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, (allmulti ? 
CMD_PFILTER_ALL_MULTICAST : 0); err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); - if (err) pr_err("Can't set packet filter\n"); diff --git a/src/dpdk/drivers/net/enic/base/vnic_rq.c b/src/dpdk/drivers/net/enic/base/vnic_rq.c index 0e700a12..10a40c1b 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_rq.c +++ b/src/dpdk/drivers/net/enic/base/vnic_rq.c @@ -87,9 +87,11 @@ void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, iowrite32(0, &rq->ctrl->error_status); iowrite32(fetch_index, &rq->ctrl->fetch_index); iowrite32(posted_index, &rq->ctrl->posted_index); - if (rq->is_sop) - iowrite32(((rq->is_sop << 10) | rq->data_queue_idx), + if (rq->data_queue_enable) + iowrite32(((1 << 10) | rq->data_queue_idx), &rq->ctrl->data_ring); + else + iowrite32(0, &rq->ctrl->data_ring); } void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, diff --git a/src/dpdk/drivers/net/enic/base/vnic_rq.h b/src/dpdk/drivers/net/enic/base/vnic_rq.h index fd9e1704..f3fd39f7 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_rq.h +++ b/src/dpdk/drivers/net/enic/base/vnic_rq.h @@ -91,11 +91,13 @@ struct vnic_rq { uint16_t rxst_idx; uint32_t tot_pkts; uint16_t data_queue_idx; + uint8_t data_queue_enable; uint8_t is_sop; uint8_t in_use; struct rte_mbuf *pkt_first_seg; struct rte_mbuf *pkt_last_seg; unsigned int max_mbufs_per_pkt; + uint16_t tot_nb_desc; }; static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) diff --git a/src/dpdk/drivers/net/enic/enic.h b/src/dpdk/drivers/net/enic/enic.h index 9117cc76..a4540178 100644 --- a/src/dpdk/drivers/net/enic/enic.h +++ b/src/dpdk/drivers/net/enic/enic.h @@ -60,6 +60,7 @@ #define ENIC_RQ_MAX 16 #define ENIC_CQ_MAX (ENIC_WQ_MAX + (ENIC_RQ_MAX / 2)) #define ENIC_INTR_MAX (ENIC_CQ_MAX + 2) +#define ENIC_MAX_MAC_ADDR 64 #define VLAN_ETH_HLEN 18 @@ -97,13 +98,11 @@ struct enic_fdir { void (*copy_fltr_fn)(struct filter_v2 *filt, struct rte_eth_fdir_input *input, struct rte_eth_fdir_masks *masks); - }; struct enic_soft_stats { rte_atomic64_t rx_nombuf; rte_atomic64_t rx_packet_errors; - rte_atomic64_t tx_oversized; }; struct enic_memzone_entry { @@ -168,17 +167,32 @@ struct enic { /* linked list storing memory allocations */ LIST_HEAD(enic_memzone_list, enic_memzone_entry) memzone_list; rte_spinlock_t memzone_list_lock; + rte_spinlock_t mtu_lock; }; -static inline unsigned int enic_sop_rq(unsigned int rq) +/* Get the CQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx) { - return rq * 2; + return sop_idx / 2; } -static inline unsigned int enic_data_rq(unsigned int rq) +/* Get the RTE RQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_rte_idx(unsigned int sop_idx) { - return rq * 2 + 1; + return sop_idx / 2; +} + +/* Get the Start of Packet(SOP) RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_sop_idx(unsigned int rte_idx) +{ + return rte_idx * 2; +} + +/* Get the Data RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_data_idx(unsigned int rte_idx) +{ + return rte_idx * 2 + 1; } static inline unsigned int enic_vnic_rq_count(struct enic *enic) @@ -252,7 +266,7 @@ extern int enic_stop_rq(struct enic *enic, uint16_t queue_idx); extern void enic_free_rq(void *rxq); extern int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, unsigned int socket_id, struct rte_mempool *mp, - uint16_t nb_desc); + uint16_t nb_desc, uint16_t free_thresh); extern int enic_set_rss_nic_cfg(struct enic 
*enic); extern int enic_set_vnic_res(struct enic *enic); extern void enic_set_hdr_split_size(struct enic *enic, u16 split_hdr_size); @@ -264,8 +278,8 @@ extern void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats); extern void enic_dev_stats_clear(struct enic *enic); extern void enic_add_packet_filter(struct enic *enic); -extern void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr); -extern void enic_del_mac_address(struct enic *enic); +void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr); +void enic_del_mac_address(struct enic *enic, int mac_index); extern unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq); extern void enic_send_pkt(struct enic *enic, struct vnic_wq *wq, struct rte_mbuf *tx_pkt, unsigned short len, @@ -278,14 +292,17 @@ extern int enic_clsf_init(struct enic *enic); extern void enic_clsf_destroy(struct enic *enic); uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t enic_dummy_recv_pkts(__rte_unused void *rx_queue, + __rte_unused struct rte_mbuf **rx_pkts, + __rte_unused uint16_t nb_pkts); uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); int enic_set_mtu(struct enic *enic, uint16_t new_mtu); +int enic_link_update(struct enic *enic); void enic_fdir_info(struct enic *enic); void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats); void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, - struct rte_eth_fdir_masks *masks); -void copy_fltr_v2(__rte_unused struct filter_v2 *fltr, - __rte_unused struct rte_eth_fdir_input *input, __rte_unused struct rte_eth_fdir_masks *masks); +void copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, + struct rte_eth_fdir_masks *masks); #endif /* _ENIC_H_ */ diff --git a/src/dpdk/drivers/net/enic/enic_clsf.c b/src/dpdk/drivers/net/enic/enic_clsf.c index 1610951d..bcf479ac 100644 --- a/src/dpdk/drivers/net/enic/enic_clsf.c +++ b/src/dpdk/drivers/net/enic/enic_clsf.c @@ -74,7 +74,7 @@ void enic_fdir_stats_get(struct enic *enic, struct rte_eth_fdir_stats *stats) void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *info) { - info->mode = enic->fdir.modes; + info->mode = (enum rte_fdir_mode)enic->fdir.modes; info->flow_types_mask[0] = enic->fdir.types_mask; } @@ -107,7 +107,6 @@ enic_set_layer(struct filter_generic_1 *gp, unsigned int flag, memcpy(gp->layer[layer].val, val, len); } - /* Copy Flow Director filter to a VIC ipv4 filter (for Cisco VICs * without advanced filter support. */ @@ -133,28 +132,6 @@ copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, fltr->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; } -#define TREX_PATCH -#ifdef TREX_PATCH -void -copy_fltr_recv_all(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, - struct rte_eth_fdir_masks *masks) { - struct filter_generic_1 *gp = &fltr->u.generic_1; - memset(gp, 0, sizeof(*gp)); - - struct ether_hdr eth_mask, eth_val; - memset(&eth_mask, 0, sizeof(eth_mask)); - memset(&eth_val, 0, sizeof(eth_val)); - - eth_val.ether_type = 0xdead; - eth_mask.ether_type = 0; - - gp->position = 0; - enic_set_layer(gp, 0, FILTER_GENERIC_1_L2, - &eth_mask, &eth_val, sizeof(struct ether_hdr)); - -} -#endif - /* Copy Flow Director filter to a VIC generic filter (requires advanced * filter support.
*/ @@ -165,15 +142,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, struct filter_generic_1 *gp = &fltr->u.generic_1; int i; - RTE_ASSERT(enic->adv_filters); - fltr->type = FILTER_DPDK_1; memset(gp, 0, sizeof(*gp)); -#ifdef TREX_PATCH - // important for this to be below 2. - // If added with position 2, IPv6 UDP and ICMP seems to be caught by some other rule - gp->position = 1; -#endif if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) { struct udp_hdr udp_mask, udp_val; @@ -185,7 +155,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, udp_val.src_port = input->flow.udp4_flow.src_port; } if (input->flow.udp4_flow.dst_port) { - udp_mask.src_port = masks->dst_port_mask; + udp_mask.dst_port = masks->dst_port_mask; udp_val.dst_port = input->flow.udp4_flow.dst_port; } @@ -241,13 +211,9 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, memset(&ip4_val, 0, sizeof(struct ipv4_hdr)); if (input->flow.ip4_flow.tos) { - ip4_mask.type_of_service = masks->ipv4_mask.tos; + ip4_mask.type_of_service = 0xff; ip4_val.type_of_service = input->flow.ip4_flow.tos; } - if (input->flow.ip4_flow.ip_id) { - ip4_mask.packet_id = 0xffff; - ip4_val.packet_id = input->flow.ip4_flow.ip_id; - } if (input->flow.ip4_flow.ttl) { ip4_mask.time_to_live = 0xff; ip4_val.time_to_live = input->flow.ip4_flow.ttl; @@ -333,7 +299,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, memset(&ipv6_val, 0, sizeof(struct ipv6_hdr)); if (input->flow.ipv6_flow.proto) { - ipv6_mask.proto = masks->ipv6_mask.proto; + ipv6_mask.proto = 0xff; ipv6_val.proto = input->flow.ipv6_flow.proto; } for (i = 0; i < 4; i++) { @@ -349,8 +315,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, input->flow.ipv6_flow.dst_ip[i]; } if (input->flow.ipv6_flow.tc) { - ipv6_mask.vtc_flow = ((uint32_t)masks->ipv6_mask.tc<<12); - ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 12; + ipv6_mask.vtc_flow = 0x00ff0000; + ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 16; } if (input->flow.ipv6_flow.hop_limits) { ipv6_mask.hop_limits = 0xff; @@ -372,11 +338,7 @@ int enic_fdir_del_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) case -EINVAL: case -ENOENT: enic->fdir.stats.f_remove++; -#ifdef TREX_PATCH - return pos; -#else return -EINVAL; -#endif default: /* The entry is present in the table */ key = enic->fdir.nodes[pos]; @@ -420,7 +382,7 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) } /* Get the enicpmd RQ from the DPDK Rx queue */ - queue = enic_sop_rq(params->action.rx_queue); + queue = enic_rte_rq_idx_to_sop_idx(params->action.rx_queue); if (!enic->rq[queue].in_use) return -EINVAL; @@ -487,18 +449,8 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) key->filter = *params; key->rq_index = queue; -#ifdef TREX_PATCH - switch (params->soft_id) { - case 100: - copy_fltr_recv_all(&fltr, &params->input, &enic->rte_dev->data->dev_conf.fdir_conf.mask); - break; - default: -#endif - enic->fdir.copy_fltr_fn(&fltr, &params->input, - &enic->rte_dev->data->dev_conf.fdir_conf.mask); -#ifdef TREX_PATCH - } -#endif + enic->fdir.copy_fltr_fn(&fltr, &params->input, + &enic->rte_dev->data->dev_conf.fdir_conf.mask); if (!vnic_dev_classifier(enic->vdev, CLSF_ADD, &queue, &fltr)) { key->fltr_id = queue; diff --git a/src/dpdk/drivers/net/enic/enic_compat.h b/src/dpdk/drivers/net/enic/enic_compat.h index 5dbd983b..fc58bb41 100644 --- a/src/dpdk/drivers/net/enic/enic_compat.h +++
b/src/dpdk/drivers/net/enic/enic_compat.h @@ -41,6 +41,7 @@ #include #include #include +#include #define ENIC_PAGE_ALIGN 4096UL #define ENIC_ALIGN ENIC_PAGE_ALIGN @@ -95,42 +96,52 @@ typedef unsigned long long dma_addr_t; static inline uint32_t ioread32(volatile void *addr) { - return *(volatile uint32_t *)addr; + return rte_read32(addr); } static inline uint16_t ioread16(volatile void *addr) { - return *(volatile uint16_t *)addr; + return rte_read16(addr); } static inline uint8_t ioread8(volatile void *addr) { - return *(volatile uint8_t *)addr; + return rte_read8(addr); } static inline void iowrite32(uint32_t val, volatile void *addr) { - *(volatile uint32_t *)addr = val; + rte_write32(val, addr); +} + +static inline void iowrite32_relaxed(uint32_t val, volatile void *addr) +{ + rte_write32_relaxed(val, addr); } static inline void iowrite16(uint16_t val, volatile void *addr) { - *(volatile uint16_t *)addr = val; + rte_write16(val, addr); } static inline void iowrite8(uint8_t val, volatile void *addr) { - *(volatile uint8_t *)addr = val; + rte_write8(val, addr); } static inline unsigned int readl(volatile void __iomem *addr) { - return *(volatile unsigned int *)addr; + return rte_read32(addr); +} + +static inline unsigned int readl_relaxed(volatile void __iomem *addr) +{ + return rte_read32_relaxed(addr); } static inline void writel(unsigned int val, volatile void __iomem *addr) { - *(volatile unsigned int *)addr = val; + rte_write32(val, addr); } #define min_t(type, x, y) ({ \ diff --git a/src/dpdk/drivers/net/enic/enic_ethdev.c b/src/dpdk/drivers/net/enic/enic_ethdev.c index 6a86e23f..bffa8700 100644 --- a/src/dpdk/drivers/net/enic/enic_ethdev.c +++ b/src/dpdk/drivers/net/enic/enic_ethdev.c @@ -154,7 +154,7 @@ static int enicpmd_dev_setup_intr(struct enic *enic) return 0; /* check start of packet (SOP) RQs only in case scatter is disabled. 
*/ for (index = 0; index < enic->rq_count; index++) { - if (!enic->rq[enic_sop_rq(index)].ctrl) + if (!enic->rq[enic_rte_rq_idx_to_sop_idx(index)].ctrl) break; } if (enic->rq_count != index) @@ -262,6 +262,35 @@ static void enicpmd_dev_rx_queue_release(void *rxq) enic_free_rq(rxq); } +static uint32_t enicpmd_dev_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id) +{ + struct enic *enic = pmd_priv(dev); + uint32_t queue_count = 0; + struct vnic_cq *cq; + uint32_t cq_tail; + uint16_t cq_idx; + int rq_num; + + if (rx_queue_id >= dev->data->nb_rx_queues) { + dev_err(enic, "Invalid RX queue id=%d", rx_queue_id); + return 0; + } + + rq_num = enic_rte_rq_idx_to_sop_idx(rx_queue_id); + cq = &enic->cq[enic_cq_rq(enic, rq_num)]; + cq_idx = cq->to_clean; + + cq_tail = ioread32(&cq->ctrl->cq_tail); + + if (cq_tail < cq_idx) + cq_tail += cq->ring.desc_count; + + queue_count = cq_tail - cq_idx; + + return queue_count; +} + static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t queue_idx, uint16_t nb_desc, @@ -284,18 +313,15 @@ static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, } eth_dev->data->rx_queues[queue_idx] = - (void *)&enic->rq[enic_sop_rq(queue_idx)]; + (void *)&enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; - ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc); + ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc, + rx_conf->rx_free_thresh); if (ret) { dev_err(enic, "error in allocating rq\n"); return ret; } - enic->rq[queue_idx].rx_free_thresh = rx_conf->rx_free_thresh; - dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx, - enic->rq[queue_idx].rx_free_thresh); - return enicpmd_dev_setup_intr(enic); } @@ -405,17 +431,9 @@ static int enicpmd_dev_link_update(struct rte_eth_dev *eth_dev, __rte_unused int wait_to_complete) { struct enic *enic = pmd_priv(eth_dev); - int ret; - int link_status = 0; ENICPMD_FUNC_TRACE(); - link_status = enic_get_link_status(enic); - ret = (link_status == enic->link_status); - enic->link_status = link_status; - eth_dev->data->dev_link.link_status = link_status; - eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; - eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); - return ret; + return enic_link_update(enic); } static void enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev, @@ -435,22 +453,19 @@ static void enicpmd_dev_stats_reset(struct rte_eth_dev *eth_dev) enic_dev_stats_clear(enic); } - - - static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *device_info) { struct enic *enic = pmd_priv(eth_dev); ENICPMD_FUNC_TRACE(); + device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); /* Scattered Rx uses two receive queues per rx queue exposed to dpdk */ device_info->max_rx_queues = enic->conf_rq_count / 2; device_info->max_tx_queues = enic->conf_wq_count; device_info->min_rx_bufsize = ENIC_MIN_MTU; - device_info->max_rx_pktlen = enic->rte_dev->data->mtu - + ETHER_HDR_LEN + 4; - device_info->max_mac_addrs = 1; + device_info->max_rx_pktlen = enic->max_mtu + ETHER_HDR_LEN + 4; + device_info->max_mac_addrs = ENIC_MAX_MAC_ADDR; device_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_IPV4_CKSUM | @@ -460,17 +475,18 @@ static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev, DEV_TX_OFFLOAD_VLAN_INSERT | DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM; + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO; device_info->default_rxconf = (struct rte_eth_rxconf) { .rx_free_thresh = 
ENIC_DEFAULT_RX_FREE_THRESH }; - - device_info->speed_capa = ETH_LINK_SPEED_40G; } static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev) { static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN, RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, RTE_PTYPE_L4_TCP, @@ -531,12 +547,12 @@ static void enicpmd_add_mac_addr(struct rte_eth_dev *eth_dev, enic_set_mac_address(enic, mac_addr->addr_bytes); } -static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, __rte_unused uint32_t index) +static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, uint32_t index) { struct enic *enic = pmd_priv(eth_dev); ENICPMD_FUNC_TRACE(); - enic_del_mac_address(enic); + enic_del_mac_address(enic, index); } static int enicpmd_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) @@ -575,7 +591,7 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = { .tx_queue_stop = enicpmd_dev_tx_queue_stop, .rx_queue_setup = enicpmd_dev_rx_queue_setup, .rx_queue_release = enicpmd_dev_rx_queue_release, - .rx_queue_count = NULL, + .rx_queue_count = enicpmd_dev_rx_queue_count, .rx_descriptor_done = NULL, .tx_queue_setup = enicpmd_dev_tx_queue_setup, .tx_queue_release = enicpmd_dev_tx_queue_release, @@ -607,7 +623,7 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = &enic_recv_pkts; eth_dev->tx_pkt_burst = &enic_xmit_pkts; - pdev = eth_dev->pci_dev; + pdev = RTE_DEV_TO_PCI(eth_dev->device); rte_eth_copy_pci_info(eth_dev, pdev); enic->pdev = pdev; addr = &pdev->addr; @@ -620,32 +636,15 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev) static struct eth_driver rte_enic_pmd = { .pci_drv = { - .name = "rte_enic_pmd", .id_table = pci_id_enic_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_enicpmd_dev_init, .dev_private_size = sizeof(struct enic), }; -/* Driver initialization routine. - * Invoked once at EAL init time. - * Register as the [Poll Mode] Driver of Cisco ENIC device. 
- */ -static int -rte_enic_pmd_init(__rte_unused const char *name, - __rte_unused const char *params) -{ - ENICPMD_FUNC_TRACE(); - - rte_eth_driver_register(&rte_enic_pmd); - return 0; -} - -static struct rte_driver rte_enic_driver = { - .type = PMD_PDEV, - .init = rte_enic_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_enic_driver, enic); -DRIVER_REGISTER_PCI_TABLE(enic, pci_id_enic_map); +RTE_PMD_REGISTER_PCI(net_enic, rte_enic_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map); +RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/enic/enic_main.c b/src/dpdk/drivers/net/enic/enic_main.c index 4530dcf4..21e8edeb 100644 --- a/src/dpdk/drivers/net/enic/enic_main.c +++ b/src/dpdk/drivers/net/enic/enic_main.c @@ -137,7 +137,6 @@ static void enic_clear_soft_stats(struct enic *enic) struct enic_soft_stats *soft_stats = &enic->soft_stats; rte_atomic64_clear(&soft_stats->rx_nombuf); rte_atomic64_clear(&soft_stats->rx_packet_errors); - rte_atomic64_clear(&soft_stats->tx_oversized); } static void enic_init_soft_stats(struct enic *enic) @@ -145,7 +144,6 @@ static void enic_init_soft_stats(struct enic *enic) struct enic_soft_stats *soft_stats = &enic->soft_stats; rte_atomic64_init(&soft_stats->rx_nombuf); rte_atomic64_init(&soft_stats->rx_packet_errors); - rte_atomic64_init(&soft_stats->tx_oversized); enic_clear_soft_stats(enic); } @@ -168,7 +166,6 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats) return; } - /* The number of truncated packets can only be calculated by * subtracting a hardware counter from error packets received by * the driver. Note: this causes transient inaccuracies in the @@ -177,26 +174,28 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats) * which can make ibytes be slightly higher than it should be. 
*/ rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors); - rx_truncated = rx_packet_errors - stats->rx.rx_errors - - stats->rx.rx_no_bufs; + rx_truncated = rx_packet_errors - stats->rx.rx_errors; r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated; r_stats->opackets = stats->tx.tx_frames_ok; - r_stats->ibytes = stats->rx.rx_unicast_bytes_ok+stats->rx.rx_multicast_bytes_ok+stats->rx.rx_broadcast_bytes_ok; + r_stats->ibytes = stats->rx.rx_bytes_ok; r_stats->obytes = stats->tx.tx_bytes_ok; r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop; - r_stats->oerrors = stats->tx.tx_errors + rte_atomic64_read(&soft_stats->tx_oversized); + r_stats->oerrors = stats->tx.tx_errors; r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated; r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf); } -void enic_del_mac_address(struct enic *enic) +void enic_del_mac_address(struct enic *enic, int mac_index) { - if (vnic_dev_del_addr(enic->vdev, enic->mac_addr)) + struct rte_eth_dev *eth_dev = enic->rte_dev; + uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes; + + if (vnic_dev_del_addr(enic->vdev, mac_addr)) dev_err(enic, "del mac addr failed\n"); } @@ -209,15 +208,6 @@ void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr) return; } - err = vnic_dev_del_addr(enic->vdev, enic->mac_addr); - if (err) { - dev_err(enic, "del mac addr failed\n"); - return; - } - - ether_addr_copy((struct ether_addr *)mac_addr, - (struct ether_addr *)enic->mac_addr); - err = vnic_dev_add_addr(enic->vdev, mac_addr); if (err) { dev_err(enic, "add mac addr failed\n"); @@ -244,14 +234,14 @@ void enic_init_vnic_resources(struct enic *enic) struct vnic_rq *data_rq; for (index = 0; index < enic->rq_count; index++) { - cq_idx = enic_cq_rq(enic, enic_sop_rq(index)); + cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index)); - vnic_rq_init(&enic->rq[enic_sop_rq(index)], + vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)], cq_idx, error_interrupt_enable, error_interrupt_offset); - data_rq = &enic->rq[enic_data_rq(index)]; + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)]; if (data_rq->in_use) vnic_rq_init(data_rq, cq_idx, @@ -414,14 +404,32 @@ enic_free_consistent(void *priv, rte_free(mze); } +int enic_link_update(struct enic *enic) +{ + struct rte_eth_dev *eth_dev = enic->rte_dev; + int ret; + int link_status = 0; + + link_status = enic_get_link_status(enic); + ret = (link_status == enic->link_status); + enic->link_status = link_status; + eth_dev->data->dev_link.link_status = link_status; + eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; + eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); + return ret; +} + static void enic_intr_handler(__rte_unused struct rte_intr_handle *handle, void *arg) { - struct enic *enic = pmd_priv((struct rte_eth_dev *)arg); + struct rte_eth_dev *dev = (struct rte_eth_dev *)arg; + struct enic *enic = pmd_priv(dev); vnic_intr_return_all_credits(&enic->intr); + enic_link_update(enic); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); enic_log_q_error(enic); } @@ -433,7 +441,13 @@ int enic_enable(struct enic *enic) eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; - vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + + /* vnic notification of link status has already been turned on in + * enic_dev_init() which is called during probe time. Here we are + * just turning on interrupt vector 0 if needed. 
+ */ + if (eth_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, 0); if (enic_clsf_init(enic)) dev_warning(enic, "Init of hash table for clsf failed."\ @@ -441,17 +455,17 @@ int enic_enable(struct enic *enic) for (index = 0; index < enic->rq_count; index++) { err = enic_alloc_rx_queue_mbufs(enic, - &enic->rq[enic_sop_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); if (err) { dev_err(enic, "Failed to alloc sop RX queue mbufs\n"); return err; } err = enic_alloc_rx_queue_mbufs(enic, - &enic->rq[enic_data_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_data_idx(index)]); if (err) { /* release the allocated mbufs for the sop rq*/ enic_rxmbuf_queue_release(enic, - &enic->rq[enic_sop_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); dev_err(enic, "Failed to alloc data RX queue mbufs\n"); return err; @@ -520,7 +534,10 @@ void enic_free_rq(void *rxq) if (rq_data->in_use) vnic_rq_free(rq_data); - vnic_cq_free(&enic->cq[rq_sop->index]); + vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]); + + rq_sop->in_use = 0; + rq_data->in_use = 0; } void enic_start_wq(struct enic *enic, uint16_t queue_idx) @@ -545,8 +562,10 @@ int enic_stop_wq(struct enic *enic, uint16_t queue_idx) void enic_start_rq(struct enic *enic, uint16_t queue_idx) { - struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)]; - struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx]; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; struct rte_eth_dev *eth_dev = enic->rte_dev; if (rq_data->in_use) @@ -560,8 +579,10 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx) { int ret1 = 0, ret2 = 0; struct rte_eth_dev *eth_dev = enic->rte_dev; - struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)]; - struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx]; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; ret2 = vnic_rq_disable(rq_sop); rte_mb(); @@ -579,16 +600,17 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx) int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, unsigned int socket_id, struct rte_mempool *mp, - uint16_t nb_desc) + uint16_t nb_desc, uint16_t free_thresh) { int rc; - uint16_t sop_queue_idx = enic_sop_rq(queue_idx); - uint16_t data_queue_idx = enic_data_rq(queue_idx); + uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx); + uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx); struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx]; struct vnic_rq *rq_data = &enic->rq[data_queue_idx]; unsigned int mbuf_size, mbufs_per_pkt; unsigned int nb_sop_desc, nb_data_desc; uint16_t min_sop, max_sop, min_data, max_data; + uint16_t mtu = enic->rte_dev->data->mtu; rq_sop->is_sop = 1; rq_sop->data_queue_idx = data_queue_idx; @@ -599,14 +621,18 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, rq_data->socket_id = socket_id; rq_data->mp = mp; rq_sop->in_use = 1; + rq_sop->rx_free_thresh = free_thresh; + rq_data->rx_free_thresh = free_thresh; + dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx, + free_thresh); mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM); if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter) { - dev_info(enic, "Scatter rx mode enabled\n"); + dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx); /* ceil((mtu + ETHER_HDR_LEN + 4)/mbuf_size) 
*/ - mbufs_per_pkt = ((enic->config.mtu + ETHER_HDR_LEN + 4) + + mbufs_per_pkt = ((mtu + ETHER_HDR_LEN + 4) + (mbuf_size - 1)) / mbuf_size; } else { dev_info(enic, "Scatter rx mode disabled\n"); @@ -614,10 +640,13 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, } if (mbufs_per_pkt > 1) { - dev_info(enic, "Scatter rx mode in use\n"); + dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx); + rq_sop->data_queue_enable = 1; rq_data->in_use = 1; } else { - dev_info(enic, "Scatter rx mode not being used\n"); + dev_info(enic, "Rq %u Scatter rx mode not being used\n", + queue_idx); + rq_sop->data_queue_enable = 0; rq_data->in_use = 0; } @@ -654,7 +683,7 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, } if (mbufs_per_pkt > 1) { dev_info(enic, "For mtu %d and mbuf size %d valid rx descriptor range is %d to %d\n", - enic->config.mtu, mbuf_size, min_sop + min_data, + mtu, mbuf_size, min_sop + min_data, max_sop + max_data); } dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n", @@ -705,6 +734,8 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, goto err_free_sop_mbuf; } + rq_sop->tot_nb_desc = nb_desc; /* squirl away for MTU update function */ + return 0; err_free_sop_mbuf: @@ -801,6 +832,10 @@ int enic_disable(struct enic *enic) vnic_intr_mask(&enic->intr); (void)vnic_intr_masked(&enic->intr); /* flush write */ + rte_intr_disable(&enic->pdev->intr_handle); + rte_intr_callback_unregister(&enic->pdev->intr_handle, + enic_intr_handler, + (void *)enic->rte_dev); vnic_dev_disable(enic->vdev); @@ -822,8 +857,14 @@ int enic_disable(struct enic *enic) } } + /* If we were using interrupts, set the interrupt vector to -1 + * to disable interrupts. We are not disabling link notifcations, + * though, as we want the polling of link status to continue working. 
+ */ + if (enic->rte_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, -1); + vnic_dev_set_reset_flag(enic->vdev, 1); - vnic_dev_notify_unset(enic->vdev); for (i = 0; i < enic->wq_count; i++) vnic_wq_clean(&enic->wq[i], enic_free_wq_buf); @@ -925,7 +966,7 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) for (i = 0; i < (1 << rss_hash_bits); i++) (*rss_cpu_buf_va).cpu[i / 4].b[i % 4] = - enic_sop_rq(i % enic->rq_count); + enic_rte_rq_idx_to_sop_idx(i % enic->rq_count); err = enic_set_rss_cpu(enic, rss_cpu_buf_pa, @@ -1025,6 +1066,9 @@ static void enic_dev_deinit(struct enic *enic) { struct rte_eth_dev *eth_dev = enic->rte_dev; + /* stop link status checking */ + vnic_dev_notify_unset(enic->vdev); + rte_free(eth_dev->data->mac_addrs); } @@ -1066,6 +1110,56 @@ int enic_set_vnic_res(struct enic *enic) return rc; } +/* Initialize the completion queue for an RQ */ +static int +enic_reinit_rq(struct enic *enic, unsigned int rq_idx) +{ + struct vnic_rq *sop_rq, *data_rq; + unsigned int cq_idx = enic_cq_rq(enic, rq_idx); + int rc = 0; + + sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)]; + + vnic_cq_clean(&enic->cq[cq_idx]); + vnic_cq_init(&enic->cq[cq_idx], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 0 /* interrupt_enable */, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + 0 /* interrupt offset */, + 0 /* cq_message_addr */); + + + vnic_rq_init_start(sop_rq, enic_cq_rq(enic, + enic_rte_rq_idx_to_sop_idx(rq_idx)), 0, + sop_rq->ring.desc_count - 1, 1, 0); + if (data_rq->in_use) { + vnic_rq_init_start(data_rq, + enic_cq_rq(enic, + enic_rte_rq_idx_to_data_idx(rq_idx)), 0, + data_rq->ring.desc_count - 1, 1, 0); + } + + rc = enic_alloc_rx_queue_mbufs(enic, sop_rq); + if (rc) + return rc; + + if (data_rq->in_use) { + rc = enic_alloc_rx_queue_mbufs(enic, data_rq); + if (rc) { + enic_rxmbuf_queue_release(enic, sop_rq); + return rc; + } + } + + return 0; +} + /* The Cisco NIC can send and receive packets up to a max packet size * determined by the NIC type and firmware. There is also an MTU * configured into the NIC via the CIMC/UCSM management interface @@ -1075,6 +1169,9 @@ int enic_set_vnic_res(struct enic *enic) */ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) { + unsigned int rq_idx; + struct vnic_rq *rq; + int rc = 0; uint16_t old_mtu; /* previous setting */ uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */ struct rte_eth_dev *eth_dev = enic->rte_dev; @@ -1082,10 +1179,6 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) old_mtu = eth_dev->data->mtu; config_mtu = enic->config.mtu; - /* only works with Rx scatter disabled */ - if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter) - return -ENOTSUP; - if (new_mtu > enic->max_mtu) { dev_err(enic, "MTU not updated: requested (%u) greater than max (%u)\n", @@ -1103,11 +1196,83 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) "MTU (%u) is greater than value configured in NIC (%u)\n", new_mtu, config_mtu); + /* The easy case is when scatter is disabled. However if the MTU + * becomes greater than the mbuf data size, packet drops will ensue. + */ + if (!enic->rte_dev->data->dev_conf.rxmode.enable_scatter) { + eth_dev->data->mtu = new_mtu; + goto set_mtu_done; + } + + /* Rx scatter is enabled so reconfigure RQ's on the fly. The point is to + * change Rx scatter mode if necessary for better performance. I.e. 
if + * MTU was greater than the mbuf size and now it's less, scatter Rx + * doesn't have to be used and vice versa. + */ + rte_spinlock_lock(&enic->mtu_lock); + + /* Stop traffic on all RQs */ + for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) { + rq = &enic->rq[rq_idx]; + if (rq->is_sop && rq->in_use) { + rc = enic_stop_rq(enic, + enic_sop_rq_idx_to_rte_idx(rq_idx)); + if (rc) { + dev_err(enic, "Failed to stop Rq %u\n", rq_idx); + goto set_mtu_done; + } + } + } + + /* replace Rx funciton with a no-op to avoid getting stale pkts */ + eth_dev->rx_pkt_burst = enic_dummy_recv_pkts; + rte_mb(); + + /* Allow time for threads to exit the real Rx function. */ + usleep(100000); + + /* now it is safe to reconfigure the RQs */ + /* update the mtu */ eth_dev->data->mtu = new_mtu; + /* free and reallocate RQs with the new MTU */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + + enic_free_rq(rq); + rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp, + rq->tot_nb_desc, rq->rx_free_thresh); + if (rc) { + dev_err(enic, + "Fatal MTU alloc error- No traffic will pass\n"); + goto set_mtu_done; + } + + rc = enic_reinit_rq(enic, rq_idx); + if (rc) { + dev_err(enic, + "Fatal MTU RQ reinit- No traffic will pass\n"); + goto set_mtu_done; + } + } + + /* put back the real receive function */ + rte_mb(); + eth_dev->rx_pkt_burst = enic_recv_pkts; + rte_mb(); + + /* restart Rx traffic */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + if (rq->is_sop && rq->in_use) + enic_start_rq(enic, rq_idx); + } + +set_mtu_done: dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu); - return 0; + rte_spinlock_unlock(&enic->mtu_lock); + return rc; } static int enic_dev_init(struct enic *enic) @@ -1125,24 +1290,32 @@ static int enic_dev_init(struct enic *enic) return err; } + /* Get available resource counts */ + enic_get_res_counts(enic); + if (enic->conf_rq_count == 1) { + dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n"); + dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n"); + dev_err(enic, "See the ENIC PMD guide for more information.\n"); + return -EINVAL; + } + /* Get the supported filters */ enic_fdir_info(enic); - eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN, 0); + eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN + * ENIC_MAX_MAC_ADDR, 0); if (!eth_dev->data->mac_addrs) { dev_err(enic, "mac addr storage alloc failed, aborting.\n"); return -1; } ether_addr_copy((struct ether_addr *) enic->mac_addr, - ð_dev->data->mac_addrs[0]); - - - /* Get available resource counts - */ - enic_get_res_counts(enic); + eth_dev->data->mac_addrs); vnic_dev_set_reset_flag(enic->vdev, 0); + /* set up link status checking */ + vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + return 0; } diff --git a/src/dpdk/drivers/net/enic/enic_res.c b/src/dpdk/drivers/net/enic/enic_res.c index 140c6efb..8a230a16 100644 --- a/src/dpdk/drivers/net/enic/enic_res.c +++ b/src/dpdk/drivers/net/enic/enic_res.c @@ -89,11 +89,10 @@ int enic_get_vnic_config(struct enic *enic) /* max packet size is only defined in newer VIC firmware * and will be 0 for legacy firmware and VICs */ - if (c->max_pkt_size > ENIC_DEFAULT_RX_MAX_PKT_SIZE) + if (c->max_pkt_size > ENIC_DEFAULT_MAX_PKT_SIZE) enic->max_mtu = c->max_pkt_size - (ETHER_HDR_LEN + 4); else - enic->max_mtu = ENIC_DEFAULT_RX_MAX_PKT_SIZE - - (ETHER_HDR_LEN + 
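With the rework above, enic_set_mtu() now copes with Rx scatter by quiescing the queues (swapping in the dummy rx_pkt_burst and sleeping briefly), reallocating the RQs for the new MTU, reinitializing their CQs, and restarting them under mtu_lock. From the application side the whole path is driven by the standard ethdev call; a minimal sketch, with the helper name and log wording as illustrative assumptions:

#include <rte_ethdev.h>
#include <rte_log.h>

static int
change_port_mtu(uint8_t port_id, uint16_t mtu)
{
	/* reaches enic_set_mtu() through the PMD's mtu_set callback */
	int rc = rte_eth_dev_set_mtu(port_id, mtu);

	if (rc != 0)
		RTE_LOG(ERR, PMD, "MTU %u rejected on port %u (%d)\n",
			mtu, port_id, rc);
	return rc;
}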
4); + enic->max_mtu = ENIC_DEFAULT_MAX_PKT_SIZE - (ETHER_HDR_LEN + 4); if (c->mtu == 0) c->mtu = 1500; diff --git a/src/dpdk/drivers/net/enic/enic_res.h b/src/dpdk/drivers/net/enic/enic_res.h index cda2da1e..303530ef 100644 --- a/src/dpdk/drivers/net/enic/enic_res.h +++ b/src/dpdk/drivers/net/enic/enic_res.h @@ -48,11 +48,7 @@ #define ENIC_MIN_MTU 68 /* Does not include (possible) inserted VLAN tag and FCS */ -#define ENIC_DEFAULT_RX_MAX_PKT_SIZE 9022 - -/* Does not include (possible) inserted VLAN tag and FCS */ -#define ENIC_TX_MAX_PKT_SIZE 9208 - +#define ENIC_DEFAULT_MAX_PKT_SIZE 9022 #define ENIC_MULTICAST_PERFECT_FILTERS 32 #define ENIC_UNICAST_PERFECT_FILTERS 32 diff --git a/src/dpdk/drivers/net/enic/enic_rxtx.c b/src/dpdk/drivers/net/enic/enic_rxtx.c index cd155024..26b83ae9 100644 --- a/src/dpdk/drivers/net/enic/enic_rxtx.c +++ b/src/dpdk/drivers/net/enic/enic_rxtx.c @@ -37,6 +37,9 @@ #include "enic_compat.h" #include "rq_enet_desc.h" #include "enic.h" +#include +#include +#include #define RTE_PMD_USE_PREFETCH @@ -129,6 +132,60 @@ enic_cq_rx_desc_n_bytes(struct cq_desc *cqd) CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; } +/* Find the offset to L5. This is needed by enic TSO implementation. + * Return 0 if not a TCP packet or can't figure out the length. + */ +static inline uint8_t tso_header_len(struct rte_mbuf *mbuf) +{ + struct ether_hdr *eh; + struct vlan_hdr *vh; + struct ipv4_hdr *ip4; + struct ipv6_hdr *ip6; + struct tcp_hdr *th; + uint8_t hdr_len; + uint16_t ether_type; + + /* offset past Ethernet header */ + eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + ether_type = eh->ether_type; + hdr_len = sizeof(struct ether_hdr); + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + vh = rte_pktmbuf_mtod_offset(mbuf, struct vlan_hdr *, hdr_len); + ether_type = vh->eth_proto; + hdr_len += sizeof(struct vlan_hdr); + } + + /* offset past IP header */ + switch (rte_be_to_cpu_16(ether_type)) { + case ETHER_TYPE_IPv4: + ip4 = rte_pktmbuf_mtod_offset(mbuf, struct ipv4_hdr *, hdr_len); + if (ip4->next_proto_id != IPPROTO_TCP) + return 0; + hdr_len += (ip4->version_ihl & 0xf) * 4; + break; + case ETHER_TYPE_IPv6: + ip6 = rte_pktmbuf_mtod_offset(mbuf, struct ipv6_hdr *, hdr_len); + if (ip6->proto != IPPROTO_TCP) + return 0; + hdr_len += sizeof(struct ipv6_hdr); + break; + default: + return 0; + } + + if ((hdr_len + sizeof(struct tcp_hdr)) > mbuf->pkt_len) + return 0; + + /* offset past TCP header */ + th = rte_pktmbuf_mtod_offset(mbuf, struct tcp_hdr *, hdr_len); + hdr_len += (th->data_off >> 4) * 4; + + if (hdr_len > mbuf->pkt_len) + return 0; + + return hdr_len; +} + static inline uint8_t enic_cq_rx_check_err(struct cq_desc *cqd) { @@ -149,30 +206,18 @@ enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd) uint8_t cqrd_flags = cqrd->flags; static const uint32_t cq_type_table[128] __rte_cache_aligned = { [0x00] = RTE_PTYPE_UNKNOWN, - [0x20] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_NONFRAG, - [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_FRAG, - [0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x10] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_NONFRAG, - [0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | 
RTE_PTYPE_L4_UDP, - [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_FRAG, - [0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x54] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, + [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, /* All others reserved */ }; cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT @@ -185,9 +230,10 @@ static inline void enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) { struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - uint16_t ciflags, bwflags, pkt_flags = 0; + uint16_t ciflags, bwflags, pkt_flags = 0, vlan_tci; ciflags = enic_cq_rx_desc_ciflags(cqrd); bwflags = enic_cq_rx_desc_bwflags(cqrd); + vlan_tci = enic_cq_rx_desc_vlan(cqrd); mbuf->ol_flags = 0; @@ -195,13 +241,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) if (unlikely(!enic_cq_rx_desc_eop(ciflags))) goto mbuf_flags_done; - /* VLAN stripping */ + /* VLAN STRIPPED flag. The L2 packet type updated here also */ if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; - mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd); + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; } else { - mbuf->vlan_tci = 0; + if (vlan_tci != 0) + mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN; + else + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; } + mbuf->vlan_tci = vlan_tci; /* RSS flag */ if (enic_cq_rx_desc_rss_type(cqrd)) { @@ -210,13 +260,25 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) } /* checksum flags */ - if (!enic_cq_rx_desc_csum_not_calc(cqrd) && - (mbuf->packet_type & RTE_PTYPE_L3_IPV4)) { - if (unlikely(!enic_cq_rx_desc_ipv4_csum_ok(cqrd))) - pkt_flags |= PKT_RX_IP_CKSUM_BAD; - if (mbuf->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { - if (unlikely(!enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))) - pkt_flags |= PKT_RX_L4_CKSUM_BAD; + if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) { + if (enic_cq_rx_desc_csum_not_calc(cqrd)) + pkt_flags |= (PKT_RX_IP_CKSUM_UNKNOWN & + PKT_RX_L4_CKSUM_UNKNOWN); + else { + uint32_t l4_flags; + l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; + + if (enic_cq_rx_desc_ipv4_csum_ok(cqrd)) + pkt_flags |= PKT_RX_IP_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_IP_CKSUM_BAD; + + if (l4_flags & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { + if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) + pkt_flags |= PKT_RX_L4_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_L4_CKSUM_BAD; + } } } @@ -224,6 +286,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) mbuf->ol_flags = pkt_flags; } +/* dummy receive function to replace actual function in + * order to do safe reconfiguration operations. 
+ */ +uint16_t +enic_dummy_recv_pkts(__rte_unused void *rx_queue, + __rte_unused struct rte_mbuf **rx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -373,10 +446,11 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, rte_mb(); if (data_rq->in_use) - iowrite32(data_rq->posted_index, - &data_rq->ctrl->posted_index); + iowrite32_relaxed(data_rq->posted_index, + &data_rq->ctrl->posted_index); rte_compiler_barrier(); - iowrite32(sop_rq->posted_index, &sop_rq->ctrl->posted_index); + iowrite32_relaxed(sop_rq->posted_index, + &sop_rq->ctrl->posted_index); } @@ -459,6 +533,8 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint8_t vlan_tag_insert; uint8_t eop; uint64_t bus_addr; + uint8_t offload_mode; + uint16_t header_len; enic_cleanup_wq(enic, wq); wq_desc_avail = vnic_wq_desc_avail(wq); @@ -470,23 +546,16 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, for (index = 0; index < nb_pkts; index++) { tx_pkt = *tx_pkts++; - pkt_len = tx_pkt->pkt_len; - data_len = tx_pkt->data_len; - ol_flags = tx_pkt->ol_flags; nb_segs = tx_pkt->nb_segs; - - if (pkt_len > ENIC_TX_MAX_PKT_SIZE) { - rte_pktmbuf_free(tx_pkt); - rte_atomic64_inc(&enic->soft_stats.tx_oversized); - continue; - } - if (nb_segs > wq_desc_avail) { if (index > 0) goto post; goto done; } + pkt_len = tx_pkt->pkt_len; + data_len = tx_pkt->data_len; + ol_flags = tx_pkt->ol_flags; mss = 0; vlan_id = 0; vlan_tag_insert = 0; @@ -497,13 +566,17 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, desc_p = descs + head_idx; eop = (data_len == pkt_len); - - if (ol_flags & ol_flags_mask) { - if (ol_flags & PKT_TX_VLAN_PKT) { - vlan_tag_insert = 1; - vlan_id = tx_pkt->vlan_tci; + offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM; + header_len = 0; + + if (tx_pkt->tso_segsz) { + header_len = tso_header_len(tx_pkt); + if (header_len) { + offload_mode = WQ_ENET_OFFLOAD_MODE_TSO; + mss = tx_pkt->tso_segsz; } - + } + if ((ol_flags & ol_flags_mask) && (header_len == 0)) { if (ol_flags & PKT_TX_IP_CKSUM) mss |= ENIC_CALC_IP_CKSUM; @@ -516,8 +589,14 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, } } - wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop, - eop, 0, vlan_tag_insert, vlan_id, 0); + if (ol_flags & PKT_TX_VLAN_PKT) { + vlan_tag_insert = 1; + vlan_id = tx_pkt->vlan_tci; + } + + wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, header_len, + offload_mode, eop, eop, 0, vlan_tag_insert, + vlan_id, 0); *desc_p = desc_tmp; buf = &wq->bufs[head_idx]; @@ -537,8 +616,9 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + tx_pkt->data_off); wq_enet_desc_enc((struct wq_enet_desc *) &desc_tmp, bus_addr, data_len, - mss, 0, 0, eop, eop, 0, - vlan_tag_insert, vlan_id, 0); + mss, 0, offload_mode, eop, eop, + 0, vlan_tag_insert, vlan_id, + 0); *desc_p = desc_tmp; buf = &wq->bufs[head_idx]; @@ -550,7 +630,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, } post: rte_wmb(); - iowrite32(head_idx, &wq->ctrl->posted_index); + iowrite32_relaxed(head_idx, &wq->ctrl->posted_index); done: wq->ring.desc_avail = wq_desc_avail; wq->head_idx = head_idx; diff --git a/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h b/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h index a21daa2a..f07b678f 100644 --- a/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h +++ b/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h @@ -39,6 +39,8 @@ POSSIBILITY OF SUCH DAMAGE. 
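The transmit path above switches to WQ_ENET_OFFLOAD_MODE_TSO whenever tso_segsz is non-zero and derives the L2/L3/L4 header length itself via tso_header_len(). A short sketch of how a sender might mark an mbuf for this path; only tso_segsz is strictly required by this driver, and the remaining fields follow the generic DPDK TSO convention rather than anything enic-specific, so treat them as assumptions.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

static void
request_tso(struct rte_mbuf *m, uint16_t mss)
{
	/* enic keys off tso_segsz; the header-length fields are the
	 * usual DPDK convention and are not consulted by
	 * tso_header_len(), which walks the headers itself. */
	m->tso_segsz = mss;
	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
	m->l2_len = sizeof(struct ether_hdr);
	m->l3_len = sizeof(struct ipv4_hdr);
	m->l4_len = sizeof(struct tcp_hdr);
}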
#include #include #include +#include + #include "../fm10k_logs.h" /* TODO: this does not look like it should be used... */ @@ -88,17 +90,16 @@ typedef int bool; #endif /* offsets are WORD offsets, not BYTE offsets */ -#define FM10K_WRITE_REG(hw, reg, val) \ - ((((volatile uint32_t *)(hw)->hw_addr)[(reg)]) = ((uint32_t)(val))) -#define FM10K_READ_REG(hw, reg) \ - (((volatile uint32_t *)(hw)->hw_addr)[(reg)]) +#define FM10K_WRITE_REG(hw, reg, val) \ + rte_write32((val), ((hw)->hw_addr + (reg))) + +#define FM10K_READ_REG(hw, reg) rte_read32(((hw)->hw_addr + (reg))) + #define FM10K_WRITE_FLUSH(a) FM10K_READ_REG(a, FM10K_CTRL) -#define FM10K_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define FM10K_PCI_REG(reg) rte_read32(reg) -#define FM10K_PCI_REG_WRITE(reg, value) do { \ - FM10K_PCI_REG((reg)) = (value); \ -} while (0) +#define FM10K_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) /* not implemented */ #define FM10K_READ_PCI_WORD(hw, reg) 0 diff --git a/src/dpdk/drivers/net/fm10k/fm10k.h b/src/dpdk/drivers/net/fm10k/fm10k.h index 05aa1a25..c6fed215 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k.h +++ b/src/dpdk/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c b/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c index 01f4a72c..dd021e46 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c @@ -52,12 +52,14 @@ #define MAX_QUERY_SWITCH_STATE_TIMES 10 /* Wait interval to get switch status */ #define WAIT_SWITCH_MSG_US 100000 +/* A period of quiescence for switch */ +#define FM10K_SWITCH_QUIESCE_US 10000 /* Number of chars per uint32 type */ #define CHARS_PER_UINT32 (sizeof(uint32_t)) #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1) /* default 1:1 map from queue ID to interrupt vector ID */ -#define Q2V(dev, queue_id) (dev->pci_dev->intr_handle.intr_vec[queue_id]) +#define Q2V(pci_dev, queue_id) ((pci_dev)->intr_handle.intr_vec[queue_id]) /* First 64 Logical ports for PF/VMDQ, second 64 for Flow director */ #define MAX_LPORT_NUM 128 @@ -675,7 +677,7 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev) /* Enable use of FTAG bit in TX descriptor, PFVTCTL * register is read-only for VF. 
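The fm10k_osdep.h hunk above replaces the volatile-pointer register macros with the rte_read32()/rte_write32() accessors introduced in DPDK 17.02. A one-line sketch of the pattern these accessors provide; the function name is illustrative.

#include <rte_io.h>

static inline void
post_doorbell(volatile void *reg, uint32_t tail)
{
	/* rte_write32() issues an ordered MMIO write (prior descriptor
	 * stores become visible first); rte_write32_relaxed() leaves
	 * that ordering to the caller. */
	rte_write32(tail, reg);
}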
*/ - if (fm10k_check_ftag(dev->pci_dev->devargs)) { + if (fm10k_check_ftag(dev->device->devargs)) { if (hw->mac.type == fm10k_mac_pf) { FM10K_WRITE_REG(hw, FM10K_PFVTCTL(i), FM10K_PFVTCTL_FTAG_DESC_ENABLE); @@ -693,8 +695,9 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev) base_addr >> (CHAR_BIT * sizeof(uint32_t))); FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size); - /* assign default SGLORT for each TX queue */ - FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map); + /* assign default SGLORT for each TX queue by PF */ + if (hw->mac.type == fm10k_mac_pf) + FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map); } /* set up vector or scalar TX function as appropriate */ @@ -708,7 +711,8 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct fm10k_macvlan_filter_info *macvlan; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int i, ret; struct fm10k_rx_queue *rxq; uint64_t base_addr; @@ -722,13 +726,13 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev) i = 0; if (rte_intr_dp_is_en(intr_handle)) { for (; i < dev->data->nb_rx_queues; i++) { - FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(dev, i)); + FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(pdev, i)); if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); } @@ -1168,7 +1172,8 @@ static void fm10k_dev_stop(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int i; PMD_INIT_FUNC_TRACE(); @@ -1187,10 +1192,10 @@ fm10k_dev_stop(struct rte_eth_dev *dev) FM10K_WRITE_REG(hw, FM10K_RXINT(i), 3 << FM10K_RXINT_TIMER_SHIFT); if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)), FM10K_ITR_MASK_SET); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)), FM10K_ITR_MASK_SET); } } @@ -1233,6 +1238,9 @@ fm10k_dev_close(struct rte_eth_dev *dev) MAX_LPORT_NUM, false); fm10k_mbx_unlock(hw); + /* allow 10ms for device to quiesce */ + rte_delay_us(FM10K_SWITCH_QUIESCE_US); + /* Stop mailbox service first */ fm10k_close_mbx_service(hw); fm10k_dev_stop(dev); @@ -1309,6 +1317,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < FM10K_NB_HW_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + fm10k_hw_stats_strings[count].offset); + xstats[count].id = count; count++; } @@ -1318,12 +1327,14 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)&hw_stats->q[q]) + fm10k_hw_stats_rx_q_strings[i].offset); + xstats[count].id = count; count++; } for (i = 0; i < FM10K_NB_TX_Q_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)&hw_stats->q[q]) + fm10k_hw_stats_tx_q_strings[i].offset); + xstats[count].id = count; count++; } } @@ -1381,16 +1392,18 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct 
fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); PMD_INIT_FUNC_TRACE(); + dev_info->pci_dev = pdev; dev_info->min_rx_bufsize = FM10K_MIN_RX_BUF_SIZE; dev_info->max_rx_pktlen = FM10K_MAX_PKT_SIZE; dev_info->max_rx_queues = hw->mac.max_queues; dev_info->max_tx_queues = hw->mac.max_queues; dev_info->max_mac_addrs = FM10K_MAX_MACADDR_NUM; dev_info->max_hash_mac_addrs = 0; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pdev->max_vfs; dev_info->vmdq_pool_base = 0; dev_info->vmdq_queue_base = 0; dev_info->max_vmdq_pools = ETH_32_POOLS; @@ -1441,6 +1454,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2327,15 +2342,16 @@ static int fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); /* Enable ITR */ if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pdev->intr_handle); return 0; } @@ -2343,13 +2359,14 @@ static int fm10k_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); /* Disable ITR */ if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)), FM10K_ITR_MASK_SET); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_MASK_SET); return 0; } @@ -2358,7 +2375,8 @@ static int fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; uint32_t intr_vector, vec; uint16_t queue_id; int result = 0; @@ -2374,7 +2392,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) intr_vector = dev->data->nb_rx_queues; /* disable interrupt first */ - rte_intr_disable(&dev->pci_dev->intr_handle); + rte_intr_disable(intr_handle); if (hw->mac.type == fm10k_mac_pf) fm10k_dev_disable_intr_pf(dev); else @@ -2409,7 +2427,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) fm10k_dev_enable_intr_pf(dev); else fm10k_dev_enable_intr_vf(dev); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); hw->mac.ops.update_int_moderator(hw); return result; } @@ -2524,7 +2542,7 @@ error: */ static void fm10k_dev_interrupt_handler_pf( - __rte_unused struct rte_intr_handle *handle, + struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -2575,7 +2593,7 @@ fm10k_dev_interrupt_handler_pf( FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | 
FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /** @@ -2591,7 +2609,7 @@ fm10k_dev_interrupt_handler_pf( */ static void fm10k_dev_interrupt_handler_vf( - __rte_unused struct rte_intr_handle *handle, + struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -2609,7 +2627,7 @@ fm10k_dev_interrupt_handler_vf( FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /* Mailbox message handler in VF */ @@ -2731,7 +2749,7 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) int use_sse = 1; uint16_t tx_ftag_en = 0; - if (fm10k_check_ftag(dev->pci_dev->devargs)) + if (fm10k_check_ftag(dev->device->devargs)) tx_ftag_en = 1; for (i = 0; i < dev->data->nb_tx_queues; i++) { @@ -2749,8 +2767,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prepare = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prepare = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2762,7 +2782,7 @@ fm10k_set_rx_function(struct rte_eth_dev *dev) uint16_t i, rx_using_sse; uint16_t rx_ftag_en = 0; - if (fm10k_check_ftag(dev->pci_dev->devargs)) + if (fm10k_check_ftag(dev->device->devargs)) rx_ftag_en = 1; /* In order to allow Vector Rx there are a few configuration @@ -2821,6 +2841,8 @@ static int eth_fm10k_dev_init(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int diag, i; struct fm10k_macvlan_filter_info *macvlan; @@ -2829,23 +2851,25 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prepare = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - rte_eth_copy_pci_info(dev, dev->pci_dev); + rte_eth_copy_pci_info(dev, pdev); + dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private); memset(macvlan, 0, sizeof(*macvlan)); /* Vendor and Device ID need to be set before init of shared code */ memset(hw, 0, sizeof(*hw)); - hw->device_id = dev->pci_dev->id.device_id; - hw->vendor_id = dev->pci_dev->id.vendor_id; - hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id; - hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id; + hw->device_id = pdev->id.device_id; + hw->vendor_id = pdev->id.vendor_id; + hw->subsystem_device_id = pdev->id.subsystem_device_id; + hw->subsystem_vendor_id = pdev->id.subsystem_vendor_id; hw->revision_id = 0; - hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr; + hw->hw_addr = (void *)pdev->mem_resource[0].addr; if (hw->hw_addr == NULL) { PMD_INIT_LOG(ERR, "Bad mem resource." 
" Try to blacklist unused devices."); @@ -2915,20 +2939,20 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) /*PF/VF has different interrupt handling mechanism */ if (hw->mac.type == fm10k_mac_pf) { /* register callback func to eal lib */ - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(intr_handle, fm10k_dev_interrupt_handler_pf, (void *)dev); /* enable MISC interrupt */ fm10k_dev_enable_intr_pf(dev); } else { /* VF */ - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(intr_handle, fm10k_dev_interrupt_handler_vf, (void *)dev); fm10k_dev_enable_intr_vf(dev); } /* Enable intr after callback registered */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); hw->mac.ops.update_int_moderator(hw); @@ -2998,7 +3022,8 @@ static int eth_fm10k_dev_uninit(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; PMD_INIT_FUNC_TRACE(); /* only uninitialize in the primary process */ @@ -3013,7 +3038,7 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev) dev->tx_pkt_burst = NULL; /* disable uio/vfio intr */ - rte_intr_disable(&(dev->pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /*PF/VF has different interrupt handling mechanism */ if (hw->mac.type == fm10k_mac_pf) { @@ -3021,13 +3046,13 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev) fm10k_dev_disable_intr_pf(dev); /* unregister callback func to eal lib */ - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_callback_unregister(intr_handle, fm10k_dev_interrupt_handler_pf, (void *)dev); } else { /* disable interrupt */ fm10k_dev_disable_intr_vf(dev); - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_callback_unregister(intr_handle, fm10k_dev_interrupt_handler_vf, (void *)dev); } @@ -3055,34 +3080,16 @@ static const struct rte_pci_id pci_id_fm10k_map[] = { static struct eth_driver rte_pmd_fm10k = { .pci_drv = { - .name = "rte_pmd_fm10k", .id_table = pci_id_fm10k_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_fm10k_dev_init, .eth_dev_uninit = eth_fm10k_dev_uninit, .dev_private_size = sizeof(struct fm10k_adapter), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI FM10K devices. - */ -static int -rte_pmd_fm10k_init(__rte_unused const char *name, - __rte_unused const char *params) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_pmd_fm10k); - return 0; -} - -static struct rte_driver rte_fm10k_driver = { - .type = PMD_PDEV, - .init = rte_pmd_fm10k_init, -}; - -PMD_REGISTER_DRIVER(rte_fm10k_driver, fm10k); -DRIVER_REGISTER_PCI_TABLE(fm10k, pci_id_fm10k_map); +RTE_PMD_REGISTER_PCI(net_fm10k, rte_pmd_fm10k.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_fm10k, pci_id_fm10k_map); +RTE_PMD_REGISTER_KMOD_DEP(net_fm10k, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c b/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c index 5b2d04bf..144e5e6b 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -96,6 +106,20 @@ rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d) if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK) m->ol_flags |= PKT_RX_RSS_HASH; + + if (unlikely((d->d.staterr & + (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) == + (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE))) + m->ol_flags |= PKT_RX_IP_CKSUM_BAD; + else + m->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + + if (unlikely((d->d.staterr & + (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) == + (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E))) + m->ol_flags |= PKT_RX_L4_CKSUM_BAD; + else + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; } uint16_t @@ -583,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} diff --git a/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c b/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c index 9ea747e1..27f3e43f 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c @@ -67,6 +67,8 @@ fm10k_reset_tx_queue(struct fm10k_tx_queue *txq); #define RXEFLAG_SHIFT (13) /* IPE/L4E flag shift */ #define L3L4EFLAG_SHIFT (14) +/* shift PKT_RX_L4_CKSUM_GOOD into one byte by 1 bit */ +#define CKSUM_SHIFT (1) static inline void fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) @@ -92,11 +94,18 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001); + /* mask the lower byte of ol_flags */ + const __m128i ol_flags_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x00FF, 0x00FF, 0x00FF, 0x00FF); + const __m128i l3l4cksum_flag = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, - PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0); + (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT); const __m128i rxe_flag = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, @@ -139,6 +148,10 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) /* Process L4/L3 checksum error flags */ cksumflag = _mm_srli_epi16(cksumflag, 
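fm10k_prep_pkts() above is wired into dev->tx_pkt_prepare, which applications reach through rte_eth_tx_prepare() (new in DPDK 17.02). A sketch of a transmit loop using it; the drop-on-failure policy and helper name are assumptions made for brevity.

#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

static uint16_t
send_burst(uint8_t port, uint16_t queue, struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t nb_prep, nb_tx, i;

	/* runs the PMD's tx_pkt_prepare hook (fm10k_prep_pkts here);
	 * on failure rte_errno is set and the index of the offending
	 * packet is returned */
	nb_prep = rte_eth_tx_prepare(port, queue, pkts, n);
	for (i = nb_prep; i < n; i++)
		rte_pktmbuf_free(pkts[i]);	/* drop the unvalidated tail */

	nb_tx = rte_eth_tx_burst(port, queue, pkts, nb_prep);
	for (i = nb_tx; i < nb_prep; i++)
		rte_pktmbuf_free(pkts[i]);	/* ring full */
	return nb_tx;
}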
L3L4EFLAG_SHIFT); cksumflag = _mm_shuffle_epi8(l3l4cksum_flag, cksumflag); + + /* clean the higher byte and shift back the flag bits */ + cksumflag = _mm_and_si128(cksumflag, ol_flags_msk); + cksumflag = _mm_slli_epi16(cksumflag, CKSUM_SHIFT); vtag1 = _mm_or_si128(cksumflag, vtag1); vol.dword = _mm_cvtsi128_si64(vtag1); @@ -234,11 +247,8 @@ fm10k_rx_vec_condition_check(struct rte_eth_dev *dev) if (fconf->mode != RTE_FDIR_MODE_NONE) return -1; - /* - no csum error report support - * - no header split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + /* no header split support */ + if (rxmode->header_split == 1) return -1; return 0; @@ -406,7 +416,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, */ rxdp = rxq->hw_ring + next_dd; - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); + rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act @@ -468,6 +478,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); @@ -476,8 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]); descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_adminq.c b/src/dpdk/drivers/net/i40e/base/i40e_adminq.c index 0d3a83fa..5bdf3f77 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_adminq.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_adminq.c @@ -1077,11 +1077,11 @@ enum i40e_status_code i40e_clean_arq_element(struct i40e_hw *hw, desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc); desc_idx = ntc; + hw->aq.arq_last_status = + (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval); flags = LE16_TO_CPU(desc->flags); if (flags & I40E_AQ_FLAG_ERR) { ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.arq_last_status = - (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval); i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", diff --git a/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h b/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h index 2b7a7608..67cef7cf 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h @@ -139,12 +139,10 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_list_func_capabilities = 0x000A, i40e_aqc_opc_list_dev_capabilities = 0x000B, -#ifdef X722_SUPPORT /* Proxy commands */ i40e_aqc_opc_set_proxy_config = 0x0104, i40e_aqc_opc_set_ns_proxy_table_entry = 0x0105, -#endif /* LAA */ i40e_aqc_opc_mac_address_read = 0x0107, i40e_aqc_opc_mac_address_write = 0x0108, @@ -152,12 +150,11 @@ enum i40e_admin_queue_opc { /* PXE */ i40e_aqc_opc_clear_pxe_mode = 0x0110, -#ifdef X722_SUPPORT /* WoL commands */ i40e_aqc_opc_set_wol_filter = 0x0120, i40e_aqc_opc_get_wake_reason = 0x0121, + i40e_aqc_opc_clear_all_wol_filters = 0x025E, -#endif /* internal switch commands */ i40e_aqc_opc_get_switch_config = 0x0200, i40e_aqc_opc_add_statistics = 0x0201, @@ -196,6 +193,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_control_packet_filter = 0x025B, 
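In the vector Rx hunk above, the 17.02-style checksum flags (GOOD and BAD are distinct bits) are packed into a single byte with CKSUM_SHIFT for the shuffle and shifted back afterwards. On the application side the result is consumed with ordinary mask tests; a minimal sketch, with the helper name as an assumption.

#include <rte_mbuf.h>

static inline int
rx_csum_ok(const struct rte_mbuf *m)
{
	/* neither GOOD nor BAD set means hardware did not verify it */
	return !(m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD));
}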
i40e_aqc_opc_add_cloud_filters = 0x025C, i40e_aqc_opc_remove_cloud_filters = 0x025D, + i40e_aqc_opc_clear_wol_switch_filters = 0x025E, i40e_aqc_opc_add_mirror_rule = 0x0260, i40e_aqc_opc_delete_mirror_rule = 0x0261, @@ -223,6 +221,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -278,12 +279,10 @@ enum i40e_admin_queue_opc { /* Tunnel commands */ i40e_aqc_opc_add_udp_tunnel = 0x0B00, i40e_aqc_opc_del_udp_tunnel = 0x0B01, -#ifdef X722_SUPPORT i40e_aqc_opc_set_rss_key = 0x0B02, i40e_aqc_opc_set_rss_lut = 0x0B03, i40e_aqc_opc_get_rss_key = 0x0B04, i40e_aqc_opc_get_rss_lut = 0x0B05, -#endif /* Async Events */ i40e_aqc_opc_event_lan_overflow = 0x1001, @@ -471,13 +470,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -492,17 +493,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; @@ -532,7 +535,8 @@ struct i40e_aqc_mac_address_read { #define I40E_AQC_PORT_ADDR_VALID 0x40 #define I40E_AQC_WOL_ADDR_VALID 0x80 #define I40E_AQC_MC_MAG_EN_VALID 0x100 -#define I40E_AQC_ADDR_VALID_MASK 0x1F0 +#define I40E_AQC_WOL_PRESERVE_STATUS 0x200 +#define I40E_AQC_ADDR_VALID_MASK 0x3F0 u8 reserved[6]; __le32 addr_high; __le32 addr_low; @@ -552,6 +556,8 @@ I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data); /* Manage MAC Address Write Command (0x0108) */ struct i40e_aqc_mac_address_write { __le16 command_flags; +#define I40E_AQC_MC_MAG_EN 0x0100 +#define I40E_AQC_WOL_PRESERVE_ON_PFR 0x0200 #define 
I40E_AQC_WRITE_TYPE_LAA_ONLY 0x0000 #define I40E_AQC_WRITE_TYPE_LAA_WOL 0x4000 #define I40E_AQC_WRITE_TYPE_PORT 0x8000 @@ -575,15 +581,24 @@ struct i40e_aqc_clear_pxe { I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe); -#ifdef X722_SUPPORT /* Set WoL Filter (0x0120) */ struct i40e_aqc_set_wol_filter { __le16 filter_index; #define I40E_AQC_MAX_NUM_WOL_FILTERS 8 +#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT 15 +#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK (0x1 << \ + I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT) + +#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT 0 +#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK (0x7 << \ + I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT) __le16 cmd_flags; #define I40E_AQC_SET_WOL_FILTER 0x8000 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL 0x4000 +#define I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR 0x2000 +#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR 0 +#define I40E_AQC_SET_WOL_FILTER_ACTION_SET 1 __le16 valid_flags; #define I40E_AQC_SET_WOL_FILTER_ACTION_VALID 0x8000 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID 0x4000 @@ -594,24 +609,29 @@ struct i40e_aqc_set_wol_filter { I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter); +struct i40e_aqc_set_wol_filter_data { + u8 filter[128]; + u8 mask[16]; +}; + +I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data); + /* Get Wake Reason (0x0121) */ struct i40e_aqc_get_wake_reason_completion { u8 reserved_1[2]; __le16 wake_reason; +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT 0 +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \ + I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT) +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT 8 +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK (0xFF << \ + I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT) u8 reserved_2[12]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion); -struct i40e_aqc_set_wol_filter_data { - u8 filter[128]; - u8 mask[16]; -}; - -I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data); - -#endif /* X722_SUPPORT */ /* Switch configuration commands (0x02xx) */ /* Used by many indirect commands that only pass an seid and a buffer in the @@ -694,6 +714,8 @@ struct i40e_aqc_set_port_parameters { #define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! 
*/ #define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA 4 __le16 bad_frame_vsi; +#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT 0x0 +#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK 0x3FF __le16 default_seid; /* reserved for command */ u8 reserved[10]; }; @@ -745,6 +767,7 @@ I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); /* Set Switch Configuration (direct 0x0205) */ struct i40e_aqc_set_switch_config { __le16 flags; +/* flags used for both fields below */ #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; @@ -913,16 +936,12 @@ struct i40e_aqc_vsi_properties_data { I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) /* queueing option section */ u8 queueing_opt_flags; -#ifdef X722_SUPPORT #define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 #define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 -#endif #define I40E_AQ_VSI_QUE_OPT_TCP_ENA 0x10 #define I40E_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 -#ifdef X722_SUPPORT #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 -#endif u8 queueing_opt_reserved[3]; /* scheduler section */ u8 up_enable_bits; @@ -1644,6 +1663,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ @@ -1691,6 +1728,7 @@ enum i40e_aq_phy_type { #define I40E_LINK_SPEED_10GB_SHIFT 0x3 #define I40E_LINK_SPEED_40GB_SHIFT 0x4 #define I40E_LINK_SPEED_20GB_SHIFT 0x5 +#define I40E_LINK_SPEED_25GB_SHIFT 0x6 enum i40e_aq_link_speed { I40E_LINK_SPEED_UNKNOWN = 0, @@ -1698,7 +1736,8 @@ enum i40e_aq_link_speed { I40E_LINK_SPEED_1GB = (1 << I40E_LINK_SPEED_1000MB_SHIFT), I40E_LINK_SPEED_10GB = (1 << I40E_LINK_SPEED_10GB_SHIFT), I40E_LINK_SPEED_40GB = (1 << I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT) + I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT), + I40E_LINK_SPEED_25GB = (1 << I40E_LINK_SPEED_25GB_SHIFT), }; struct i40e_aqc_module_desc { @@ -1721,6 +1760,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_LINK_ENABLED 0x08 #define I40E_AQ_PHY_AN_ENABLED 0x10 #define I40E_AQ_PHY_FLAG_MODULE_QUAL 0x20 +#define I40E_AQ_PHY_FEC_ABILITY_KR 0x40 +#define I40E_AQ_PHY_FEC_ABILITY_RS 0x80 __le16 eee_capability; #define I40E_AQ_EEE_100BASE_TX 0x0002 #define I40E_AQ_EEE_1000BASE_T 0x0004 @@ -1731,7 +1772,22 @@ struct i40e_aq_get_phy_abilities_resp { __le32 eeer_val; u8 d3_lpan; #define I40E_AQ_SET_PHY_D3_LPAN_ENA 0x01 - u8 reserved[3]; + u8 phy_type_ext; +#define I40E_AQ_PHY_TYPE_EXT_25G_KR 0x01 +#define I40E_AQ_PHY_TYPE_EXT_25G_CR 0x02 +#define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 +#define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 + u8 fec_cfg_curr_mod_ext_info; +#define I40E_AQ_ENABLE_FEC_KR 0x01 +#define I40E_AQ_ENABLE_FEC_RS 0x02 +#define I40E_AQ_REQUEST_FEC_KR 0x04 +#define I40E_AQ_REQUEST_FEC_RS 0x08 +#define I40E_AQ_ENABLE_FEC_AUTO 0x10 +#define I40E_AQ_FEC +#define I40E_AQ_MODULE_TYPE_EXT_MASK 0xE0 +#define 
I40E_AQ_MODULE_TYPE_EXT_SHIFT 5 + + u8 ext_comp_code; u8 phy_id[4]; u8 module_type[3]; u8 qualified_module_count; @@ -1753,7 +1809,16 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */ __le16 eee_capability; __le32 eeer; u8 low_power_ctrl; - u8 reserved[3]; + u8 phy_type_ext; + u8 fec_config; +#define I40E_AQ_SET_FEC_ABILITY_KR BIT(0) +#define I40E_AQ_SET_FEC_ABILITY_RS BIT(1) +#define I40E_AQ_SET_FEC_REQUEST_KR BIT(2) +#define I40E_AQ_SET_FEC_REQUEST_RS BIT(3) +#define I40E_AQ_SET_FEC_AUTO BIT(4) +#define I40E_AQ_PHY_FEC_CONFIG_SHIFT 0x0 +#define I40E_AQ_PHY_FEC_CONFIG_MASK (0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT) + u8 reserved; }; I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); @@ -1833,16 +1898,26 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_LINK_TX_DRAINED 0x01 #define I40E_AQ_LINK_TX_FLUSHED 0x03 #define I40E_AQ_LINK_FORCED_40G 0x10 +/* 25G Error Codes */ +#define I40E_AQ_25G_NO_ERR 0X00 +#define I40E_AQ_25G_NOT_PRESENT 0X01 +#define I40E_AQ_25G_NVM_CRC_ERR 0X02 +#define I40E_AQ_25G_SBUS_UCODE_ERR 0X03 +#define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 +#define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ __le16 max_frame_size; u8 config; +#define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 +#define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 external_power_ability; + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 +#define I40E_AQ_PWR_CLASS_MASK 0x03 u8 reserved[4]; }; @@ -2340,7 +2415,6 @@ struct i40e_aqc_del_udp_tunnel_completion { }; I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion); -#ifdef X722_SUPPORT struct i40e_aqc_get_set_rss_key { #define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15) @@ -2381,7 +2455,6 @@ struct i40e_aqc_get_set_rss_lut { }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut); -#endif /* tunnel key structure 0x0B10 */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_common.c b/src/dpdk/drivers/net/i40e/base/i40e_common.c index 98ed4b68..b8d81651 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_common.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_common.c @@ -71,7 +71,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_25G_SFP28: hw->mac.type = I40E_MAC_XL710; break; -#ifdef X722_SUPPORT #ifdef X722_A0_SUPPORT case I40E_DEV_ID_X722_A0: #endif @@ -81,21 +80,16 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: - case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; -#endif -#ifdef X722_SUPPORT #if defined(INTEGRATED_VF) || defined(VF_DRIVER) case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: #ifdef X722_A0_SUPPORT case I40E_DEV_ID_X722_A0_VF: #endif hw->mac.type = I40E_MAC_X722_VF; break; #endif /* INTEGRATED_VF || VF_DRIVER */ -#endif /* X722_SUPPORT */ #if defined(INTEGRATED_VF) || defined(VF_DRIVER) case I40E_DEV_ID_VF: case I40E_DEV_ID_VF_HV: @@ -115,7 +109,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) return status; } -#ifndef I40E_NDIS_SUPPORT /** * i40e_aq_str - convert AQ err code to a string * @hw: pointer to the HW structure @@ -322,7 +315,6 @@ const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err) return hw->err_str; } -#endif /* I40E_NDIS_SUPPORT */ /** * i40e_debug_aq * @hw: debug mask related to 
admin queue @@ -383,8 +375,7 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, d_buf[j] = buf[i]; i40e_debug(hw, mask, "\t0x%04X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", - i_sav, d_buf[0], d_buf[1], - d_buf[2], d_buf[3], + i_sav, d_buf[0], d_buf[1], d_buf[2], d_buf[3], d_buf[4], d_buf[5], d_buf[6], d_buf[7], d_buf[8], d_buf[9], d_buf[10], d_buf[11], d_buf[12], d_buf[13], d_buf[14], d_buf[15]); @@ -449,7 +440,6 @@ enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, return status; } -#ifdef X722_SUPPORT /** * i40e_aq_get_set_rss_lut @@ -608,7 +598,6 @@ enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw, { return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -#endif /* X722_SUPPORT */ /* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the * hardware to a bit-field that can be used by SW to more easily determine the @@ -773,7 +762,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = { /* Non Tunneled IPv6 */ I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), + I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), I40E_PTT_UNUSED_ENTRY(91), I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), @@ -1024,9 +1013,7 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw) switch (hw->mac.type) { case I40E_MAC_XL710: -#ifdef X722_SUPPORT case I40E_MAC_X722: -#endif break; default: return I40E_ERR_DEVICE_NOT_SUPPORTED; @@ -1046,11 +1033,9 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw) else hw->pf_id = (u8)(func_rid & 0x7); -#ifdef X722_SUPPORT if (hw->mac.type == I40E_MAC_X722) hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE; -#endif status = i40e_init_nvm(hw); return status; } @@ -1128,7 +1113,8 @@ enum i40e_status_code i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr) status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL); if (flags & I40E_AQC_LAN_ADDR_VALID) - memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac)); + i40e_memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac), + I40E_NONDMA_TO_NONDMA); return status; } @@ -1151,7 +1137,8 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr) return status; if (flags & I40E_AQC_PORT_ADDR_VALID) - memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac)); + i40e_memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac), + I40E_NONDMA_TO_NONDMA); else status = I40E_ERR_INVALID_MAC_ADDR; @@ -1190,6 +1177,33 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable) wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val); } +/** + * i40e_get_san_mac_addr - get SAN MAC address + * @hw: pointer to the HW structure + * @mac_addr: pointer to SAN MAC address + * + * Reads the adapter's SAN MAC address from NVM + **/ +enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw, + u8 *mac_addr) +{ + struct i40e_aqc_mac_address_read_data addrs; + enum i40e_status_code status; + u16 flags = 0; + + status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL); + if (status) + return status; + + if (flags & I40E_AQC_SAN_ADDR_VALID) + i40e_memcpy(mac_addr, &addrs.pf_san_mac, sizeof(addrs.pf_san_mac), + I40E_NONDMA_TO_NONDMA); + else + status = I40E_ERR_INVALID_MAC_ADDR; + + return status; +} + /** * i40e_read_pba_string - Reads part number string from EEPROM * @hw: 
pointer to hardware structure @@ -1264,6 +1278,8 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_1000BASE_LX: case I40E_PHY_TYPE_40GBASE_SR4: case I40E_PHY_TYPE_40GBASE_LR4: + case I40E_PHY_TYPE_25GBASE_LR: + case I40E_PHY_TYPE_25GBASE_SR: media = I40E_MEDIA_TYPE_FIBER; break; case I40E_PHY_TYPE_100BASE_TX: @@ -1278,6 +1294,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_10GBASE_SFPP_CU: case I40E_PHY_TYPE_40GBASE_AOC: case I40E_PHY_TYPE_10GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_CR: media = I40E_MEDIA_TYPE_DA; break; case I40E_PHY_TYPE_1000BASE_KX: @@ -1285,6 +1302,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_10GBASE_KR: case I40E_PHY_TYPE_40GBASE_KR4: case I40E_PHY_TYPE_20GBASE_KR2: + case I40E_PHY_TYPE_25GBASE_KR: media = I40E_MEDIA_TYPE_BACKPLANE; break; case I40E_PHY_TYPE_SGMII: @@ -1670,8 +1688,10 @@ enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw, if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) status = I40E_ERR_UNKNOWN_PHY; - if (report_init) + if (report_init) { hw->phy.phy_types = LE32_TO_CPU(abilities->phy_type); + hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32); + } return status; } @@ -1763,10 +1783,13 @@ enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; /* Copy over all the old settings */ config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; config.link_speed = abilities.link_speed; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) @@ -1926,6 +1949,8 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed; hw_link_info->link_info = resp->link_info; hw_link_info->an_info = resp->an_info; + hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | + I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; hw_link_info->loopback = resp->loopback; hw_link_info->max_frame_size = LE16_TO_CPU(resp->max_frame_size); @@ -1948,12 +1973,13 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, else hw_link_info->crc_enable = false; - if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_ENABLE)) + if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_IS_ENABLED)) hw_link_info->lse_enable = true; else hw_link_info->lse_enable = false; - if ((hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && + if ((hw->mac.type == I40E_MAC_XL710) && + (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; @@ -2214,6 +2240,34 @@ enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, return status; } +/** + * i40e_aq_clear_default_vsi + * @hw: pointer to the hw struct + * @seid: vsi number + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw, + u16 seid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *) + &desc.params.raw; + enum i40e_status_code status; + + 
i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + cmd->promiscuous_flags = CPU_TO_LE16(0); + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_DEFAULT); + cmd->seid = CPU_TO_LE16(seid); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + /** * i40e_aq_set_vsi_unicast_promiscuous * @hw: pointer to the hw struct @@ -2289,6 +2343,43 @@ enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, return status; } +/** +* i40e_aq_set_vsi_full_promiscuous +* @hw: pointer to the hw struct +* @seid: VSI number +* @set: set promiscuous enable/disable +* @cmd_details: pointer to command details structure or NULL +**/ +enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw, + u16 seid, bool set, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (set) + flags = I40E_AQC_SET_VSI_PROMISC_UNICAST | + I40E_AQC_SET_VSI_PROMISC_MULTICAST | + I40E_AQC_SET_VSI_PROMISC_BROADCAST; + + cmd->promiscuous_flags = CPU_TO_LE16(flags); + + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_UNICAST | + I40E_AQC_SET_VSI_PROMISC_MULTICAST | + I40E_AQC_SET_VSI_PROMISC_BROADCAST); + + cmd->seid = CPU_TO_LE16(seid); + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + /** * i40e_aq_set_vsi_mc_promisc_on_vlan * @hw: pointer to the hw struct @@ -2357,6 +2448,40 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, return status; } +/** + * i40e_aq_set_vsi_bc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set broadcast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_BROADCAST; + + cmd->promiscuous_flags = CPU_TO_LE16(flags); + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_BROADCAST); + cmd->seid = CPU_TO_LE16(seid); + cmd->vlan_tag = CPU_TO_LE16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + /** * i40e_aq_set_vsi_broadcast * @hw: pointer to the hw struct @@ -2691,14 +2816,17 @@ enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw) if (status) return status; - if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) { + /* extra checking needed to ensure link info to user is timely */ + if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) && + ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) || + !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) { status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (status) return status; - memcpy(hw->phy.link_info.module_type, 
&abilities.module_type, - sizeof(hw->phy.link_info.module_type)); + i40e_memcpy(hw->phy.link_info.module_type, &abilities.module_type, + sizeof(hw->phy.link_info.module_type), I40E_NONDMA_TO_NONDMA); } return status; } @@ -3549,6 +3677,14 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, break; case I40E_AQ_CAP_ID_MNG_MODE: p->management_mode = number; + if (major_rev > 1) { + p->mng_protocols_over_mctp = logical_id; + i40e_debug(hw, I40E_DEBUG_INIT, + "HW Capability: Protocols over MCTP = %d\n", + p->mng_protocols_over_mctp); + } else { + p->mng_protocols_over_mctp = 0; + } i40e_debug(hw, I40E_DEBUG_INIT, "HW Capability: Management Mode = %d\n", p->management_mode); @@ -3768,7 +3904,6 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, if (number & I40E_NVM_MGMT_UPDATE_DISABLED) p->update_disabled = true; break; -#ifdef X722_SUPPORT case I40E_AQ_CAP_ID_WOL_AND_PROXY: hw->num_wol_proxy_filters = (u16)number; hw->wol_proxy_vsi_seid = (u16)logical_id; @@ -3778,12 +3913,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, else p->acpi_prog_method = I40E_ACPI_PROGRAMMING_METHOD_HW_FVL; p->proxy_support = (phys_id & I40E_PROXY_SUPPORT_MASK) ? 1 : 0; - p->proxy_support = p->proxy_support; i40e_debug(hw, I40E_DEBUG_INIT, "HW Capability: WOL proxy filters = %d\n", hw->num_wol_proxy_filters); break; -#endif default: break; } @@ -3792,16 +3925,8 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, if (p->fcoe) i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n"); -#ifdef I40E_FCOE_ENA - /* Software override ensuring FCoE is disabled if npar or mfp - * mode because it is not supported in these modes. - */ - if (p->npar_enable || p->flex10_enable) - p->fcoe = false; -#else /* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */ p->fcoe = false; -#endif /* count the enabled ports (aka the "not disabled" ports) */ hw->num_ports = 0; @@ -3828,8 +3953,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, /* partition id is 1-based, and functions are evenly spread * across the ports as partitions */ - hw->partition_id = (hw->pf_id / hw->num_ports) + 1; - hw->num_partitions = num_functions / hw->num_ports; + if (hw->num_ports != 0) { + hw->partition_id = (hw->pf_id / hw->num_ports) + 1; + hw->num_partitions = num_functions / hw->num_ports; + } /* additional HW specific goodies that might * someday be HW version specific @@ -4314,11 +4441,15 @@ enum i40e_status_code i40e_aq_start_stop_dcbx(struct i40e_hw *hw, /** * i40e_aq_add_udp_tunnel * @hw: pointer to the hw struct - * @udp_port: the UDP port to add + * @udp_port: the UDP port to add in Host byte order * @header_len: length of the tunneling header length in DWords * @protocol_index: protocol index type * @filter_index: pointer to filter index * @cmd_details: pointer to command details structure or NULL + * + * Note: Firmware expects the udp_port value to be in Little Endian format, + * and this function will call CPU_TO_LE16 to convert from Host byte order to + * Little Endian order. 
**/ enum i40e_status_code i40e_aq_add_udp_tunnel(struct i40e_hw *hw, u16 udp_port, u8 protocol_index, @@ -5452,12 +5583,12 @@ STATIC void i40e_fix_up_geneve_vni( u16 tnl_type; u32 ti; - tnl_type = (le16_to_cpu(f[i].flags) & + tnl_type = (LE16_TO_CPU(f[i].flags) & I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { - ti = le32_to_cpu(f[i].tenant_id); - f[i].tenant_id = cpu_to_le32(ti << 8); + ti = LE32_TO_CPU(f[i].tenant_id); + f[i].tenant_id = CPU_TO_LE32(ti << 8); } } } @@ -5961,9 +6092,6 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); - if (bwd_size > I40E_AQ_LARGE_BUF) - desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB); - desc.datalen = CPU_TO_LE16(bwd_size); status = i40e_asq_send_command(hw, &desc, bw_data, bwd_size, cmd_details); @@ -5972,7 +6100,92 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, } /** - * i40e_read_phy_register + * i40e_read_phy_register_clause22 + * @hw: pointer to the HW structure + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 *value) +{ + enum i40e_status_code status = I40E_ERR_TIMEOUT; + u8 port_num = (u8)hw->func_caps.mdio_port_num; + u32 command = 0; + u16 retry = 1000; + + command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_CLAUSE22_OPCODE_READ_MASK) | + (I40E_MDIO_CLAUSE22_STCODE_MASK) | + (I40E_GLGEN_MSCA_MDICMD_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = I40E_SUCCESS; + break; + } + i40e_usec_delay(10); + retry--; + } while (retry); + + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + } else { + command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); + *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> + I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + } + + return status; +} + +/** + * i40e_write_phy_register_clause22 + * @hw: pointer to the HW structure + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes specified PHY register value + **/ +enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 value) +{ + enum i40e_status_code status = I40E_ERR_TIMEOUT; + u8 port_num = (u8)hw->func_caps.mdio_port_num; + u32 command = 0; + u16 retry = 1000; + + command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT; + wr32(hw, I40E_GLGEN_MSRWD(port_num), command); + + command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK) | + (I40E_MDIO_CLAUSE22_STCODE_MASK) | + (I40E_GLGEN_MSCA_MDICMD_MASK); + + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = I40E_SUCCESS; + break; + } + i40e_usec_delay(10); + retry--; + } while (retry); + + return status; +} + +/** + * i40e_read_phy_register_clause45 * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page @@ -5981,9 
+6194,8 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, * * Reads specified PHY register value **/ -enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, - u16 *value) +enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value) { enum i40e_status_code status = I40E_ERR_TIMEOUT; u32 command = 0; @@ -5993,8 +6205,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_ADDRESS) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); wr32(hw, I40E_GLGEN_MSCA(port_num), command); @@ -6016,8 +6228,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_READ) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_READ_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); status = I40E_ERR_TIMEOUT; @@ -6047,7 +6259,7 @@ phy_read_end: } /** - * i40e_write_phy_register + * i40e_write_phy_register_clause45 * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page @@ -6056,9 +6268,8 @@ phy_read_end: * * Writes value to specified PHY register **/ -enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, - u16 value) +enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value) { enum i40e_status_code status = I40E_ERR_TIMEOUT; u32 command = 0; @@ -6068,8 +6279,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_ADDRESS) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); wr32(hw, I40E_GLGEN_MSCA(port_num), command); @@ -6093,8 +6304,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_WRITE) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); status = I40E_ERR_TIMEOUT; @@ -6114,6 +6325,78 @@ phy_write_end: return status; } +/** + * i40e_write_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes value to specified PHY register + **/ +enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value) +{ + enum i40e_status_code status; + + switch (hw->device_id) { + case I40E_DEV_ID_1G_BASE_T_X722: + status = i40e_write_phy_register_clause22(hw, + reg, phy_addr, value); + break; + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_25G_B: + case 
I40E_DEV_ID_25G_SFP28: + status = i40e_write_phy_register_clause45(hw, + page, reg, phy_addr, value); + break; + default: + status = I40E_ERR_UNKNOWN_PHY; + break; + } + + return status; +} + +/** + * i40e_read_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value) +{ + enum i40e_status_code status; + + switch (hw->device_id) { + case I40E_DEV_ID_1G_BASE_T_X722: + status = i40e_read_phy_register_clause22(hw, reg, phy_addr, + value); + break; + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_25G_B: + case I40E_DEV_ID_25G_SFP28: + status = i40e_read_phy_register_clause45(hw, page, reg, + phy_addr, value); + break; + default: + status = I40E_ERR_UNKNOWN_PHY; + break; + } + + return status; +} + /** * i40e_get_phy_address * @hw: pointer to the HW structure @@ -6156,14 +6439,16 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, led_addr++) { - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + &led_reg); if (status) goto phy_blinking_end; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register(hw, + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, led_addr, phy_addr, led_reg); @@ -6175,20 +6460,18 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, if (time > 0 && interval > 0) { for (i = 0; i < time * 1000; i += interval) { - status = i40e_read_phy_register(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - &led_reg); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) goto restore_config; if (led_reg & I40E_PHY_LED_MANUAL_ON) led_reg = 0; else led_reg = I40E_PHY_LED_MANUAL_ON; - status = i40e_write_phy_register(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); if (status) goto restore_config; i40e_msec_delay(interval); @@ -6196,8 +6479,9 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, } restore_config: - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, - phy_addr, led_ctl); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); phy_blinking_end: return status; @@ -6228,8 +6512,10 @@ enum i40e_status_code i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, temp_addr++) { - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - temp_addr, phy_addr, ®_val); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + temp_addr, phy_addr, + ®_val); if (status) return status; *val = reg_val; @@ -6262,41 +6548,42 @@ enum i40e_status_code i40e_led_set_phy(struct i40e_hw *hw, bool on, i = rd32(hw, I40E_PFGEN_PORTNUM); port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); phy_addr = i40e_get_phy_address(hw, port_num); - - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, 
led_addr, - phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) return status; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); if (status) return status; } - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) goto restore_config; if (on) led_reg = I40E_PHY_LED_MANUAL_ON; else led_reg = 0; - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); if (status) goto restore_config; if (mode & I40E_PHY_LED_MODE_ORIG) { led_ctl = (mode & I40E_PHY_LED_MODE_MASK); - status = i40e_write_phy_register(hw, + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, led_addr, phy_addr, led_ctl); } return status; restore_config: - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, - phy_addr, led_ctl); + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); return status; } #endif /* PF_DRIVER */ @@ -6522,7 +6809,6 @@ enum i40e_status_code i40e_vf_reset(struct i40e_hw *hw) I40E_SUCCESS, NULL, 0, NULL); } #endif /* VF_DRIVER */ -#ifdef X722_SUPPORT /** * i40e_aq_set_arp_proxy_config @@ -6545,10 +6831,13 @@ enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_proxy_config); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); desc.params.external.addr_high = CPU_TO_LE32(I40E_HI_DWORD((u64)proxy_config)); desc.params.external.addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)proxy_config)); + desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_arp_proxy_data)); status = i40e_asq_send_command(hw, &desc, proxy_config, sizeof(struct i40e_aqc_arp_proxy_data), @@ -6579,10 +6868,13 @@ enum i40e_status_code i40e_aq_set_ns_proxy_table_entry(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_ns_proxy_table_entry); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); desc.params.external.addr_high = CPU_TO_LE32(I40E_HI_DWORD((u64)ns_proxy_table_entry)); desc.params.external.addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)ns_proxy_table_entry)); + desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_ns_proxy_data)); status = i40e_asq_send_command(hw, &desc, ns_proxy_table_entry, sizeof(struct i40e_aqc_ns_proxy_data), @@ -6629,9 +6921,11 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, if (set_filter) { if (!filter) return I40E_ERR_PARAM; + cmd_flags |= I40E_AQC_SET_WOL_FILTER; - buff_len = sizeof(*filter); + cmd_flags |= I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR; } + if (no_wol_tco) cmd_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL; cmd->cmd_flags = CPU_TO_LE16(cmd_flags); @@ -6642,6 +6936,12 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, valid_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID; cmd->valid_flags = CPU_TO_LE16(valid_flags); + buff_len = sizeof(*filter); + desc.datalen = CPU_TO_LE16(buff_len); + 
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); + cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((u64)filter)); cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((u64)filter)); @@ -6678,4 +6978,23 @@ enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw, return status; } -#endif /* X722_SUPPORT */ +/** +* i40e_aq_clear_all_wol_filters +* @hw: pointer to the hw struct +* @cmd_details: pointer to command details structure or NULL +* +* Get information for the reason of a Wake Up event +**/ +enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_clear_all_wol_filters); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} \ No newline at end of file diff --git a/src/dpdk/drivers/net/i40e/base/i40e_devids.h b/src/dpdk/drivers/net/i40e/base/i40e_devids.h index ed73e1d2..4546689a 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_devids.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_devids.h @@ -55,7 +55,6 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_DEV_ID_VF 0x154C #define I40E_DEV_ID_VF_HV 0x1571 #endif /* VF_DRIVER */ -#ifdef X722_SUPPORT #ifdef X722_A0_SUPPORT #define I40E_DEV_ID_X722_A0 0x374C #if defined(INTEGRATED_VF) || defined(VF_DRIVER) @@ -68,12 +67,9 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 -#define I40E_DEV_ID_QSFP_I_X722 0x37D4 #if defined(INTEGRATED_VF) || defined(VF_DRIVER) || defined(I40E_NDIS_SUPPORT) #define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #endif /* VF_DRIVER */ -#endif /* X722_SUPPORT */ #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c b/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c index 22606484..f03f3813 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c @@ -1239,11 +1239,6 @@ enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hw *hw, u64 obj_offset_in_fpm; u32 sd_idx, sd_lmt; - if (NULL == hmc_info) { - ret_code = I40E_ERR_BAD_PTR; - DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info ptr\n"); - goto exit; - } if (NULL == hmc_info->hmc_obj) { ret_code = I40E_ERR_BAD_PTR; DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n"); diff --git a/src/dpdk/drivers/net/i40e/base/i40e_nvm.c b/src/dpdk/drivers/net/i40e/base/i40e_nvm.c index 4fa1220b..e8965024 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_nvm.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_nvm.c @@ -219,19 +219,15 @@ enum i40e_status_code i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { - ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (!ret_code) { + ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); + if (!ret_code) { + if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { ret_code = i40e_read_nvm_word_aq(hw, offset, data); - i40e_release_nvm(hw); + } else { + ret_code = i40e_read_nvm_word_srctl(hw, offset, data); } - } else { - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); + i40e_release_nvm(hw); } -#else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#endif return 
ret_code; } @@ -249,14 +245,10 @@ enum i40e_status_code __i40e_read_nvm_word(struct i40e_hw *hw, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) ret_code = i40e_read_nvm_word_aq(hw, offset, data); else ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#endif return ret_code; } @@ -348,14 +340,10 @@ enum i40e_status_code __i40e_read_nvm_buffer(struct i40e_hw *hw, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data); else ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#endif return ret_code; } @@ -375,7 +363,6 @@ enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (!ret_code) { @@ -386,9 +373,6 @@ enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, } else { ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); } -#else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#endif return ret_code; } @@ -901,9 +885,20 @@ enum i40e_status_code i40e_nvmupd_command(struct i40e_hw *hw, *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; } + /* Clear error status on read */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + return I40E_SUCCESS; } + /* Clear status even it is not read and log */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { + i40e_debug(hw, I40E_DEBUG_NVM, + "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT: status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); @@ -1253,6 +1248,7 @@ retry: void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) { if (opcode == hw->nvm_wait_opcode) { + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: clearing wait on opcode 0x%04x\n", opcode); if (hw->nvm_release_on_done) { @@ -1261,6 +1257,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) } hw->nvm_wait_opcode = 0; + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT_WAIT: hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; @@ -1423,7 +1424,8 @@ STATIC enum i40e_status_code i40e_nvmupd_exec_aq(struct i40e_hw *hw, if (hw->nvm_buff.va) { buff = hw->nvm_buff.va; - memcpy(buff, &bytes[aq_desc_len], aq_data_len); + i40e_memcpy(buff, &bytes[aq_desc_len], aq_data_len, + I40E_NONDMA_TO_NONDMA); } } @@ -1496,7 +1498,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw, __func__, cmd->offset, cmd->offset + len); buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; - memcpy(bytes, buff, len); + i40e_memcpy(bytes, buff, len, I40E_NONDMA_TO_NONDMA); bytes += len; remainder -= len; @@ -1510,7 +1512,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw, i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n", __func__, start_byte, start_byte + remainder); - memcpy(bytes, buff, remainder); + i40e_memcpy(bytes, buff, remainder, I40E_NONDMA_TO_NONDMA); } return I40E_SUCCESS; diff --git 
a/src/dpdk/drivers/net/i40e/base/i40e_osdep.h b/src/dpdk/drivers/net/i40e/base/i40e_osdep.h index 38e7ba5b..c57ecded 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_osdep.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_osdep.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "../i40e_logs.h" @@ -153,15 +154,18 @@ do { \ * I40E_PRTQF_FD_MSK */ -#define I40E_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define I40E_PCI_REG(reg) rte_read32(reg) #define I40E_PCI_REG_ADDR(a, reg) \ ((volatile uint32_t *)((char *)(a)->hw_addr + (reg))) static inline uint32_t i40e_read_addr(volatile void *addr) { return rte_le_to_cpu_32(I40E_PCI_REG(addr)); } -#define I40E_PCI_REG_WRITE(reg, value) \ - do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0) + +#define I40E_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) +#define I40E_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT) #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT) diff --git a/src/dpdk/drivers/net/i40e/base/i40e_prototype.h b/src/dpdk/drivers/net/i40e/base/i40e_prototype.h index 03dda937..109d3c56 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_prototype.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_prototype.h @@ -78,7 +78,6 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void i40e_idle_aq(struct i40e_hw *hw); bool i40e_check_asq_alive(struct i40e_hw *hw); enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading); -#ifdef X722_SUPPORT enum i40e_status_code i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid, bool pf_lut, u8 *lut, u16 lut_size); @@ -90,11 +89,8 @@ enum i40e_status_code i40e_aq_get_rss_key(struct i40e_hw *hw, enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw, u16 seid, struct i40e_aqc_get_set_rss_key_data *key); -#endif -#ifndef I40E_NDIS_SUPPORT const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err); const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err); -#endif /* I40E_NDIS_SUPPORT */ #ifdef PF_DRIVER @@ -124,6 +120,8 @@ enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id, + struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw, bool qualified_modules, bool report_init, struct i40e_aq_get_phy_abilities_resp *abilities, @@ -170,12 +168,18 @@ enum i40e_status_code i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, bool rx_only_promisc); enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw, + u16 seid, bool set, + struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, u16 seid, bool enable, u16 vid, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, u16 seid, bool enable, u16 vid, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, u16 vid, + struct 
i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, u16 seid, bool enable, struct i40e_asq_cmd_details *cmd_details); @@ -438,6 +442,7 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr); enum i40e_status_code i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, u32 pba_num_size); void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable); +enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw, u8 *mac_addr); enum i40e_aq_link_speed i40e_get_link_speed(struct i40e_hw *hw); /* prototype for functions used for NVM access */ enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw); @@ -518,7 +523,6 @@ enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); -#ifdef X722_SUPPORT enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw, struct i40e_aqc_arp_proxy_data *proxy_config, struct i40e_asq_cmd_details *cmd_details); @@ -534,11 +538,20 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw, u16 *wake_reason, struct i40e_asq_cmd_details *cmd_details); -#endif -enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, u8 page, - u16 reg, u8 phy_addr, u16 *value); -enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, u8 page, - u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value); u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, u32 time, u32 interval); diff --git a/src/dpdk/drivers/net/i40e/base/i40e_register.h b/src/dpdk/drivers/net/i40e/base/i40e_register.h index fd0a7230..3a305b67 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_register.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_register.h @@ -3401,7 +3401,6 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT) #define I40E_VFQF_HREGION_REGION_7_SHIFT 29 #define I40E_VFQF_HREGION_REGION_7_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_7_SHIFT) -#ifdef X722_SUPPORT #ifdef PF_DRIVER #define I40E_MNGSB_FDCRC 0x000B7050 /* Reset: POR */ @@ -5366,5 +5365,4 @@ POSSIBILITY OF SUCH DAMAGE. 
#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20 #define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT) -#endif /* X722_SUPPORT */ #endif /* _I40E_REGISTER_H_ */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_type.h b/src/dpdk/drivers/net/i40e/base/i40e_type.h index 5349419f..590d97c7 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_type.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_type.h @@ -157,13 +157,22 @@ enum i40e_debug_mask { #define I40E_PCI_LINK_SPEED_5000 0x2 #define I40E_PCI_LINK_SPEED_8000 0x3 -#define I40E_MDIO_STCODE 0 -#define I40E_MDIO_OPCODE_ADDRESS 0 -#define I40E_MDIO_OPCODE_WRITE I40E_MASK(1, \ +#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \ + I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) -#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \ +#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) -#define I40E_MDIO_OPCODE_READ I40E_MASK(3, \ + +#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \ + I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_READ_INC_ADDR_MASK I40E_MASK(2, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) #define I40E_PHY_COM_REG_PAGE 0x1E @@ -187,9 +196,7 @@ enum i40e_memcpy_type { I40E_DMA_TO_NONDMA }; -#ifdef X722_SUPPORT #define I40E_FW_API_VERSION_MINOR_X722 0x0005 -#endif #define I40E_FW_API_VERSION_MINOR_X710 0x0005 @@ -203,13 +210,10 @@ enum i40e_memcpy_type { */ enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, - I40E_MAC_X710, I40E_MAC_XL710, I40E_MAC_VF, -#ifdef X722_SUPPORT I40E_MAC_X722, I40E_MAC_X722_VF, -#endif I40E_MAC_GENERIC, }; @@ -264,6 +268,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -292,61 +297,73 @@ struct i40e_link_status { #define I40E_MODULE_TYPE_1000BASE_T 0x08 }; -enum i40e_aq_capabilities_phy_type { - I40E_CAP_PHY_TYPE_SGMII = BIT(I40E_PHY_TYPE_SGMII), - I40E_CAP_PHY_TYPE_1000BASE_KX = BIT(I40E_PHY_TYPE_1000BASE_KX), - I40E_CAP_PHY_TYPE_10GBASE_KX4 = BIT(I40E_PHY_TYPE_10GBASE_KX4), - I40E_CAP_PHY_TYPE_10GBASE_KR = BIT(I40E_PHY_TYPE_10GBASE_KR), - I40E_CAP_PHY_TYPE_40GBASE_KR4 = BIT(I40E_PHY_TYPE_40GBASE_KR4), - I40E_CAP_PHY_TYPE_XAUI = BIT(I40E_PHY_TYPE_XAUI), - I40E_CAP_PHY_TYPE_XFI = BIT(I40E_PHY_TYPE_XFI), - I40E_CAP_PHY_TYPE_SFI = BIT(I40E_PHY_TYPE_SFI), - I40E_CAP_PHY_TYPE_XLAUI = BIT(I40E_PHY_TYPE_XLAUI), - I40E_CAP_PHY_TYPE_XLPPI = BIT(I40E_PHY_TYPE_XLPPI), - I40E_CAP_PHY_TYPE_40GBASE_CR4_CU = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU), - I40E_CAP_PHY_TYPE_10GBASE_CR1_CU = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU), - I40E_CAP_PHY_TYPE_10GBASE_AOC = BIT(I40E_PHY_TYPE_10GBASE_AOC), - I40E_CAP_PHY_TYPE_40GBASE_AOC = BIT(I40E_PHY_TYPE_40GBASE_AOC), - I40E_CAP_PHY_TYPE_100BASE_TX = BIT(I40E_PHY_TYPE_100BASE_TX), - I40E_CAP_PHY_TYPE_1000BASE_T = BIT(I40E_PHY_TYPE_1000BASE_T), - I40E_CAP_PHY_TYPE_10GBASE_T = BIT(I40E_PHY_TYPE_10GBASE_T), - I40E_CAP_PHY_TYPE_10GBASE_SR = BIT(I40E_PHY_TYPE_10GBASE_SR), - I40E_CAP_PHY_TYPE_10GBASE_LR = BIT(I40E_PHY_TYPE_10GBASE_LR), - I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU), - 
I40E_CAP_PHY_TYPE_10GBASE_CR1 = BIT(I40E_PHY_TYPE_10GBASE_CR1), - I40E_CAP_PHY_TYPE_40GBASE_CR4 = BIT(I40E_PHY_TYPE_40GBASE_CR4), - I40E_CAP_PHY_TYPE_40GBASE_SR4 = BIT(I40E_PHY_TYPE_40GBASE_SR4), - I40E_CAP_PHY_TYPE_40GBASE_LR4 = BIT(I40E_PHY_TYPE_40GBASE_LR4), - I40E_CAP_PHY_TYPE_1000BASE_SX = BIT(I40E_PHY_TYPE_1000BASE_SX), - I40E_CAP_PHY_TYPE_1000BASE_LX = BIT(I40E_PHY_TYPE_1000BASE_LX), - I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL = BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL), - I40E_CAP_PHY_TYPE_20GBASE_KR2 = BIT(I40E_PHY_TYPE_20GBASE_KR2) -}; - struct i40e_phy_info { struct i40e_link_status link_info; struct i40e_link_status link_info_old; bool get_link_info; enum i40e_media_type media_type; /* all the phy types the NVM is capable of */ - u32 phy_types; -}; - + u64 phy_types; +}; + +#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII) +#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) +#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) +#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) +#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) +#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI) +#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI) +#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI) +#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI) +#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI) +#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) +#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) +#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX) +#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T) +#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T) +#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) +#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) +#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) +#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) +#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) +#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) +#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) +#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) +#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) +#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) +/* + * Defining the macro I40E_TYPE_OFFSET to implement a bit shift for some + * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit + * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So, + * a shift is needed to adjust for this with values larger than 31. The + * only affected values are I40E_PHY_TYPE_25GBASE_*. 
+ */ +#define I40E_PHY_TYPE_OFFSET 1 +#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \ + I40E_PHY_TYPE_OFFSET) #define I40E_HW_CAP_MAX_GPIO 30 #define I40E_HW_CAP_MDIO_PORT_MODE_MDIO 0 #define I40E_HW_CAP_MDIO_PORT_MODE_I2C 1 -#ifdef X722_SUPPORT enum i40e_acpi_programming_method { I40E_ACPI_PROGRAMMING_METHOD_HW_FVL = 0, I40E_ACPI_PROGRAMMING_METHOD_AQC_FPK = 1 }; -#define I40E_WOL_SUPPORT_MASK 1 -#define I40E_ACPI_PROGRAMMING_METHOD_MASK (1 << 1) -#define I40E_PROXY_SUPPORT_MASK (1 << 2) +#define I40E_WOL_SUPPORT_MASK 0x1 +#define I40E_ACPI_PROGRAMMING_METHOD_MASK 0x2 +#define I40E_PROXY_SUPPORT_MASK 0x4 -#endif /* Capabilities of a PF or a VF or the whole device */ struct i40e_hw_capabilities { u32 switch_mode; @@ -355,6 +372,10 @@ struct i40e_hw_capabilities { #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD 0x3 u32 management_mode; + u32 mng_protocols_over_mctp; +#define I40E_MNG_PROTOCOL_PLDM 0x2 +#define I40E_MNG_PROTOCOL_OEM_COMMANDS 0x4 +#define I40E_MNG_PROTOCOL_NCSI 0x8 u32 npar_enable; u32 os2bmc; u32 valid_functions; @@ -410,11 +431,9 @@ struct i40e_hw_capabilities { u32 enabled_tcmap; u32 maxtc; u64 wr_csr_prot; -#ifdef X722_SUPPORT bool apm_wol_support; enum i40e_acpi_programming_method acpi_prog_method; bool proxy_support; -#endif }; struct i40e_mac_info { @@ -472,6 +491,7 @@ enum i40e_nvmupd_state { I40E_NVMUPD_STATE_WRITING, I40E_NVMUPD_STATE_INIT_WAIT, I40E_NVMUPD_STATE_WRITE_WAIT, + I40E_NVMUPD_STATE_ERROR }; /* nvm_access definition and its masks/shifts need to be accessible to @@ -550,6 +570,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ @@ -674,30 +695,22 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ -#ifdef X722_SUPPORT /* WoL and proxy support */ u16 num_wol_proxy_filters; u16 wol_proxy_vsi_seid; -#endif #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) u64 flags; /* debug mask */ u32 debug_mask; -#ifndef I40E_NDIS_SUPPORT char err_str[16]; -#endif /* I40E_NDIS_SUPPORT */ }; STATIC INLINE bool i40e_is_vf(struct i40e_hw *hw) { -#ifdef X722_SUPPORT return (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF); -#else - return hw->mac.type == I40E_MAC_VF; -#endif } struct i40e_driver_version { @@ -801,11 +814,7 @@ enum i40e_rx_desc_status_bits { I40E_RX_DESC_STATUS_CRCP_SHIFT = 4, I40E_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */ I40E_RX_DESC_STATUS_TSYNVALID_SHIFT = 7, -#ifdef X722_SUPPORT I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8, -#else - I40E_RX_DESC_STATUS_RESERVED1_SHIFT = 8, -#endif I40E_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */ I40E_RX_DESC_STATUS_FLM_SHIFT = 11, @@ -813,11 +822,7 @@ enum i40e_rx_desc_status_bits { I40E_RX_DESC_STATUS_LPBK_SHIFT = 14, I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15, I40E_RX_DESC_STATUS_RESERVED2_SHIFT = 16, /* 2 BITS */ -#ifdef X722_SUPPORT I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18, -#else - I40E_RX_DESC_STATUS_UDP_0_SHIFT = 18, -#endif I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! 
*/ }; @@ -1195,10 +1200,8 @@ enum i40e_tx_ctx_desc_eipt_offload { #define I40E_TXD_CTX_QW0_DECTTL_MASK (0xFULL << \ I40E_TXD_CTX_QW0_DECTTL_SHIFT) -#ifdef X722_SUPPORT #define I40E_TXD_CTX_QW0_L4T_CS_SHIFT 23 #define I40E_TXD_CTX_QW0_L4T_CS_MASK BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT) -#endif struct i40e_nop_desc { __le64 rsvd; __le64 dtype_cmd; @@ -1235,38 +1238,24 @@ struct i40e_filter_program_desc { /* Packet Classifier Types for filters */ enum i40e_filter_pctype { -#ifdef X722_SUPPORT /* Note: Values 0-28 are reserved for future use. * Value 29, 30, 32 are not supported on XL710 and X710. */ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP = 29, I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP = 30, -#else - /* Note: Values 0-30 are reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV4_UDP = 31, -#ifdef X722_SUPPORT I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK = 32, -#else - /* Note: Value 32 is reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV4_TCP = 33, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP = 34, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER = 35, I40E_FILTER_PCTYPE_FRAG_IPV4 = 36, -#ifdef X722_SUPPORT /* Note: Values 37-38 are reserved for future use. * Value 39, 40, 42 are not supported on XL710 and X710. */ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP = 39, I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP = 40, -#else - /* Note: Values 37-40 are reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV6_UDP = 41, -#ifdef X722_SUPPORT I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK = 42, -#endif I40E_FILTER_PCTYPE_NONF_IPV6_TCP = 43, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP = 44, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER = 45, @@ -1321,12 +1310,10 @@ enum i40e_filter_program_desc_pcmd { I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \ I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) -#ifdef X722_SUPPORT #define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \ I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) -#endif #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \ @@ -1388,6 +1375,23 @@ struct i40e_veb_tc_stats { u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS]; }; +/* Statistics collected per function for FCoE */ +struct i40e_fcoe_stats { + u64 rx_fcoe_packets; /* fcoeprc */ + u64 rx_fcoe_dwords; /* focedwrc */ + u64 rx_fcoe_dropped; /* fcoerpdc */ + u64 tx_fcoe_packets; /* fcoeptc */ + u64 tx_fcoe_dwords; /* focedwtc */ + u64 fcoe_bad_fccrc; /* fcoecrc */ + u64 fcoe_last_error; /* fcoelast */ + u64 fcoe_ddp_count; /* fcoeddpc */ +}; + +/* offset to per function FCoE statistics block */ +#define I40E_FCOE_VF_STAT_OFFSET 0 +#define I40E_FCOE_PF_STAT_OFFSET 128 +#define I40E_FCOE_STAT_MAX (I40E_FCOE_PF_STAT_OFFSET + I40E_MAX_PF) + /* Statistics collected by the MAC */ struct i40e_hw_port_stats { /* eth stats collected by the port */ @@ -1481,6 +1485,7 @@ struct i40e_hw_port_stats { #define I40E_SR_EMPR_REGS_AUTO_LOAD_PTR 0x3A #define I40E_SR_GLOBR_REGS_AUTO_LOAD_PTR 0x3B #define I40E_SR_CORER_REGS_AUTO_LOAD_PTR 0x3C +#define I40E_SR_PHY_ACTIVITY_LIST_PTR 0x3D #define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR 0x3E #define I40E_SR_SW_CHECKSUM_WORD 0x3F #define I40E_SR_1ST_FREE_PROVISION_AREA_PTR 0x40 @@ -1509,6 +1514,208 @@ struct i40e_hw_port_stats { #define I40E_SRRD_SRCTL_ATTEMPTS 100000 +/* FCoE Tx context descriptor - Use the i40e_tx_context_desc struct */ + +enum i40E_fcoe_tx_ctx_desc_cmd_bits { + I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND = 0x00, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2 = 0x01, /* 4 
BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3 = 0x05, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS2 = 0x02, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS3 = 0x06, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS2 = 0x03, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS3 = 0x07, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL = 0x08, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_CTX_INVL = 0x09, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_RELOFF = 0x10, + I40E_FCOE_TX_CTX_DESC_CLRSEQ = 0x20, + I40E_FCOE_TX_CTX_DESC_DIFENA = 0x40, + I40E_FCOE_TX_CTX_DESC_IL2TAG2 = 0x80 +}; + +/* FCoE DIF/DIX Context descriptor */ +struct i40e_fcoe_difdix_context_desc { + __le64 flags_buff0_buff1_ref; + __le64 difapp_msk_bias; +}; + +#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT 0 +#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_MASK (0xFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT) + +enum i40e_fcoe_difdix_ctx_desc_flags_bits { + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_RSVD = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGCHK = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGNOTCHK = 0x0004, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_OPAQUE = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY = 0x0008, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPTAG = 0x0010, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPREFTAG = 0x0018, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_CNST = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_INC1BLK = 0x0020, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_APPTAG = 0x0040, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_RSVD = 0x0060, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_XSUM = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_CRC = 0x0080, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_UNTAG = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_BUF = 0x0100, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_RSVD = 0x0200, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_EMBDTAGS = 0x0300, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_UNTAG = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_TAG = 0x0400, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_512B = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_4K = 0x0800 +}; + +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT 12 +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_MASK (0x3FFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT 22 +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_MASK (0x3FFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT 32 +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_MASK (0xFFFFFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT 0 +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MASK (0xFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT 16 +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_MASK (0xFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT 32 +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_BIAS_MASK (0xFFFFFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT) + +/* FCoE DIF/DIX Buffers descriptor */ +struct i40e_fcoe_difdix_buffers_desc { + __le64 buff_addr0; + __le64 buff_addr1; +}; + +/* FCoE DDP Context descriptor */ +struct i40e_fcoe_ddp_context_desc { + __le64 rsvd; + __le64 type_cmd_foff_lsize; 
+}; + +#define I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT 0 +#define I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK (0xFULL << \ + I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT) + +#define I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT 4 +#define I40E_FCOE_DDP_CTX_QW1_CMD_MASK (0xFULL << \ + I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) + +enum i40e_fcoe_ddp_ctx_desc_cmd_bits { + I40E_FCOE_DDP_CTX_DESC_BSIZE_512B = 0x00, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_4K = 0x01, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_8K = 0x02, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_16K = 0x03, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_DIFENA = 0x04, /* 1 BIT */ + I40E_FCOE_DDP_CTX_DESC_LASTSEQH = 0x08, /* 1 BIT */ +}; + +#define I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT 16 +#define I40E_FCOE_DDP_CTX_QW1_FOFF_MASK (0x3FFFULL << \ + I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) + +#define I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT 32 +#define I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK (0x3FFFULL << \ + I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT) + +/* FCoE DDP/DWO Queue Context descriptor */ +struct i40e_fcoe_queue_context_desc { + __le64 dmaindx_fbase; /* 0:11 DMAINDX, 12:63 FBASE */ + __le64 flen_tph; /* 0:12 FLEN, 13:15 TPH */ +}; + +#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT 0 +#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_MASK (0xFFFULL << \ + I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT 12 +#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_MASK (0xFFFFFFFFFFFFFULL << \ + I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT 0 +#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_MASK (0x1FFFULL << \ + I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT 13 +#define I40E_FCOE_QUEUE_CTX_QW1_TPH_MASK (0x7ULL << \ + I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT) + +enum i40e_fcoe_queue_ctx_desc_tph_bits { + I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC = 0x1, + I40E_FCOE_QUEUE_CTX_DESC_TPHDATA = 0x2 +}; + +#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT 30 +#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_MASK (0x3ULL << \ + I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT) + +/* FCoE DDP/DWO Filter Context descriptor */ +struct i40e_fcoe_filter_context_desc { + __le32 param; + __le16 seqn; + + /* 48:51(0:3) RSVD, 52:63(4:15) DMAINDX */ + __le16 rsvd_dmaindx; + + /* 0:7 FLAGS, 8:52 RSVD, 53:63 LANQ */ + __le64 flags_rsvd_lanq; +}; + +#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT 4 +#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_MASK (0xFFF << \ + I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT) + +enum i40e_fcoe_filter_ctx_desc_flags_bits { + I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP = 0x00, + I40E_FCOE_FILTER_CTX_DESC_CTYP_DWO = 0x01, + I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT = 0x00, + I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP = 0x02, + I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 = 0x00, + I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3 = 0x04 +}; + +#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT 0 +#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_MASK (0xFFULL << \ + I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT) + +#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT 8 +#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_MASK (0x3FULL << \ + I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT) + +#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT 53 +#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_MASK (0x7FFULL << \ + I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT) + enum i40e_switch_element_types { I40E_SWITCH_ELEMENT_TYPE_MAC = 1, I40E_SWITCH_ELEMENT_TYPE_PF = 2, diff --git a/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h b/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h index fd51ec32..8fba6081 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h 
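Aside (not part of the patch): the FCoE descriptor definitions above all follow the same quad-word shift/mask convention. Below is a minimal, self-contained sketch of how the I40E_FCOE_DDP_CTX_QW1_* values compose the second quad word of the DDP context descriptor; the macro values are copied from the hunk above, while fcoe_ddp_qw1() and the sample field values are hypothetical, not driver code. The same pattern (value << *_SHIFT, clamped by *_MASK) applies to the DIF/DIX, queue and filter context descriptors earlier in the hunk.

#include <stdint.h>
#include <stdio.h>

/* values copied from the hunk above */
#define I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT	0
#define I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK	(0xFULL << I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT)
#define I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT		4
#define I40E_FCOE_DDP_CTX_QW1_CMD_MASK		(0xFULL << I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT)
#define I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT	16
#define I40E_FCOE_DDP_CTX_QW1_FOFF_MASK		(0x3FFFULL << I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT)
#define I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT	32
#define I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK	(0x3FFFULL << I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT)

#define I40E_FCOE_DDP_CTX_DESC_BSIZE_4K		0x01	/* from the cmd bits enum above */

/* Pack dtype, command bits, frame offset and last size into one 64-bit
 * quad word, shifting each field into place and clamping it to its mask. */
static uint64_t fcoe_ddp_qw1(uint64_t dtype, uint64_t cmd,
			     uint64_t foff, uint64_t lsize)
{
	return ((dtype << I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT) &
		I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK) |
	       ((cmd << I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) &
		I40E_FCOE_DDP_CTX_QW1_CMD_MASK) |
	       ((foff << I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) &
		I40E_FCOE_DDP_CTX_QW1_FOFF_MASK) |
	       ((lsize << I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT) &
		I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK);
}

int main(void)
{
	/* hypothetical dtype (0x8), 4K DDP buffers, frame offset 0x40,
	 * last size 0x200 - just to show where the fields land in the word */
	printf("QW1 = 0x%016llx\n",
	       (unsigned long long)fcoe_ddp_qw1(0x8, I40E_FCOE_DDP_CTX_DESC_BSIZE_4K,
						0x40, 0x200));
	return 0;
}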
@@ -170,6 +170,11 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 + +#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ + I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev.c b/src/dpdk/drivers/net/i40e/i40e_ethdev.c index ca1a4808..4492bcc1 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev.c +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,6 +51,7 @@ #include #include #include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -62,6 +63,7 @@ #include "i40e_rxtx.h" #include "i40e_pf.h" #include "i40e_regs.h" +#include "rte_pmd_i40e.h" #define ETH_I40E_FLOATING_VEB_ARG "enable_floating_veb" #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list" @@ -108,7 +110,6 @@ I40E_PFINT_ICR0_ENA_GRST_MASK | \ I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | \ I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK | \ - I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | \ I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | \ I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | \ I40E_PFINT_ICR0_ENA_VFLR_MASK | \ @@ -139,60 +140,6 @@ #define I40E_DEFAULT_DCB_APP_NUM 1 #define I40E_DEFAULT_DCB_APP_PRIO 3 -#define I40E_INSET_NONE 0x00000000000000000ULL - -/* bit0 ~ bit 7 */ -#define I40E_INSET_DMAC 0x0000000000000001ULL -#define I40E_INSET_SMAC 0x0000000000000002ULL -#define I40E_INSET_VLAN_OUTER 0x0000000000000004ULL -#define I40E_INSET_VLAN_INNER 0x0000000000000008ULL -#define I40E_INSET_VLAN_TUNNEL 0x0000000000000010ULL - -/* bit 8 ~ bit 15 */ -#define I40E_INSET_IPV4_SRC 0x0000000000000100ULL -#define I40E_INSET_IPV4_DST 0x0000000000000200ULL -#define I40E_INSET_IPV6_SRC 0x0000000000000400ULL -#define I40E_INSET_IPV6_DST 0x0000000000000800ULL -#define I40E_INSET_SRC_PORT 0x0000000000001000ULL -#define I40E_INSET_DST_PORT 0x0000000000002000ULL -#define I40E_INSET_SCTP_VT 0x0000000000004000ULL - -/* bit 16 ~ bit 31 */ -#define I40E_INSET_IPV4_TOS 0x0000000000010000ULL -#define I40E_INSET_IPV4_PROTO 0x0000000000020000ULL -#define I40E_INSET_IPV4_TTL 0x0000000000040000ULL -#define I40E_INSET_IPV6_TC 0x0000000000080000ULL -#define I40E_INSET_IPV6_FLOW 0x0000000000100000ULL -#define I40E_INSET_IPV6_NEXT_HDR 0x0000000000200000ULL -#define I40E_INSET_IPV6_HOP_LIMIT 0x0000000000400000ULL -#define I40E_INSET_TCP_FLAGS 0x0000000000800000ULL - -/* bit 32 ~ bit 47, tunnel fields */ -#define I40E_INSET_TUNNEL_IPV4_DST 0x0000000100000000ULL -#define I40E_INSET_TUNNEL_IPV6_DST 0x0000000200000000ULL -#define I40E_INSET_TUNNEL_DMAC 0x0000000400000000ULL -#define I40E_INSET_TUNNEL_SRC_PORT 0x0000000800000000ULL -#define I40E_INSET_TUNNEL_DST_PORT 0x0000001000000000ULL -#define I40E_INSET_TUNNEL_ID 0x0000002000000000ULL - -/* bit 48 ~ bit 55 */ -#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL - -/* bit 56 ~ bit 63, Flex Payload */ -#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W3 0x0400000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W4 
0x0800000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD \ - (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \ - I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \ - I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \ - I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8) - /** * Below are values for writing un-exposed registers suggested * by silicon experts @@ -202,7 +149,7 @@ /* Source MAC address */ #define I40E_REG_INSET_L2_SMAC 0x1C00000000000000ULL /* Outer (S-Tag) VLAN tag in the outer L2 header */ -#define I40E_REG_INSET_L2_OUTER_VLAN 0x0200000000000000ULL +#define I40E_REG_INSET_L2_OUTER_VLAN 0x0000000004000000ULL /* Inner (C-Tag) or single VLAN tag in the outer L2 header */ #define I40E_REG_INSET_L2_INNER_VLAN 0x0080000000000000ULL /* Single VLAN tag in the inner L2 header */ @@ -211,6 +158,14 @@ #define I40E_REG_INSET_L3_SRC_IP4 0x0001800000000000ULL /* Destination IPv4 address */ #define I40E_REG_INSET_L3_DST_IP4 0x0000001800000000ULL +/* Source IPv4 address for X722 */ +#define I40E_X722_REG_INSET_L3_SRC_IP4 0x0006000000000000ULL +/* Destination IPv4 address for X722 */ +#define I40E_X722_REG_INSET_L3_DST_IP4 0x0000060000000000ULL +/* IPv4 Protocol for X722 */ +#define I40E_X722_REG_INSET_L3_IP4_PROTO 0x0010000000000000ULL +/* IPv4 Time to Live for X722 */ +#define I40E_X722_REG_INSET_L3_IP4_TTL 0x0010000000000000ULL /* IPv4 Type of Service (TOS) */ #define I40E_REG_INSET_L3_IP4_TOS 0x0040000000000000ULL /* IPv4 Protocol */ @@ -277,11 +232,6 @@ #define I40E_INSET_IPV6_HOP_LIMIT_MASK 0x000CFF00UL #define I40E_INSET_IPV6_NEXT_HDR_MASK 0x000C00FFUL -#define I40E_GL_SWT_L2TAGCTRL(_i) (0x001C0A70 + ((_i) * 4)) -#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16 -#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK \ - I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) - /* PCI offset for querying capability */ #define PCI_DEV_CAP_REG 0xA4 /* PCI offset for enabling/disabling Extended Tag */ @@ -317,6 +267,8 @@ static int i40e_dev_queue_stats_mapping_set(struct rte_eth_dev *dev, uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx); +static int i40e_fw_version_get(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); static void i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static int i40e_vlan_filter_set(struct rte_eth_dev *dev, @@ -366,8 +318,8 @@ static void i40e_stat_update_48(struct i40e_hw *hw, uint64_t *offset, uint64_t *stat); static void i40e_pf_config_irq0(struct i40e_hw *hw, bool no_queue); -static void i40e_dev_interrupt_handler( - __rte_unused struct rte_intr_handle *handle, void *param); +static void i40e_dev_interrupt_handler(struct rte_intr_handle *handle, + void *param); static int i40e_res_pool_init(struct i40e_res_pool_info *pool, uint32_t base, uint32_t num); static void i40e_res_pool_destroy(struct i40e_res_pool_info *pool); @@ -399,9 +351,6 @@ static int i40e_dev_udp_tunnel_port_add(struct rte_eth_dev *dev, static int i40e_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); static void i40e_filter_input_set_init(struct i40e_pf *pf); -static int i40e_ethertype_filter_set(struct i40e_pf *pf, - struct rte_eth_ethertype_filter *filter, - bool add); static int i40e_ethertype_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op 
filter_op, void *arg); @@ -411,6 +360,7 @@ static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev, void *arg); static int i40e_dev_get_dcb_info(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info); +static int i40e_dev_sync_phy_type(struct i40e_hw *hw); static void i40e_configure_registers(struct i40e_hw *hw); static void i40e_hw_init(struct rte_eth_dev *dev); static int i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi); @@ -453,6 +403,22 @@ static void i40e_set_default_mac_addr(struct rte_eth_dev *dev, static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); +static int i40e_ethertype_filter_convert( + const struct rte_eth_ethertype_filter *input, + struct i40e_ethertype_filter *filter); +static int i40e_sw_ethertype_filter_insert(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter); + +static int i40e_tunnel_filter_convert( + struct i40e_aqc_add_remove_cloud_filters_element_data *cld_filter, + struct i40e_tunnel_filter *tunnel_filter); +static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf, + struct i40e_tunnel_filter *tunnel_filter); + +static void i40e_ethertype_filter_restore(struct i40e_pf *pf); +static void i40e_tunnel_filter_restore(struct i40e_pf *pf); +static void i40e_filter_restore(struct i40e_pf *pf); + static const struct rte_pci_id pci_id_i40e_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QEMU) }, @@ -474,7 +440,6 @@ static const struct rte_pci_id pci_id_i40e_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722) }, - { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_I_X722) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -496,6 +461,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = { .stats_reset = i40e_dev_stats_reset, .xstats_reset = i40e_dev_stats_reset, .queue_stats_mapping_set = i40e_dev_queue_stats_mapping_set, + .fw_version_get = i40e_fw_version_get, .dev_infos_get = i40e_dev_info_get, .dev_supported_ptypes_get = i40e_dev_supported_ptypes_get, .vlan_filter_set = i40e_vlan_filter_set, @@ -663,10 +629,10 @@ static const struct rte_i40e_xstats_name_off rte_i40e_txq_prio_strings[] = { static struct eth_driver rte_i40e_pmd = { .pci_drv = { - .name = "rte_i40e_pmd", .id_table = pci_id_i40e_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_i40e_dev_init, .eth_dev_uninit = eth_i40e_dev_uninit, @@ -701,33 +667,10 @@ rte_i40e_dev_atomic_write_link_status(struct rte_eth_dev *dev, return 0; } -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI IXGBE devices. 
- */ -static int -rte_i40e_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_i40e_pmd); - - return 0; -} - -static struct rte_driver rte_i40e_driver = { - .type = PMD_PDEV, - .init = rte_i40e_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_i40e_driver, i40e); -DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map); +RTE_PMD_REGISTER_PCI(net_i40e, rte_i40e_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_i40e, pci_id_i40e_map); +RTE_PMD_REGISTER_KMOD_DEP(net_i40e, "* igb_uio | uio_pci_generic | vfio"); -/* - * Initialize registers for flexible payload, which should be set by NVM. - * This should be removed from code once it is fixed in NVM. - */ #ifndef I40E_GLQF_ORT #define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) #endif @@ -735,8 +678,12 @@ DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map); #define I40E_GLQF_PIT(_i) (0x00268C80 + ((_i) * 4)) #endif -static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw) +static inline void i40e_GLQF_reg_init(struct i40e_hw *hw) { + /* + * Initialize registers for flexible payload, which should be set by NVM. + * This should be removed from code once it is fixed in NVM. + */ I40E_WRITE_REG(hw, I40E_GLQF_ORT(18), 0x00000030); I40E_WRITE_REG(hw, I40E_GLQF_ORT(19), 0x00000030); I40E_WRITE_REG(hw, I40E_GLQF_ORT(26), 0x0000002B); @@ -747,17 +694,16 @@ static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw) I40E_WRITE_REG(hw, I40E_GLQF_ORT(20), 0x00000031); I40E_WRITE_REG(hw, I40E_GLQF_ORT(23), 0x00000031); I40E_WRITE_REG(hw, I40E_GLQF_ORT(63), 0x0000002D); - - /* GLQF_PIT Registers */ I40E_WRITE_REG(hw, I40E_GLQF_PIT(16), 0x00007480); I40E_WRITE_REG(hw, I40E_GLQF_PIT(17), 0x00007440); + + /* Initialize registers for parsing packet type of QinQ */ + I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x00000029); + I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x00009420); } #define I40E_FLOW_CONTROL_ETHERTYPE 0x8808 -#define TREX_PATCH -#define TREX_PATCH_LOW_LATENCY - /* * Add a ethertype filter to drop all flow control frames transmitted * from VSIs. 
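Aside (not part of the patch): the I40E_INSET_* defines deleted a couple of hunks above encode the "input set" - which packet fields feed hash and flow-director matching - as individual bits of one 64-bit word. Below is a minimal sketch of composing and testing such a bitmap, using the bit values from that hunk; the IPv4/UDP field selection is only an example, not a driver default.

#include <stdint.h>
#include <stdio.h>

/* bit values copied from the removed defines above */
#define I40E_INSET_IPV4_SRC	0x0000000000000100ULL
#define I40E_INSET_IPV4_DST	0x0000000000000200ULL
#define I40E_INSET_SRC_PORT	0x0000000000001000ULL
#define I40E_INSET_DST_PORT	0x0000000000002000ULL

int main(void)
{
	/* fields used to classify an IPv4/UDP flow */
	uint64_t input_set = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
			     I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT;

	printf("input set = 0x%016llx\n", (unsigned long long)input_set);

	/* a filter that matches on the L4 destination port only makes sense
	 * if that bit is part of the configured input set */
	if (input_set & I40E_INSET_DST_PORT)
		printf("destination port is part of the input set\n");
	return 0;
}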
@@ -776,8 +722,8 @@ i40e_add_tx_flow_control_drop_filter(struct i40e_pf *pf) pf->main_vsi_seid, 0, TRUE, NULL, NULL); if (ret) - PMD_INIT_LOG(ERR, "Failed to add filter to drop flow control " - " frames from VSIs."); + PMD_INIT_LOG(ERR, + "Failed to add filter to drop flow control frames from VSIs."); } static int @@ -920,25 +866,159 @@ is_floating_veb_supported(struct rte_devargs *devargs) static void config_floating_veb(struct rte_eth_dev *dev) { - struct rte_pci_device *pci_dev = dev->pci_dev; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); memset(pf->floating_veb_list, 0, sizeof(pf->floating_veb_list)); if (hw->aq.fw_maj_ver >= FLOATING_VEB_SUPPORTED_FW_MAJ) { - pf->floating_veb = is_floating_veb_supported(pci_dev->devargs); - config_vf_floating_veb(pci_dev->devargs, pf->floating_veb, + pf->floating_veb = + is_floating_veb_supported(pci_dev->device.devargs); + config_vf_floating_veb(pci_dev->device.devargs, + pf->floating_veb, pf->floating_veb_list); } else { pf->floating_veb = false; } } +#define I40E_L2_TAGS_S_TAG_SHIFT 1 +#define I40E_L2_TAGS_S_TAG_MASK I40E_MASK(0x1, I40E_L2_TAGS_S_TAG_SHIFT) + +static int +i40e_init_ethtype_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + char ethertype_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters ethertype_hash_params = { + .name = ethertype_hash_name, + .entries = I40E_MAX_ETHERTYPE_FILTER_NUM, + .key_len = sizeof(struct i40e_ethertype_filter_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize ethertype filter rule list and hash */ + TAILQ_INIT(ðertype_rule->ethertype_list); + snprintf(ethertype_hash_name, RTE_HASH_NAMESIZE, + "ethertype_%s", dev->data->name); + ethertype_rule->hash_table = rte_hash_create(ðertype_hash_params); + if (!ethertype_rule->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create ethertype hash table!"); + return -EINVAL; + } + ethertype_rule->hash_map = rte_zmalloc("i40e_ethertype_hash_map", + sizeof(struct i40e_ethertype_filter *) * + I40E_MAX_ETHERTYPE_FILTER_NUM, + 0); + if (!ethertype_rule->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for ethertype hash map!"); + ret = -ENOMEM; + goto err_ethertype_hash_map_alloc; + } + + return 0; + +err_ethertype_hash_map_alloc: + rte_hash_free(ethertype_rule->hash_table); + + return ret; +} + +static int +i40e_init_tunnel_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + char tunnel_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters tunnel_hash_params = { + .name = tunnel_hash_name, + .entries = I40E_MAX_TUNNEL_FILTER_NUM, + .key_len = sizeof(struct i40e_tunnel_filter_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize tunnel filter rule list and hash */ + TAILQ_INIT(&tunnel_rule->tunnel_list); + snprintf(tunnel_hash_name, RTE_HASH_NAMESIZE, + "tunnel_%s", dev->data->name); + tunnel_rule->hash_table = rte_hash_create(&tunnel_hash_params); + if (!tunnel_rule->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create tunnel hash table!"); + return -EINVAL; + } + tunnel_rule->hash_map = rte_zmalloc("i40e_tunnel_hash_map", + sizeof(struct i40e_tunnel_filter *) * + I40E_MAX_TUNNEL_FILTER_NUM, + 0); + if (!tunnel_rule->hash_map) { + 
PMD_INIT_LOG(ERR, + "Failed to allocate memory for tunnel hash map!"); + ret = -ENOMEM; + goto err_tunnel_hash_map_alloc; + } + + return 0; + +err_tunnel_hash_map_alloc: + rte_hash_free(tunnel_rule->hash_table); + + return ret; +} + +static int +i40e_init_fdir_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_fdir_info *fdir_info = &pf->fdir; + char fdir_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters fdir_hash_params = { + .name = fdir_hash_name, + .entries = I40E_MAX_FDIR_FILTER_NUM, + .key_len = sizeof(struct rte_eth_fdir_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize flow director filter rule list and hash */ + TAILQ_INIT(&fdir_info->fdir_list); + snprintf(fdir_hash_name, RTE_HASH_NAMESIZE, + "fdir_%s", dev->data->name); + fdir_info->hash_table = rte_hash_create(&fdir_hash_params); + if (!fdir_info->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create fdir hash table!"); + return -EINVAL; + } + fdir_info->hash_map = rte_zmalloc("i40e_fdir_hash_map", + sizeof(struct i40e_fdir_filter *) * + I40E_MAX_FDIR_FILTER_NUM, + 0); + if (!fdir_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for fdir hash map!"); + ret = -ENOMEM; + goto err_fdir_hash_map_alloc; + } + return 0; + +err_fdir_hash_map_alloc: + rte_hash_free(fdir_info->hash_table); + + return ret; +} + static int eth_i40e_dev_init(struct rte_eth_dev *dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *vsi; @@ -951,6 +1031,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -960,9 +1041,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) i40e_set_tx_function(dev); return 0; } - pci_dev = dev->pci_dev; + pci_dev = I40E_DEV_TO_PCI(dev); + intr_handle = &pci_dev->intr_handle; rte_eth_copy_pci_info(dev, pci_dev); + dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); pf->adapter->eth_dev = dev; @@ -971,8 +1054,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) hw->back = I40E_PF_TO_ADAPTER(pf); hw->hw_addr = (uint8_t *)(pci_dev->mem_resource[0].addr); if (!hw->hw_addr) { - PMD_INIT_LOG(ERR, "Hardware is not available, " - "as address is NULL"); + PMD_INIT_LOG(ERR, + "Hardware is not available, as address is NULL"); return -ENODEV; } @@ -1005,11 +1088,12 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) } /* - * To work around the NVM issue,initialize registers - * for flexible payload by software. - * It should be removed once issues are fixed in NVM. + * To work around the NVM issue, initialize registers + * for flexible payload and packet type of QinQ by + * software. It should be removed once issues are fixed + * in NVM. 
*/ - i40e_flex_payload_reg_init(hw); + i40e_GLQF_reg_init(hw); /* Initialize the input set for filters (hash and fd) to default value */ i40e_filter_input_set_init(pf); @@ -1032,7 +1116,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) config_floating_veb(dev); /* Clear PXE mode */ i40e_clear_pxe_mode(hw); - + ret = i40e_dev_sync_phy_type(hw); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to sync phy type: %d", ret); + goto err_sync_phy_type; + } /* * On X710, performance number is far from the expectation on recent * firmware versions. The fix for this issue may not be integrated in @@ -1103,8 +1191,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Set the global registers with default ether type value */ ret = i40e_vlan_tpid_set(dev, ETH_VLAN_TYPE_OUTER, ETHER_TYPE_VLAN); if (ret != I40E_SUCCESS) { - PMD_INIT_LOG(ERR, "Failed to set the default outer " - "VLAN ether type"); + PMD_INIT_LOG(ERR, + "Failed to set the default outer VLAN ether type"); goto err_setup_pf_switch; } @@ -1123,6 +1211,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Disable double vlan by default */ i40e_vsi_config_double_vlan(vsi, FALSE); + /* Disable S-TAG identification when floating_veb is disabled */ + if (!pf->floating_veb) { + ret = I40E_READ_REG(hw, I40E_PRT_L2TAGSEN); + if (ret & I40E_L2_TAGS_S_TAG_MASK) { + ret &= ~I40E_L2_TAGS_S_TAG_MASK; + I40E_WRITE_REG(hw, I40E_PRT_L2TAGSEN, ret); + } + } + if (!vsi->max_macaddrs) len = ETHER_ADDR_LEN; else @@ -1131,8 +1228,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Should be after VSI initialized */ dev->data->mac_addrs = rte_zmalloc("i40e", len, 0); if (!dev->data->mac_addrs) { - PMD_INIT_LOG(ERR, "Failed to allocated memory " - "for storing mac address"); + PMD_INIT_LOG(ERR, + "Failed to allocated memory for storing mac address"); goto err_mac_alloc; } ether_addr_copy((struct ether_addr *)hw->mac.perm_addr, @@ -1142,15 +1239,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) i40e_pf_host_init(dev); /* register callback func to eal lib */ - rte_intr_callback_register(&(pci_dev->intr_handle), - i40e_dev_interrupt_handler, (void *)dev); + rte_intr_callback_register(intr_handle, + i40e_dev_interrupt_handler, dev); /* configure and enable device interrupt */ i40e_pf_config_irq0(hw, TRUE); i40e_pf_enable_irq0(hw); /* enable uio intr after callback register */ - rte_intr_enable(&(pci_dev->intr_handle)); + rte_intr_enable(intr_handle); /* * Add an ethertype filter to drop all flow control frames transmitted * from VSIs. 
By doing so, we stop VF from sending out PAUSE or PFC @@ -1173,8 +1270,26 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) pf->flags &= ~I40E_FLAG_DCB; } + ret = i40e_init_ethtype_filter_list(dev); + if (ret < 0) + goto err_init_ethtype_filter_list; + ret = i40e_init_tunnel_filter_list(dev); + if (ret < 0) + goto err_init_tunnel_filter_list; + ret = i40e_init_fdir_filter_list(dev); + if (ret < 0) + goto err_init_fdir_filter_list; + return 0; +err_init_fdir_filter_list: + rte_free(pf->tunnel.hash_table); + rte_free(pf->tunnel.hash_map); +err_init_tunnel_filter_list: + rte_free(pf->ethertype.hash_table); + rte_free(pf->ethertype.hash_map); +err_init_ethtype_filter_list: + rte_free(dev->data->mac_addrs); err_mac_alloc: i40e_vsi_release(pf->main_vsi); err_setup_pf_switch: @@ -1188,17 +1303,79 @@ err_msix_pool_init: err_qp_pool_init: err_parameter_init: err_get_capabilities: +err_sync_phy_type: (void)i40e_shutdown_adminq(hw); return ret; } +static void +i40e_rm_ethtype_filter_list(struct i40e_pf *pf) +{ + struct i40e_ethertype_filter *p_ethertype; + struct i40e_ethertype_rule *ethertype_rule; + + ethertype_rule = &pf->ethertype; + /* Remove all ethertype filter rules and hash */ + if (ethertype_rule->hash_map) + rte_free(ethertype_rule->hash_map); + if (ethertype_rule->hash_table) + rte_hash_free(ethertype_rule->hash_table); + + while ((p_ethertype = TAILQ_FIRST(ðertype_rule->ethertype_list))) { + TAILQ_REMOVE(ðertype_rule->ethertype_list, + p_ethertype, rules); + rte_free(p_ethertype); + } +} + +static void +i40e_rm_tunnel_filter_list(struct i40e_pf *pf) +{ + struct i40e_tunnel_filter *p_tunnel; + struct i40e_tunnel_rule *tunnel_rule; + + tunnel_rule = &pf->tunnel; + /* Remove all tunnel director rules and hash */ + if (tunnel_rule->hash_map) + rte_free(tunnel_rule->hash_map); + if (tunnel_rule->hash_table) + rte_hash_free(tunnel_rule->hash_table); + + while ((p_tunnel = TAILQ_FIRST(&tunnel_rule->tunnel_list))) { + TAILQ_REMOVE(&tunnel_rule->tunnel_list, p_tunnel, rules); + rte_free(p_tunnel); + } +} + +static void +i40e_rm_fdir_filter_list(struct i40e_pf *pf) +{ + struct i40e_fdir_filter *p_fdir; + struct i40e_fdir_info *fdir_info; + + fdir_info = &pf->fdir; + /* Remove all flow director rules and hash */ + if (fdir_info->hash_map) + rte_free(fdir_info->hash_map); + if (fdir_info->hash_table) + rte_hash_free(fdir_info->hash_table); + + while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, p_fdir, rules); + rte_free(p_fdir); + } +} + static int eth_i40e_dev_uninit(struct rte_eth_dev *dev) { + struct i40e_pf *pf; struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct i40e_hw *hw; struct i40e_filter_control_settings settings; + struct rte_flow *p_flow; int ret; uint8_t aq_fail = 0; @@ -1207,8 +1384,10 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - pci_dev = dev->pci_dev; + pci_dev = I40E_DEV_TO_PCI(dev); + intr_handle = &pci_dev->intr_handle; if (hw->adapter_stopped == 0) i40e_dev_close(dev); @@ -1217,11 +1396,6 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev) dev->rx_pkt_burst = NULL; dev->tx_pkt_burst = NULL; - /* Disable LLDP */ - ret = i40e_aq_stop_lldp(hw, true, NULL); - if (ret != I40E_SUCCESS) /* Its failure can be ignored */ - PMD_INIT_LOG(INFO, "Failed to stop lldp"); - /* Clear PXE mode */ i40e_clear_pxe_mode(hw); @@ -1243,11 +1417,21 @@ 
eth_i40e_dev_uninit(struct rte_eth_dev *dev) dev->data->mac_addrs = NULL; /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /* register callback func to eal lib */ - rte_intr_callback_unregister(&(pci_dev->intr_handle), - i40e_dev_interrupt_handler, (void *)dev); + rte_intr_callback_unregister(intr_handle, + i40e_dev_interrupt_handler, dev); + + i40e_rm_ethtype_filter_list(pf); + i40e_rm_tunnel_filter_list(pf); + i40e_rm_fdir_filter_list(pf); + + /* Remove all flows */ + while ((p_flow = TAILQ_FIRST(&pf->flow_list))) { + TAILQ_REMOVE(&pf->flow_list, p_flow, node); + rte_free(p_flow); + } return 0; } @@ -1313,6 +1497,8 @@ i40e_dev_configure(struct rte_eth_dev *dev) } } + TAILQ_INIT(&pf->flow_list); + return 0; err_dcb: @@ -1333,7 +1519,8 @@ void i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_vect = vsi->msix_intr; uint16_t i; @@ -1446,7 +1633,8 @@ void i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_vect = vsi->msix_intr; uint16_t nb_msix = RTE_MIN(vsi->nb_msix, intr_handle->nb_efd); @@ -1517,7 +1705,8 @@ static void i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t interval = i40e_calc_itr_interval(\ RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -1548,7 +1737,8 @@ static void i40e_vsi_disable_queues_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_intr, i; @@ -1571,6 +1761,8 @@ i40e_parse_link_speeds(uint16_t link_speeds) if (link_speeds & ETH_LINK_SPEED_40G) link_speed |= I40E_LINK_SPEED_40GB; + if (link_speeds & ETH_LINK_SPEED_25G) + link_speed |= I40E_LINK_SPEED_25GB; if (link_speeds & ETH_LINK_SPEED_20G) link_speed |= I40E_LINK_SPEED_20GB; if (link_speeds & ETH_LINK_SPEED_10G) @@ -1596,6 +1788,7 @@ i40e_phy_conf_link(struct i40e_hw *hw, I40E_AQ_PHY_FLAG_PAUSE_RX | I40E_AQ_PHY_FLAG_LOW_POWER; const uint8_t advt = I40E_LINK_SPEED_40GB | + I40E_LINK_SPEED_25GB | I40E_LINK_SPEED_10GB | I40E_LINK_SPEED_1GB | I40E_LINK_SPEED_100MB; @@ -1623,6 +1816,8 @@ i40e_phy_conf_link(struct i40e_hw *hw, /* use get_phy_abilities_resp value for the rest */ phy_conf.phy_type = phy_ab.phy_type; + phy_conf.phy_type_ext = phy_ab.phy_type_ext; + phy_conf.fec_config = phy_ab.fec_cfg_curr_mod_ext_info; phy_conf.eee_capability = phy_ab.eee_capability; phy_conf.eeer = phy_ab.eeer_val; phy_conf.low_power_ctrl = phy_ab.d3_lpan; @@ -1654,7 +1849,7 @@ i40e_apply_link_speed(struct rte_eth_dev *dev) abilities |= 
I40E_AQ_PHY_LINK_ENABLED; /* Skip changing speed on 40G interfaces, FW does not support */ - if (i40e_is_40G_device(hw->device_id)) { + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) { speed = I40E_LINK_SPEED_UNKNOWN; abilities |= I40E_AQ_PHY_AN_ENABLED; } @@ -1669,7 +1864,8 @@ i40e_dev_start(struct rte_eth_dev *dev) struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *main_vsi = pf->main_vsi; int ret, i; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; hw->adapter_stopped = 0; @@ -1686,8 +1882,9 @@ i40e_dev_start(struct rte_eth_dev *dev) !RTE_ETH_DEV_SRIOV(dev).active) && dev->data->dev_conf.intr_conf.rxq != 0) { intr_vector = dev->data->nb_rx_queues; - if (rte_intr_efd_enable(intr_handle, intr_vector)) - return -1; + ret = rte_intr_efd_enable(intr_handle, intr_vector); + if (ret) + return ret; } if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { @@ -1696,8 +1893,9 @@ i40e_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { - PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + PMD_INIT_LOG(ERR, + "Failed to allocate %d rx_queues intr_vec", + dev->data->nb_rx_queues); return -ENOMEM; } } @@ -1750,7 +1948,8 @@ i40e_dev_start(struct rte_eth_dev *dev) /* Apply link configure */ if (dev->data->dev_conf.link_speeds & ~(ETH_LINK_SPEED_100M | ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G | - ETH_LINK_SPEED_20G | ETH_LINK_SPEED_40G)) { + ETH_LINK_SPEED_20G | ETH_LINK_SPEED_25G | + ETH_LINK_SPEED_40G)) { PMD_DRV_LOG(ERR, "Invalid link setting"); goto err_up; } @@ -1769,13 +1968,25 @@ i40e_dev_start(struct rte_eth_dev *dev) i40e_pf_enable_irq0(hw); if (dev->data->dev_conf.intr_conf.lsc != 0) - PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + PMD_INIT_LOG(INFO, + "lsc won't enable because of no intr multiplex"); + } else if (dev->data->dev_conf.intr_conf.lsc != 0) { + ret = i40e_aq_set_phy_int_mask(hw, + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL | + I40E_AQ_EVENT_MEDIA_NA), NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(WARNING, "Fail to set phy mask"); + + /* Call get_link_info aq commond to enable LSE */ + i40e_dev_link_update(dev, 0); } /* enable uio intr after callback register */ rte_intr_enable(intr_handle); + i40e_filter_restore(pf); + return I40E_SUCCESS; err_up: @@ -1791,7 +2002,8 @@ i40e_dev_stop(struct rte_eth_dev *dev) struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_vsi *main_vsi = pf->main_vsi; struct i40e_mirror_rule *p_mirror; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int i; /* Disable all queues */ @@ -1842,6 +2054,8 @@ i40e_dev_close(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t reg; int i; @@ -1853,23 +2067,22 @@ i40e_dev_close(struct rte_eth_dev *dev) /* Disable interrupt */ i40e_pf_disable_irq0(hw); - rte_intr_disable(&(dev->pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /* shutdown and destroy the 
HMC */ i40e_shutdown_lan_hmc(hw); - /* release all the existing VSIs and VEBs */ - i40e_fdir_teardown(pf); - i40e_vsi_release(pf->main_vsi); - for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) { i40e_vsi_release(pf->vmdq[i].vsi); pf->vmdq[i].vsi = NULL; } - rte_free(pf->vmdq); pf->vmdq = NULL; + /* release all the existing VSIs and VEBs */ + i40e_fdir_teardown(pf); + i40e_vsi_release(pf->main_vsi); + /* shutdown the adminq */ i40e_aq_queue_shutdown(hw, true); i40e_shutdown_adminq(hw); @@ -1970,9 +2183,10 @@ static int i40e_dev_set_link_down(struct rte_eth_dev *dev) { uint8_t speed = I40E_LINK_SPEED_UNKNOWN; - uint8_t abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK; + uint8_t abilities = 0; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK; return i40e_phy_conf_link(hw, abilities, speed); } @@ -1987,6 +2201,7 @@ i40e_dev_link_update(struct rte_eth_dev *dev, struct rte_eth_link link, old; int status; unsigned rep_cnt = MAX_REPEAT_TIME; + bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false; memset(&link, 0, sizeof(link)); memset(&old, 0, sizeof(old)); @@ -1995,7 +2210,8 @@ i40e_dev_link_update(struct rte_eth_dev *dev, do { /* Get link status information from hardware */ - status = i40e_aq_get_link_info(hw, false, &link_status, NULL); + status = i40e_aq_get_link_info(hw, enable_lse, + &link_status, NULL); if (status != I40E_SUCCESS) { link.link_speed = ETH_SPEED_NUM_100M; link.link_duplex = ETH_LINK_FULL_DUPLEX; @@ -2030,6 +2246,9 @@ i40e_dev_link_update(struct rte_eth_dev *dev, case I40E_LINK_SPEED_20GB: link.link_speed = ETH_SPEED_NUM_20G; break; + case I40E_LINK_SPEED_25GB: + link.link_speed = ETH_SPEED_NUM_25G; + break; case I40E_LINK_SPEED_40GB: link.link_speed = ETH_SPEED_NUM_40G; break; @@ -2296,11 +2515,9 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw) I40E_GLPRT_PTC9522L(hw->port), pf->offset_loaded, &os->tx_size_big, &ns->tx_size_big); -#ifndef TREX_PATCH i40e_stat_update_32(hw, I40E_GLQF_PCNT(pf->fdir.match_counter_index), pf->offset_loaded, &os->fd_sb_match, &ns->fd_sb_match); -#endif /* GLPRT_MSPDC not supported */ /* GLPRT_XEC not supported */ @@ -2310,46 +2527,6 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw) i40e_update_vsi_stats(pf->main_vsi); } -//TREX_PATCH -// fill stats array with fdir rules match count statistics -// Notice that we read statistics from start to start + len, but we fill the stats are -// starting from 0 with len values -void -i40e_trex_fdir_stats_get(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len) -{ - int i; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - for (i = 0; i < len; i++) { - stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start)); - } -} - -// TREX_PATCH -void -i40e_trex_fdir_stats_reset(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len) -{ - int i; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - for (i = 0; i < len; i++) { - if (stats) { - stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start)); - } - I40E_WRITE_REG(hw, I40E_GLQF_PCNT(i + start), 0xffffffff); - } -} - -// TREX_PATCH -int -i40e_trex_get_fw_ver(struct rte_eth_dev *dev, uint32_t *nvm_ver) -{ - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - *nvm_ver = hw->nvm.version; - return 0; -} - /* Get all statistics of a port */ static void i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) @@ -2366,17 +2543,10 @@ i40e_dev_stats_get(struct 
rte_eth_dev *dev, struct rte_eth_stats *stats) pf->main_vsi->eth_stats.rx_multicast + pf->main_vsi->eth_stats.rx_broadcast - pf->main_vsi->eth_stats.rx_discards; -#ifndef TREX_PATCH stats->opackets = pf->main_vsi->eth_stats.tx_unicast + pf->main_vsi->eth_stats.tx_multicast + pf->main_vsi->eth_stats.tx_broadcast; stats->ibytes = ns->eth.rx_bytes; -#else - /* Hanoch: move to global transmit and not pf->vsi and we have two high and low priorty */ - stats->opackets = ns->eth.tx_unicast +ns->eth.tx_multicast +ns->eth.tx_broadcast; - stats->ibytes = pf->main_vsi->eth_stats.rx_bytes; -#endif - stats->obytes = ns->eth.tx_bytes; stats->oerrors = ns->eth.tx_errors + pf->main_vsi->eth_stats.tx_errors; @@ -2557,6 +2727,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < I40E_NB_ETH_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) + rte_i40e_stats_strings[i].offset); + xstats[count].id = count; count++; } @@ -2564,6 +2735,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_i40e_hw_port_strings[i].offset); + xstats[count].id = count; count++; } @@ -2573,6 +2745,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)hw_stats) + rte_i40e_rxq_prio_strings[i].offset + (sizeof(uint64_t) * prio)); + xstats[count].id = count; count++; } } @@ -2583,6 +2756,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)hw_stats) + rte_i40e_txq_prio_strings[i].offset + (sizeof(uint64_t) * prio)); + xstats[count].id = count; count++; } } @@ -2601,19 +2775,49 @@ i40e_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *dev, return -ENOSYS; } +static int +i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) +{ + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + u32 full_ver; + u8 ver, patch; + u16 build; + int ret; + + full_ver = hw->nvm.oem_ver; + ver = (u8)(full_ver >> 24); + build = (u16)((full_ver >> 8) & 0xffff); + patch = (u8)(full_ver & 0xff); + + ret = snprintf(fw_version, fw_size, + "%d.%d%d 0x%08x %d.%d.%d", + ((hw->nvm.version >> 12) & 0xf), + ((hw->nvm.version >> 4) & 0xff), + (hw->nvm.version & 0xf), hw->nvm.eetrack, + ver, build, patch); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *vsi = pf->main_vsi; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = vsi->nb_qps; dev_info->max_tx_queues = vsi->nb_qps; dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN; dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX; dev_info->max_mac_addrs = vsi->max_macaddrs; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_QINQ_STRIP | @@ -2628,7 +2832,11 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_SCTP_CKSUM | DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | - DEV_TX_OFFLOAD_TCP_TSO; + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + 
DEV_TX_OFFLOAD_GRE_TNL_TSO | + DEV_TX_OFFLOAD_IPIP_TNL_TSO | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO; dev_info->hash_key_size = (I40E_PFQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t); dev_info->reta_size = pf->hash_lut_size; @@ -2666,6 +2874,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { @@ -2678,9 +2888,12 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_tx_queues += dev_info->vmdq_queue_num; } - if (i40e_is_40G_device(hw->device_id)) + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) /* For XL710 */ dev_info->speed_capa = ETH_LINK_SPEED_40G; + else if (I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types)) + /* For XXV710 */ + dev_info->speed_capa = ETH_LINK_SPEED_25G; else /* For X710 */ dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G; @@ -2723,7 +2936,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, else { ret = -EINVAL; PMD_DRV_LOG(ERR, - "Unsupported vlan type in single vlan.\n"); + "Unsupported vlan type in single vlan."); return ret; } break; @@ -2735,13 +2948,15 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, ret = i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id), ®_r, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Fail to debug read from " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id); + PMD_DRV_LOG(ERR, + "Fail to debug read from I40E_GL_SWT_L2TAGCTRL[%d]", + reg_id); ret = -EIO; return ret; } - PMD_DRV_LOG(DEBUG, "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: " - "0x%08"PRIx64"", reg_id, reg_r); + PMD_DRV_LOG(DEBUG, + "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: 0x%08"PRIx64, + reg_id, reg_r); reg_w = reg_r & (~(I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK)); reg_w |= ((uint64_t)tpid << I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT); @@ -2755,12 +2970,14 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, reg_w, NULL); if (ret != I40E_SUCCESS) { ret = -EIO; - PMD_DRV_LOG(ERR, "Fail to debug write to " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id); + PMD_DRV_LOG(ERR, + "Fail to debug write to I40E_GL_SWT_L2TAGCTRL[%d]", + reg_id); return ret; } - PMD_DRV_LOG(DEBUG, "Debug write 0x%08"PRIx64" to " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_w, reg_id); + PMD_DRV_LOG(DEBUG, + "Debug write 0x%08"PRIx64" to I40E_GL_SWT_L2TAGCTRL[%d]", + reg_w, reg_id); return ret; } @@ -2904,8 +3121,9 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) max_high_water = I40E_RXPBSIZE >> I40E_KILOSHIFT; if ((fc_conf->high_water > max_high_water) || (fc_conf->high_water < fc_conf->low_water)) { - PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB, " - "High_water must <= %d.", max_high_water); + PMD_INIT_LOG(ERR, + "Invalid high/low water setup value in KB, High_water must be <= %d.", + max_high_water); return -EINVAL; } @@ -2926,7 +3144,7 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) if (err < 0) return -ENOSYS; - if (i40e_is_40G_device(hw->device_id)) { + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) { /* Configure flow control refresh threshold, * the value for stat_tx_pause_refresh_timer[8] * is used for global pause operation. 
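Aside (not part of the patch): the new i40e_fw_version_get() above formats the firmware/NVM version from two packed words - hw->nvm.version holds the NVM map version as nibbles, and hw->nvm.oem_ver holds the OEM version as <ver:8><build:16><patch:8> - and returns the required length (including the trailing '\0') when the caller's buffer is too small, or 0 on success. Below is a self-contained sketch of just the decoding and formatting; the register contents are made up.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical values - layouts match the driver code above */
	uint32_t nvm_version = 0x5321;		/* NVM map version, packed nibbles */
	uint32_t eetrack     = 0x80001234;	/* NVM eetrack id */
	uint32_t oem_ver     = 0x060A0003;	/* OEM version: <ver:8><build:16><patch:8> */
	char fw_version[64];

	unsigned int ver   = oem_ver >> 24;
	unsigned int build = (oem_ver >> 8) & 0xffff;
	unsigned int patch = oem_ver & 0xff;

	snprintf(fw_version, sizeof(fw_version),
		 "%u.%u%u 0x%08" PRIx32 " %u.%u.%u",
		 (nvm_version >> 12) & 0xf,	/* NVM map major */
		 (nvm_version >> 4) & 0xff,	/* NVM map minor */
		 nvm_version & 0xf,		/* NVM map revision */
		 eetrack, ver, build, patch);
	printf("%s\n", fw_version);
	return 0;
}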
@@ -3077,8 +3295,8 @@ i40e_macaddr_remove(struct rte_eth_dev *dev, uint32_t index) /* No VMDQ pool enabled or configured */ if (!(pf->flags & I40E_FLAG_VMDQ) || (i > pf->nb_cfg_vmdq_vsi)) { - PMD_DRV_LOG(ERR, "No VMDQ pool enabled" - "/configured"); + PMD_DRV_LOG(ERR, + "No VMDQ pool enabled/configured"); return; } vsi = pf->vmdq[i - 1].vsi; @@ -3279,9 +3497,9 @@ i40e_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != lut_size || reta_size > ETH_RSS_RETA_SIZE_512) { - PMD_DRV_LOG(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, lut_size); + PMD_DRV_LOG(ERR, + "The size of hash lookup table configured (%d) doesn't match the number hardware can supported (%d)", + reta_size, lut_size); return -EINVAL; } @@ -3320,9 +3538,9 @@ i40e_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != lut_size || reta_size > ETH_RSS_RETA_SIZE_512) { - PMD_DRV_LOG(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, lut_size); + PMD_DRV_LOG(ERR, + "The size of hash lookup table configured (%d) doesn't match the number hardware can supported (%d)", + reta_size, lut_size); return -EINVAL; } @@ -3377,8 +3595,9 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw, mem->va = mz->addr; mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr); mem->zone = (const void *)mz; - PMD_DRV_LOG(DEBUG, "memzone %s allocated with physical address: " - "%"PRIu64, mz->name, mem->pa); + PMD_DRV_LOG(DEBUG, + "memzone %s allocated with physical address: %"PRIu64, + mz->name, mem->pa); return I40E_SUCCESS; } @@ -3395,9 +3614,9 @@ i40e_free_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw, if (!mem) return I40E_ERR_PARAM; - PMD_DRV_LOG(DEBUG, "memzone %s to be freed with physical address: " - "%"PRIu64, ((const struct rte_memzone *)mem->zone)->name, - mem->pa); + PMD_DRV_LOG(DEBUG, + "memzone %s to be freed with physical address: %"PRIu64, + ((const struct rte_memzone *)mem->zone)->name, mem->pa); rte_memzone_free((const struct rte_memzone *)mem->zone); mem->zone = NULL; mem->va = NULL; @@ -3508,9 +3727,10 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); uint16_t qp_count = 0, vsi_count = 0; - if (dev->pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) { + if (pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) { PMD_INIT_LOG(ERR, "HW configuration doesn't support SRIOV"); return -EINVAL; } @@ -3551,13 +3771,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) /* VF queue/VSI allocation */ pf->vf_qp_offset = pf->lan_qp_offset + pf->lan_nb_qps; - if (hw->func_caps.sr_iov_1_1 && dev->pci_dev->max_vfs) { + if (hw->func_caps.sr_iov_1_1 && pci_dev->max_vfs) { pf->flags |= I40E_FLAG_SRIOV; pf->vf_nb_qps = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF; - pf->vf_num = dev->pci_dev->max_vfs; - PMD_DRV_LOG(DEBUG, "%u VF VSIs, %u queues per VF VSI, " - "in total %u queues", pf->vf_num, pf->vf_nb_qps, - pf->vf_nb_qps * pf->vf_num); + pf->vf_num = pci_dev->max_vfs; + PMD_DRV_LOG(DEBUG, + "%u VF VSIs, %u queues per VF VSI, in total %u queues", + pf->vf_num, pf->vf_nb_qps, pf->vf_nb_qps * pf->vf_num); } else { pf->vf_nb_qps = 0; pf->vf_num = 0; @@ -3585,14 +3805,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) if (pf->max_nb_vmdq_vsi) { pf->flags |= I40E_FLAG_VMDQ; pf->vmdq_nb_qps = pf->vmdq_nb_qp_max; - 
PMD_DRV_LOG(DEBUG, "%u VMDQ VSIs, %u queues " - "per VMDQ VSI, in total %u queues", - pf->max_nb_vmdq_vsi, - pf->vmdq_nb_qps, pf->vmdq_nb_qps * - pf->max_nb_vmdq_vsi); + PMD_DRV_LOG(DEBUG, + "%u VMDQ VSIs, %u queues per VMDQ VSI, in total %u queues", + pf->max_nb_vmdq_vsi, pf->vmdq_nb_qps, + pf->vmdq_nb_qps * pf->max_nb_vmdq_vsi); } else { - PMD_DRV_LOG(INFO, "No enough queues left for " - "VMDq"); + PMD_DRV_LOG(INFO, + "No enough queues left for VMDq"); } } else { PMD_DRV_LOG(INFO, "No queue or VSI left for VMDq"); @@ -3605,15 +3824,15 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) pf->flags |= I40E_FLAG_DCB; if (qp_count > hw->func_caps.num_tx_qp) { - PMD_DRV_LOG(ERR, "Failed to allocate %u queues, which exceeds " - "the hardware maximum %u", qp_count, - hw->func_caps.num_tx_qp); + PMD_DRV_LOG(ERR, + "Failed to allocate %u queues, which exceeds the hardware maximum %u", + qp_count, hw->func_caps.num_tx_qp); return -EINVAL; } if (vsi_count > hw->func_caps.num_vsis) { - PMD_DRV_LOG(ERR, "Failed to allocate %u VSIs, which exceeds " - "the hardware maximum %u", vsi_count, - hw->func_caps.num_vsis); + PMD_DRV_LOG(ERR, + "Failed to allocate %u VSIs, which exceeds the hardware maximum %u", + vsi_count, hw->func_caps.num_vsis); return -EINVAL; } @@ -3859,8 +4078,8 @@ i40e_res_pool_alloc(struct i40e_res_pool_info *pool, */ entry = rte_zmalloc("res_pool", sizeof(*entry), 0); if (entry == NULL) { - PMD_DRV_LOG(ERR, "Failed to allocate memory for " - "resource pool"); + PMD_DRV_LOG(ERR, + "Failed to allocate memory for resource pool"); return -ENOMEM; } entry->base = valid_entry->base; @@ -3900,9 +4119,9 @@ validate_tcmap_parameter(struct i40e_vsi *vsi, uint8_t enabled_tcmap) } if (!bitmap_is_subset(hw->func_caps.enabled_tcmap, enabled_tcmap)) { - PMD_DRV_LOG(ERR, "Enabled TC map 0x%x not applicable to " - "HW support 0x%x", hw->func_caps.enabled_tcmap, - enabled_tcmap); + PMD_DRV_LOG(ERR, + "Enabled TC map 0x%x not applicable to HW support 0x%x", + hw->func_caps.enabled_tcmap, enabled_tcmap); return I40E_NOT_SUPPORTED; } return I40E_SUCCESS; @@ -4108,18 +4327,10 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi) /* create floating veb if vsi is NULL */ if (vsi != NULL) { ret = i40e_aq_add_veb(hw, veb->uplink_seid, vsi->seid, -#ifdef TREX_PATCH_LOW_LATENCY - vsi->enabled_tc, false, -#else - I40E_DEFAULT_TCMAP, false, -#endif + I40E_DEFAULT_TCMAP, false, &veb->seid, false, NULL); } else { -#ifdef TREX_PATCH_LOW_LATENCY - ret = i40e_aq_add_veb(hw, 0, 0, vsi->enabled_tc, -#else ret = i40e_aq_add_veb(hw, 0, 0, I40E_DEFAULT_TCMAP, -#endif true, &veb->seid, false, NULL); } @@ -4133,7 +4344,7 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi) ret = i40e_aq_get_veb_parameters(hw, veb->seid, NULL, NULL, &veb->stats_idx, NULL, NULL, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Get veb statics index failed, aq_err: %d", + PMD_DRV_LOG(ERR, "Get veb statistics index failed, aq_err: %d", hw->aq.asq_last_status); goto fail; } @@ -4157,11 +4368,16 @@ i40e_vsi_release(struct i40e_vsi *vsi) void *temp; int ret; struct i40e_mac_filter *f; - uint16_t user_param = vsi->user_param; + uint16_t user_param; if (!vsi) return I40E_SUCCESS; + if (!vsi->adapter) + return -EFAULT; + + user_param = vsi->user_param; + pf = I40E_VSI_TO_PF(vsi); hw = I40E_VSI_TO_HW(vsi); @@ -4250,8 +4466,8 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct ether_addr *mac; - PMD_DRV_LOG(WARNING, "Cannot remove the default " - "macvlan filter"); + PMD_DRV_LOG(WARNING, + "Cannot 
remove the default macvlan filter"); /* It needs to add the permanent mac into mac list */ f = rte_zmalloc("macv_filter", sizeof(*f), 0); if (f == NULL) { @@ -4273,57 +4489,6 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi) return i40e_vsi_add_mac(vsi, &filter); } -#ifdef TREX_PATCH_LOW_LATENCY -static int -i40e_vsi_update_tc_max_bw(struct i40e_vsi *vsi, u16 credit){ - struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); - int ret; - - if (!vsi->seid) { - PMD_DRV_LOG(ERR, "seid not valid"); - return -EINVAL; - } - - ret = i40e_aq_config_vsi_bw_limit(hw, vsi->seid, credit,0, NULL); - if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure TC BW"); - return ret; - } - return (0); -} - -static int -i40e_vsi_update_tc_bandwidth_ex(struct i40e_vsi *vsi) -{ - struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); - int i, ret; - struct i40e_aqc_configure_vsi_ets_sla_bw_data tc_bw_data; - struct i40e_aqc_configure_vsi_tc_bw_data * res_buffer; - - if (!vsi->seid) { - PMD_DRV_LOG(ERR, "seid not valid"); - return -EINVAL; - } - - memset(&tc_bw_data, 0, sizeof(tc_bw_data)); - tc_bw_data.tc_valid_bits = 3; - - /* enable TC 0,1 */ - ret = i40e_aq_config_vsi_ets_sla_bw_limit(hw, vsi->seid, &tc_bw_data, NULL); - if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure TC BW"); - return ret; - } - - vsi->enabled_tc=3; - res_buffer = ( struct i40e_aqc_configure_vsi_tc_bw_data *)&tc_bw_data; - (void)rte_memcpy(vsi->info.qs_handle, res_buffer->qs_handles, - sizeof(vsi->info.qs_handle)); - - return I40E_SUCCESS; -} -#endif - /* * i40e_vsi_get_bw_config - Query VSI BW Information * @vsi: the VSI to be queried @@ -4352,8 +4517,9 @@ i40e_vsi_get_bw_config(struct i40e_vsi *vsi) ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &ets_sla_config, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "VSI failed to get TC bandwdith " - "configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "VSI failed to get TC bandwdith configuration %u", + hw->aq.asq_last_status); return ret; } @@ -4399,8 +4565,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf) /* Use the FW API if FW >= v5.0 */ if (hw->aq.fw_maj_ver < 5) { - //TREX_PATCH - changed from ERR to INFO. Most of our customers do not have latest FW - PMD_INIT_LOG(INFO, "FW < v5.0, cannot enable loopback"); + PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback"); return; } @@ -4421,7 +4586,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf) ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) - PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d\n", + PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d", hw->aq.asq_last_status); } @@ -4442,14 +4607,14 @@ i40e_vsi_setup(struct i40e_pf *pf, if (type != I40E_VSI_MAIN && type != I40E_VSI_SRIOV && uplink_vsi == NULL) { - PMD_DRV_LOG(ERR, "VSI setup failed, " - "VSI link shouldn't be NULL"); + PMD_DRV_LOG(ERR, + "VSI setup failed, VSI link shouldn't be NULL"); return NULL; } if (type == I40E_VSI_MAIN && uplink_vsi != NULL) { - PMD_DRV_LOG(ERR, "VSI setup failed, MAIN VSI " - "uplink VSI should be NULL"); + PMD_DRV_LOG(ERR, + "VSI setup failed, MAIN VSI uplink VSI should be NULL"); return NULL; } @@ -4493,6 +4658,7 @@ i40e_vsi_setup(struct i40e_pf *pf, vsi->max_macaddrs = I40E_NUM_MACADDR_MAX; vsi->parent_vsi = uplink_vsi ? 
uplink_vsi : pf->main_vsi; vsi->user_param = user_param; + vsi->vlan_anti_spoof_on = 0; /* Allocate queues */ switch (vsi->type) { case I40E_VSI_MAIN : @@ -4600,8 +4766,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.seid = vsi->seid; @@ -4671,8 +4837,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4714,8 +4880,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4732,8 +4898,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping."); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping."); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4996,8 +5162,9 @@ i40e_pf_setup(struct i40e_pf *pf) /* make queue allocated first, let FDIR use queue pair 0*/ ret = i40e_res_pool_alloc(&pf->qp_pool, I40E_DEFAULT_QP_NUM_FDIR); if (ret != I40E_FDIR_QUEUE_ID) { - PMD_DRV_LOG(ERR, "queue allocation fails for FDIR :" - " ret =%d", ret); + PMD_DRV_LOG(ERR, + "queue allocation fails for FDIR: ret =%d", + ret); pf->flags &= ~I40E_FLAG_FDIR; } } @@ -5016,12 +5183,12 @@ i40e_pf_setup(struct i40e_pf *pf) else if (hw->func_caps.rss_table_size == ETH_RSS_RETA_SIZE_512) settings.hash_lut_size = I40E_HASH_LUT_SIZE_512; else { - PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported\n", - hw->func_caps.rss_table_size); + PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported", + hw->func_caps.rss_table_size); return I40E_ERR_PARAM; } - PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table " - "size: %u\n", hw->func_caps.rss_table_size); + PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table size: %u", + hw->func_caps.rss_table_size); pf->hash_lut_size = hw->func_caps.rss_table_size; /* Enable ethtype and macvlan filters */ @@ -5271,8 +5438,8 @@ i40e_dev_rx_init(struct i40e_pf *pf) ret = i40e_rx_queue_init(rxq); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to do RX queue " - "initialization"); + PMD_DRV_LOG(ERR, + "Failed to do RX queue initialization"); break; } } @@ -5518,6 +5685,24 @@ i40e_dev_handle_vfr_event(struct rte_eth_dev *dev) } } +static void +i40e_notify_all_vfs_link_status(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_virtchnl_pf_event event; + int i; + + event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; + event.event_data.link_event.link_status = + dev->data->dev_link.link_status; + event.event_data.link_event.link_speed = + (enum i40e_aq_link_speed)dev->data->dev_link.link_speed; + + for (i = 0; i < pf->vf_num; i++) + i40e_pf_host_send_msg_to_vf(&pf->vfs[i], I40E_VIRTCHNL_OP_EVENT, + I40E_SUCCESS, 
(uint8_t *)&event, sizeof(event)); +} + static void i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) { @@ -5538,8 +5723,9 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) ret = i40e_clean_arq_element(hw, &info, &pending); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(INFO, "Failed to read msg from AdminQ, " - "aq_err: %u", hw->aq.asq_last_status); + PMD_DRV_LOG(INFO, + "Failed to read msg from AdminQ, aq_err: %u", + hw->aq.asq_last_status); break; } opcode = rte_le_to_cpu_16(info.desc.opcode); @@ -5554,6 +5740,14 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) info.msg_buf, info.msg_len); break; + case i40e_aqc_opc_get_link_status: + ret = i40e_dev_link_update(dev, 0); + if (!ret) { + i40e_notify_all_vfs_link_status(dev); + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, NULL); + } + break; default: PMD_DRV_LOG(ERR, "Request %u is not supported yet", opcode); @@ -5563,81 +5757,30 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) rte_free(info.msg_buf); } -/* - * Interrupt handler is registered as the alarm callback for handling LSC - * interrupt in a definite of time, in order to wait the NIC into a stable - * state. Currently it waits 1 sec in i40e for the link up interrupt, and - * no need for link down interrupt. +/** + * Interrupt handler triggered by NIC for handling + * specific interrupt. + * + * @param handle + * Pointer to interrupt handle. + * @param param + * The address of parameter (struct rte_eth_dev *) regsitered before. + * + * @return + * void */ static void -i40e_dev_interrupt_delayed_handler(void *param) +i40e_dev_interrupt_handler(struct rte_intr_handle *intr_handle, + void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t icr0; - /* read interrupt causes again */ - icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0); - -#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER - if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error\n"); - if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) - PMD_DRV_LOG(ERR, "ICR0: malicious programming detected\n"); - if (icr0 & I40E_PFINT_ICR0_GRST_MASK) - PMD_DRV_LOG(INFO, "ICR0: global reset requested\n"); - if (icr0 & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) - PMD_DRV_LOG(INFO, "ICR0: PCI exception\n activated\n"); - if (icr0 & I40E_PFINT_ICR0_STORM_DETECT_MASK) - PMD_DRV_LOG(INFO, "ICR0: a change in the storm control " - "state\n"); - if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: HMC error\n"); - if (icr0 & I40E_PFINT_ICR0_PE_CRITERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: protocol engine critical error\n"); -#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ - - if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { - PMD_DRV_LOG(INFO, "INT:VF reset detected\n"); - i40e_dev_handle_vfr_event(dev); - } - if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { - PMD_DRV_LOG(INFO, "INT:ADMINQ event\n"); - i40e_dev_handle_aq_msg(dev); - } - - /* handle the link up interrupt in an alarm callback */ - i40e_dev_link_update(dev, 0); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); - - i40e_pf_enable_irq0(hw); - rte_intr_enable(&(dev->pci_dev->intr_handle)); -} - -/** - * Interrupt handler triggered by NIC for handling - * specific interrupt. - * - * @param handle - * Pointer to interrupt handle. - * @param param - * The address of parameter (struct rte_eth_dev *) regsitered before. 
- * - * @return - * void - */ -static void -i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) -{ - struct rte_eth_dev *dev = (struct rte_eth_dev *)param; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t icr0; - - /* Disable interrupt */ - i40e_pf_disable_irq0(hw); - - /* read out interrupt causes */ + /* Disable interrupt */ + i40e_pf_disable_irq0(hw); + + /* read out interrupt causes */ icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0); /* No interrupt event indicated */ @@ -5671,34 +5814,10 @@ i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, i40e_dev_handle_aq_msg(dev); } - /* Link Status Change interrupt */ - if (icr0 & I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK) { -#define I40E_US_PER_SECOND 1000000 - struct rte_eth_link link; - - PMD_DRV_LOG(INFO, "ICR0: link status changed\n"); - memset(&link, 0, sizeof(link)); - rte_i40e_dev_atomic_read_link_status(dev, &link); - i40e_dev_link_update(dev, 0); - - /* - * For link up interrupt, it needs to wait 1 second to let the - * hardware be a stable state. Otherwise several consecutive - * interrupts can be observed. - * For link down interrupt, no need to wait. - */ - if (!link.link_status && rte_eal_alarm_set(I40E_US_PER_SECOND, - i40e_dev_interrupt_delayed_handler, (void *)dev) >= 0) - return; - else - _rte_eth_dev_callback_process(dev, - RTE_ETH_EVENT_INTR_LSC); - } - done: /* Enable interrupt */ i40e_pf_enable_irq0(hw); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); } static int @@ -5751,7 +5870,7 @@ i40e_add_macvlan_filters(struct i40e_vsi *vsi, flags = I40E_AQC_MACVLAN_ADD_HASH_MATCH; break; default: - PMD_DRV_LOG(ERR, "Invalid MAC match type\n"); + PMD_DRV_LOG(ERR, "Invalid MAC match type"); ret = I40E_ERR_PARAM; goto DONE; } @@ -5826,7 +5945,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi, flags = I40E_AQC_MACVLAN_DEL_HASH_MATCH; break; default: - PMD_DRV_LOG(ERR, "Invalid MAC filter type\n"); + PMD_DRV_LOG(ERR, "Invalid MAC filter type"); ret = I40E_ERR_PARAM; goto DONE; } @@ -5881,14 +6000,11 @@ i40e_find_vlan_filter(struct i40e_vsi *vsi, } static void -i40e_set_vlan_filter(struct i40e_vsi *vsi, - uint16_t vlan_id, bool on) +i40e_store_vlan_filter(struct i40e_vsi *vsi, + uint16_t vlan_id, bool on) { uint32_t vid_idx, vid_bit; - if (vlan_id > ETH_VLAN_ID_MAX) - return; - vid_idx = I40E_VFTA_IDX(vlan_id); vid_bit = I40E_VFTA_BIT(vlan_id); @@ -5898,6 +6014,38 @@ i40e_set_vlan_filter(struct i40e_vsi *vsi, vsi->vfta[vid_idx] &= ~vid_bit; } +static void +i40e_set_vlan_filter(struct i40e_vsi *vsi, + uint16_t vlan_id, bool on) +{ + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); + struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0}; + int ret; + + if (vlan_id > ETH_VLAN_ID_MAX) + return; + + i40e_store_vlan_filter(vsi, vlan_id, on); + + if (!vsi->vlan_anti_spoof_on || !vlan_id) + return; + + vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id); + + if (on) { + ret = i40e_aq_add_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(ERR, "Failed to add vlan filter"); + } else { + ret = i40e_aq_remove_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(ERR, + "Failed to remove vlan filter"); + } +} + /** * Find all vlan options for specific mac addr, * return with actual vlan found. 
@@ -5923,8 +6071,8 @@ i40e_find_all_vlan_for_mac(struct i40e_vsi *vsi, for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) { if (vsi->vfta[j] & (1 << k)) { if (i > num - 1) { - PMD_DRV_LOG(ERR, "vlan number " - "not match"); + PMD_DRV_LOG(ERR, + "vlan number doesn't match"); return I40E_ERR_PARAM; } (void)rte_memcpy(&mv_f[i].macaddr, @@ -5969,7 +6117,7 @@ i40e_find_all_mac_for_vlan(struct i40e_vsi *vsi, static int i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) { - int i, num; + int i, j, num; struct i40e_mac_filter *f; struct i40e_macvlan_filter *mv_f; int ret = I40E_SUCCESS; @@ -5994,6 +6142,7 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) TAILQ_FOREACH(f, &vsi->mac_list, next) { (void)rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr, ETH_ADDR_LEN); + mv_f[i].filter_type = f->mac_info.filter_type; mv_f[i].vlan_id = 0; i++; } @@ -6003,6 +6152,8 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) vsi->vlan_num, &f->mac_info.mac_addr); if (ret != I40E_SUCCESS) goto DONE; + for (j = i; j < i + vsi->vlan_num; j++) + mv_f[j].filter_type = f->mac_info.filter_type; i += vsi->vlan_num; } } @@ -6214,7 +6365,7 @@ i40e_vsi_delete_mac(struct i40e_vsi *vsi, struct ether_addr *addr) if (filter_type == RTE_MACVLAN_PERFECT_MATCH || filter_type == RTE_MACVLAN_HASH_MATCH) { if (vlan_num == 0) { - PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0\n"); + PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0"); return I40E_ERR_PARAM; } } else if (filter_type == RTE_MAC_PERFECT_MATCH || @@ -6256,7 +6407,7 @@ DONE: /* Configure hash enable flags for RSS */ uint64_t -i40e_config_hena(uint64_t flags) +i40e_config_hena(uint64_t flags, enum i40e_mac_type type) { uint64_t hena = 0; @@ -6265,20 +6416,42 @@ i40e_config_hena(uint64_t flags) if (flags & ETH_RSS_FRAG_IPV4) hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4; - if (flags & ETH_RSS_NONFRAG_IPV4_TCP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP; - if (flags & ETH_RSS_NONFRAG_IPV4_UDP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + if (flags & ETH_RSS_NONFRAG_IPV4_TCP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + } + if (flags & ETH_RSS_NONFRAG_IPV4_UDP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + } if (flags & ETH_RSS_NONFRAG_IPV4_SCTP) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP; if (flags & ETH_RSS_NONFRAG_IPV4_OTHER) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; if (flags & ETH_RSS_FRAG_IPV6) hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6; - if (flags & ETH_RSS_NONFRAG_IPV6_TCP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP; - if (flags & ETH_RSS_NONFRAG_IPV6_UDP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + if (flags & ETH_RSS_NONFRAG_IPV6_TCP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + } + if (flags & ETH_RSS_NONFRAG_IPV6_UDP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + } else + hena |= 1ULL << 
I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + } if (flags & ETH_RSS_NONFRAG_IPV6_SCTP) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP; if (flags & ETH_RSS_NONFRAG_IPV6_OTHER) @@ -6301,8 +6474,14 @@ i40e_parse_hena(uint64_t flags) rss_hf |= ETH_RSS_FRAG_IPV4; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_SCTP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER)) @@ -6311,8 +6490,14 @@ i40e_parse_hena(uint64_t flags) rss_hf |= ETH_RSS_FRAG_IPV6; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_SCTP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER)) @@ -6332,7 +6517,10 @@ i40e_pf_disable_rss(struct i40e_pf *pf) hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); I40E_WRITE_FLUSH(hw); @@ -6360,8 +6548,7 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len) ret = i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw); if (ret) - PMD_INIT_LOG(ERR, "Failed to configure RSS key " - "via AQ"); + PMD_INIT_LOG(ERR, "Failed to configure RSS key via AQ"); } else { uint32_t *hash_key = (uint32_t *)key; uint16_t i; @@ -6419,8 +6606,11 @@ i40e_hw_rss_hash_set(struct i40e_pf *pf, struct rte_eth_rss_conf *rss_conf) rss_hf = rss_conf->rss_hf; hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; - hena |= i40e_config_hena(rss_hf); + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; + hena |= i40e_config_hena(rss_hf, hw->mac.type); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); I40E_WRITE_FLUSH(hw); @@ -6439,7 +6629,9 @@ i40e_dev_rss_hash_update(struct rte_eth_dev *dev, hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */ + if (!(hena & ((hw->mac.type == I40E_MAC_X722) + ? 
I40E_RSS_HENA_ALL_X722 + : I40E_RSS_HENA_ALL))) { /* RSS disabled */ if (rss_hf != 0) /* Enable RSS */ return -EINVAL; return 0; /* Nothing to do */ @@ -6502,7 +6694,86 @@ i40e_dev_get_filter_type(uint16_t filter_type, uint16_t *flag) return 0; } +/* Convert tunnel filter structure */ +static int +i40e_tunnel_filter_convert(struct i40e_aqc_add_remove_cloud_filters_element_data + *cld_filter, + struct i40e_tunnel_filter *tunnel_filter) +{ + ether_addr_copy((struct ether_addr *)&cld_filter->outer_mac, + (struct ether_addr *)&tunnel_filter->input.outer_mac); + ether_addr_copy((struct ether_addr *)&cld_filter->inner_mac, + (struct ether_addr *)&tunnel_filter->input.inner_mac); + tunnel_filter->input.inner_vlan = cld_filter->inner_vlan; + tunnel_filter->input.flags = cld_filter->flags; + tunnel_filter->input.tenant_id = cld_filter->tenant_id; + tunnel_filter->queue = cld_filter->queue_number; + + return 0; +} + +/* Check if there exists the tunnel filter */ +struct i40e_tunnel_filter * +i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule, + const struct i40e_tunnel_filter_input *input) +{ + int ret; + + ret = rte_hash_lookup(tunnel_rule->hash_table, (const void *)input); + if (ret < 0) + return NULL; + + return tunnel_rule->hash_map[ret]; +} + +/* Add a tunnel filter into the SW list */ static int +i40e_sw_tunnel_filter_insert(struct i40e_pf *pf, + struct i40e_tunnel_filter *tunnel_filter) +{ + struct i40e_tunnel_rule *rule = &pf->tunnel; + int ret; + + ret = rte_hash_add_key(rule->hash_table, &tunnel_filter->input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert tunnel filter to hash table %d!", + ret); + return ret; + } + rule->hash_map[ret] = tunnel_filter; + + TAILQ_INSERT_TAIL(&rule->tunnel_list, tunnel_filter, rules); + + return 0; +} + +/* Delete a tunnel filter from the SW list */ +int +i40e_sw_tunnel_filter_del(struct i40e_pf *pf, + struct i40e_tunnel_filter_input *input) +{ + struct i40e_tunnel_rule *rule = &pf->tunnel; + struct i40e_tunnel_filter *tunnel_filter; + int ret; + + ret = rte_hash_del_key(rule->hash_table, input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to delete tunnel filter to hash table %d!", + ret); + return ret; + } + tunnel_filter = rule->hash_map[ret]; + rule->hash_map[ret] = NULL; + + TAILQ_REMOVE(&rule->tunnel_list, tunnel_filter, rules); + rte_free(tunnel_filter); + + return 0; +} + +int i40e_dev_tunnel_filter_set(struct i40e_pf *pf, struct rte_eth_tunnel_filter_conf *tunnel_filter, uint8_t add) @@ -6517,6 +6788,9 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf, struct i40e_vsi *vsi = pf->main_vsi; struct i40e_aqc_add_remove_cloud_filters_element_data *cld_filter; struct i40e_aqc_add_remove_cloud_filters_element_data *pfilter; + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + struct i40e_tunnel_filter *tunnel, *node; + struct i40e_tunnel_filter check_filter; /* Check if filter exists */ cld_filter = rte_zmalloc("tunnel_filter", sizeof(struct i40e_aqc_add_remove_cloud_filters_element_data), @@ -6579,11 +6853,38 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf, pfilter->tenant_id = rte_cpu_to_le_32(tunnel_filter->tenant_id); pfilter->queue_number = rte_cpu_to_le_16(tunnel_filter->queue_id); - if (add) + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_tunnel_filter_convert(cld_filter, &check_filter); + node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &check_filter.input); + if (add && node) { + PMD_DRV_LOG(ERR, "Conflict with existing tunnel rules!"); + return -EINVAL; + } + 
+ if (!add && !node) { + PMD_DRV_LOG(ERR, "There's no corresponding tunnel filter!"); + return -EINVAL; + } + + if (add) { ret = i40e_aq_add_cloud_filters(hw, vsi->seid, cld_filter, 1); - else + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to add a tunnel filter."); + return ret; + } + tunnel = rte_zmalloc("tunnel_filter", sizeof(*tunnel), 0); + rte_memcpy(tunnel, &check_filter, sizeof(check_filter)); + ret = i40e_sw_tunnel_filter_insert(pf, tunnel); + } else { ret = i40e_aq_remove_cloud_filters(hw, vsi->seid, - cld_filter, 1); + cld_filter, 1); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to delete a tunnel filter."); + return ret; + } + ret = i40e_sw_tunnel_filter_del(pf, &node->input); + } rte_free(cld_filter); return ret; @@ -6620,8 +6921,9 @@ i40e_add_vxlan_port(struct i40e_pf *pf, uint16_t port) /* Now check if there is space to add the new port */ idx = i40e_get_vxlan_port_idx(pf, 0); if (idx < 0) { - PMD_DRV_LOG(ERR, "Maximum number of UDP ports reached," - "not adding port %d", port); + PMD_DRV_LOG(ERR, + "Maximum number of UDP ports reached, not adding port %d", + port); return -ENOSPC; } @@ -6860,7 +7162,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len) int ret = -EINVAL; val = I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2)); - PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x\n", val); + PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x", val); if (len == 3) { reg = val | I40E_GL_PRS_FVBM_MSK_ENA; @@ -6879,7 +7181,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len) } else { ret = 0; } - PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x\n", + PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x", I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2))); return ret; @@ -6992,15 +7294,15 @@ i40e_set_symmetric_hash_enable_per_port(struct i40e_hw *hw, uint8_t enable) if (enable > 0) { if (reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK) { - PMD_DRV_LOG(INFO, "Symmetric hash has already " - "been enabled"); + PMD_DRV_LOG(INFO, + "Symmetric hash has already been enabled"); return; } reg |= I40E_PRTQF_CTL_0_HSYM_ENA_MASK; } else { if (!(reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK)) { - PMD_DRV_LOG(INFO, "Symmetric hash has already " - "been disabled"); + PMD_DRV_LOG(INFO, + "Symmetric hash has already been disabled"); return; } reg &= ~I40E_PRTQF_CTL_0_HSYM_ENA_MASK; @@ -7124,16 +7426,16 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw, if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_TOEPLITZ) { /* Toeplitz */ if (reg & I40E_GLQF_CTL_HTOEP_MASK) { - PMD_DRV_LOG(DEBUG, "Hash function already set to " - "Toeplitz"); + PMD_DRV_LOG(DEBUG, + "Hash function already set to Toeplitz"); goto out; } reg |= I40E_GLQF_CTL_HTOEP_MASK; } else if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_SIMPLE_XOR) { /* Simple XOR */ if (!(reg & I40E_GLQF_CTL_HTOEP_MASK)) { - PMD_DRV_LOG(DEBUG, "Hash function already set to " - "Simple XOR"); + PMD_DRV_LOG(DEBUG, + "Hash function already set to Simple XOR"); goto out; } reg &= ~I40E_GLQF_CTL_HTOEP_MASK; @@ -7176,6 +7478,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | 
I40E_INSET_DST_PORT | + I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7185,6 +7505,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | + I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7218,6 +7547,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7227,6 +7574,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7266,11 +7622,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | 
I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | @@ -7292,11 +7663,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | @@ -7340,7 +7726,7 @@ i40e_validate_input_set(enum i40e_filter_pctype pctype, } /* default input set fields combination per pctype */ -static uint64_t +uint64_t i40e_get_default_input_set(uint16_t pctype) { static const uint64_t default_inset_table[] = { @@ -7349,9 +7735,18 @@ i40e_get_default_input_set(uint16_t pctype) [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | @@ -7363,9 +7758,18 @@ i40e_get_default_input_set(uint16_t pctype) [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + 
[I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | @@ -7484,25 +7888,23 @@ i40e_parse_input_set(uint64_t *inset, * and vice versa */ static uint64_t -i40e_translate_input_set_reg(uint64_t input) +i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input) { uint64_t val = 0; uint16_t i; - static const struct { + struct inset_map { uint64_t inset; uint64_t inset_reg; - } inset_map[] = { + }; + + static const struct inset_map inset_map_common[] = { {I40E_INSET_DMAC, I40E_REG_INSET_L2_DMAC}, {I40E_INSET_SMAC, I40E_REG_INSET_L2_SMAC}, {I40E_INSET_VLAN_OUTER, I40E_REG_INSET_L2_OUTER_VLAN}, {I40E_INSET_VLAN_INNER, I40E_REG_INSET_L2_INNER_VLAN}, {I40E_INSET_LAST_ETHER_TYPE, I40E_REG_INSET_LAST_ETHER_TYPE}, - {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4}, - {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4}, {I40E_INSET_IPV4_TOS, I40E_REG_INSET_L3_IP4_TOS}, - {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO}, - {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL}, {I40E_INSET_IPV6_SRC, I40E_REG_INSET_L3_SRC_IP6}, {I40E_INSET_IPV6_DST, I40E_REG_INSET_L3_DST_IP6}, {I40E_INSET_IPV6_TC, I40E_REG_INSET_L3_IP6_TC}, @@ -7531,13 +7933,40 @@ i40e_translate_input_set_reg(uint64_t input) {I40E_INSET_FLEX_PAYLOAD_W8, I40E_REG_INSET_FLEX_PAYLOAD_WORD8}, }; + /* some different registers map in x722*/ + static const struct inset_map inset_map_diff_x722[] = { + {I40E_INSET_IPV4_SRC, I40E_X722_REG_INSET_L3_SRC_IP4}, + {I40E_INSET_IPV4_DST, I40E_X722_REG_INSET_L3_DST_IP4}, + {I40E_INSET_IPV4_PROTO, I40E_X722_REG_INSET_L3_IP4_PROTO}, + {I40E_INSET_IPV4_TTL, I40E_X722_REG_INSET_L3_IP4_TTL}, + }; + + static const struct inset_map inset_map_diff_not_x722[] = { + {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4}, + {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4}, + {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO}, + {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL}, + }; + if (input == 0) return val; /* Translate input set to register aware inset */ - for (i = 0; i < RTE_DIM(inset_map); i++) { - if (input & inset_map[i].inset) - val |= inset_map[i].inset_reg; + if (type == I40E_MAC_X722) { + for (i = 0; i < RTE_DIM(inset_map_diff_x722); i++) { + if (input & inset_map_diff_x722[i].inset) + val |= inset_map_diff_x722[i].inset_reg; + } + } else { + for (i = 0; i < RTE_DIM(inset_map_diff_not_x722); i++) { + if (input & inset_map_diff_not_x722[i].inset) + val |= inset_map_diff_not_x722[i].inset_reg; + } + } + + for (i = 0; i < RTE_DIM(inset_map_common); i++) { + if (input & inset_map_common[i].inset) + val |= inset_map_common[i].inset_reg; } return val; @@ -7596,10 +8025,10 @@ i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val) { uint32_t reg = i40e_read_rx_ctl(hw, addr); - PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x\n", addr, reg); + PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x", addr, reg); if (reg != val) i40e_write_rx_ctl(hw, addr, val); - PMD_DRV_LOG(DEBUG, "[0x%08x] after: 
0x%08x\n", addr, + PMD_DRV_LOG(DEBUG, "[0x%08x] after: 0x%08x", addr, (uint32_t)i40e_read_rx_ctl(hw, addr)); } @@ -7614,15 +8043,22 @@ i40e_filter_input_set_init(struct i40e_pf *pf) for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) { - if (!I40E_VALID_PCTYPE(pctype)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722(pctype)) + continue; + } else { + if (!I40E_VALID_PCTYPE(pctype)) + continue; + } + input_set = i40e_get_default_input_set(pctype); num = i40e_generate_inset_mask_reg(input_set, mask_reg, I40E_INSET_MASK_NUM_REG); if (num < 0) return; - inset_reg = i40e_translate_input_set_reg(input_set); + inset_reg = i40e_translate_input_set_reg(hw->mac.type, + input_set); i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7680,7 +8116,15 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw, PMD_DRV_LOG(ERR, "invalid flow_type input."); return -EINVAL; } - pctype = i40e_flowtype_to_pctype(conf->flow_type); + + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(hw, + I40E_GLQF_FD_PCTYPES((int)i40e_flowtype_to_pctype( + conf->flow_type))); + } else + pctype = i40e_flowtype_to_pctype(conf->flow_type); + ret = i40e_parse_input_set(&input_set, pctype, conf->field, conf->inset_size); if (ret) { @@ -7704,7 +8148,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw, if (num < 0) return -EINVAL; - inset_reg |= i40e_translate_input_set_reg(input_set); + inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); i40e_check_write_reg(hw, I40E_GLQF_HASH_INSET(0, pctype), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7749,7 +8193,9 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf, PMD_DRV_LOG(ERR, "invalid flow_type input."); return -EINVAL; } + pctype = i40e_flowtype_to_pctype(conf->flow_type); + ret = i40e_parse_input_set(&input_set, pctype, conf->field, conf->inset_size); if (ret) { @@ -7780,7 +8226,7 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf, if (num < 0) return -EINVAL; - inset_reg |= i40e_translate_input_set_reg(input_set); + inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7893,16 +8339,95 @@ i40e_hash_filter_ctrl(struct rte_eth_dev *dev, return ret; } +/* Convert ethertype filter structure */ +static int +i40e_ethertype_filter_convert(const struct rte_eth_ethertype_filter *input, + struct i40e_ethertype_filter *filter) +{ + rte_memcpy(&filter->input.mac_addr, &input->mac_addr, ETHER_ADDR_LEN); + filter->input.ether_type = input->ether_type; + filter->flags = input->flags; + filter->queue = input->queue; + + return 0; +} + +/* Check if there exists the ehtertype filter */ +struct i40e_ethertype_filter * +i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule, + const struct i40e_ethertype_filter_input *input) +{ + int ret; + + ret = rte_hash_lookup(ethertype_rule->hash_table, (const void *)input); + if (ret < 0) + return NULL; + + return ethertype_rule->hash_map[ret]; +} + +/* Add ethertype filter in SW list */ +static int +i40e_sw_ethertype_filter_insert(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter) +{ + struct i40e_ethertype_rule *rule = &pf->ethertype; + int ret; + + ret = rte_hash_add_key(rule->hash_table, &filter->input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert ethertype filter" + " to hash 
table %d!", + ret); + return ret; + } + rule->hash_map[ret] = filter; + + TAILQ_INSERT_TAIL(&rule->ethertype_list, filter, rules); + + return 0; +} + +/* Delete ethertype filter in SW list */ +int +i40e_sw_ethertype_filter_del(struct i40e_pf *pf, + struct i40e_ethertype_filter_input *input) +{ + struct i40e_ethertype_rule *rule = &pf->ethertype; + struct i40e_ethertype_filter *filter; + int ret; + + ret = rte_hash_del_key(rule->hash_table, input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to delete ethertype filter" + " to hash table %d!", + ret); + return ret; + } + filter = rule->hash_map[ret]; + rule->hash_map[ret] = NULL; + + TAILQ_REMOVE(&rule->ethertype_list, filter, rules); + rte_free(filter); + + return 0; +} + /* * Configure ethertype filter, which can director packet by filtering * with mac address and ether_type or only ether_type */ -static int +int i40e_ethertype_filter_set(struct i40e_pf *pf, struct rte_eth_ethertype_filter *filter, bool add) { struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + struct i40e_ethertype_filter *ethertype_filter, *node; + struct i40e_ethertype_filter check_filter; struct i40e_control_filter_stats stats; uint16_t flags = 0; int ret; @@ -7913,13 +8438,29 @@ i40e_ethertype_filter_set(struct i40e_pf *pf, } if (filter->ether_type == ETHER_TYPE_IPv4 || filter->ether_type == ETHER_TYPE_IPv6) { - PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in" - " control packet filter.", filter->ether_type); + PMD_DRV_LOG(ERR, + "unsupported ether_type(0x%04x) in control packet filter.", + filter->ether_type); return -EINVAL; } if (filter->ether_type == ETHER_TYPE_VLAN) - PMD_DRV_LOG(WARNING, "filter vlan ether_type in first tag is" - " not supported."); + PMD_DRV_LOG(WARNING, + "filter vlan ether_type in first tag is not supported."); + + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_ethertype_filter_convert(filter, &check_filter); + node = i40e_sw_ethertype_filter_lookup(ethertype_rule, + &check_filter.input); + if (add && node) { + PMD_DRV_LOG(ERR, "Conflict with existing ethertype rules!"); + return -EINVAL; + } + + if (!add && !node) { + PMD_DRV_LOG(ERR, "There's no corresponding ethertype filter!"); + return -EINVAL; + } if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC)) flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; @@ -7934,14 +8475,25 @@ i40e_ethertype_filter_set(struct i40e_pf *pf, pf->main_vsi->seid, filter->queue, add, &stats, NULL); - PMD_DRV_LOG(INFO, "add/rem control packet filter, return %d," - " mac_etype_used = %u, etype_used = %u," - " mac_etype_free = %u, etype_free = %u\n", - ret, stats.mac_etype_used, stats.etype_used, - stats.mac_etype_free, stats.etype_free); + PMD_DRV_LOG(INFO, + "add/rem control packet filter, return %d, mac_etype_used = %u, etype_used = %u, mac_etype_free = %u, etype_free = %u", + ret, stats.mac_etype_used, stats.etype_used, + stats.mac_etype_free, stats.etype_free); if (ret < 0) return -ENOSYS; - return 0; + + /* Add or delete a filter in SW list */ + if (add) { + ethertype_filter = rte_zmalloc("ethertype_filter", + sizeof(*ethertype_filter), 0); + rte_memcpy(ethertype_filter, &check_filter, + sizeof(check_filter)); + ret = i40e_sw_ethertype_filter_insert(pf, ethertype_filter); + } else { + ret = i40e_sw_ethertype_filter_del(pf, &node->input); + } + + return ret; } /* @@ -7976,7 +8528,7 @@ i40e_ethertype_filter_handle(struct rte_eth_dev *dev, FALSE); break; default: - PMD_DRV_LOG(ERR, "unsupported 
operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -ENOSYS; break; } @@ -8014,6 +8566,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev, case RTE_ETH_FILTER_FDIR: ret = i40e_fdir_ctrl_func(dev, filter_op, arg); break; + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &i40e_flow_ops; + break; default: PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", filter_type); @@ -8031,10 +8588,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev, static void i40e_enable_extended_tag(struct rte_eth_dev *dev) { + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); uint32_t buf = 0; int ret; - ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CAP_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", @@ -8047,7 +8605,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev) } buf = 0; - ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CTRL_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", @@ -8059,7 +8617,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev) return; } buf |= PCI_DEV_CTRL_EXT_TAG_MASK; - ret = rte_eal_pci_write_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_write_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CTRL_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to write PCI offset 0x%x", @@ -8122,8 +8680,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) [I40E_FILTER_PCTYPE_FRAG_IPV4] = RTE_ETH_FLOW_FRAG_IPV4, [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] = RTE_ETH_FLOW_NONFRAG_IPV4_UDP, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = RTE_ETH_FLOW_NONFRAG_IPV4_TCP, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + RTE_ETH_FLOW_NONFRAG_IPV4_TCP, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = RTE_ETH_FLOW_NONFRAG_IPV4_SCTP, [I40E_FILTER_PCTYPE_NONF_IPV4_OTHER] = @@ -8131,8 +8695,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) [I40E_FILTER_PCTYPE_FRAG_IPV6] = RTE_ETH_FLOW_FRAG_IPV6, [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] = RTE_ETH_FLOW_NONFRAG_IPV6_UDP, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = RTE_ETH_FLOW_NONFRAG_IPV6_TCP, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + RTE_ETH_FLOW_NONFRAG_IPV6_TCP, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = RTE_ETH_FLOW_NONFRAG_IPV6_SCTP, [I40E_FILTER_PCTYPE_NONF_IPV6_OTHER] = @@ -8168,6 +8738,23 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) #define I40E_GL_SWR_PM_UP_THR_SF_VALUE 0x06060606 #define I40E_GL_SWR_PM_UP_THR 0x269FBC +static int +i40e_dev_sync_phy_type(struct i40e_hw *hw) +{ + enum i40e_status_code status; + struct i40e_aq_get_phy_abilities_resp phy_ab; + int ret = -ENOTSUP; + + status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_ab, + NULL); + + if (status) + return ret; + + return 0; +} + + static void i40e_configure_registers(struct i40e_hw *hw) { @@ -8185,7 +8772,8 @@ i40e_configure_registers(struct i40e_hw *hw) for (i = 0; i < RTE_DIM(reg_table); i++) { if (reg_table[i].addr == I40E_GL_SWR_PM_UP_THR) { - if (i40e_is_40G_device(hw->device_id)) /* For XL710 */ + if 
(I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types) || /* For XL710 */ + I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types)) /* For XXV710 */ reg_table[i].val = I40E_GL_SWR_PM_UP_THR_SF_VALUE; else /* For X710 */ @@ -8208,9 +8796,9 @@ i40e_configure_registers(struct i40e_hw *hw) ret = i40e_aq_debug_write_register(hw, reg_table[i].addr, reg_table[i].val, NULL); if (ret < 0) { - PMD_DRV_LOG(ERR, "Failed to write 0x%"PRIx64" to the " - "address of 0x%"PRIx32, reg_table[i].val, - reg_table[i].addr); + PMD_DRV_LOG(ERR, + "Failed to write 0x%"PRIx64" to the address of 0x%"PRIx32, + reg_table[i].val, reg_table[i].addr); break; } PMD_DRV_LOG(DEBUG, "Write 0x%"PRIx64" to the address of " @@ -8255,8 +8843,9 @@ i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi) I40E_VSI_L2TAGSTXVALID( vsi->vsi_id), reg, NULL); if (ret < 0) { - PMD_DRV_LOG(ERR, "Failed to update " - "VSI_L2TAGSTXVALID[%d]", vsi->vsi_id); + PMD_DRV_LOG(ERR, + "Failed to update VSI_L2TAGSTXVALID[%d]", + vsi->vsi_id); return I40E_ERR_CONFIG; } } @@ -8307,11 +8896,10 @@ i40e_aq_add_mirror_rule(struct i40e_hw *hw, rte_memcpy(&desc.params.raw, &cmd, sizeof(cmd)); status = i40e_asq_send_command(hw, &desc, entries, buff_len, NULL); - PMD_DRV_LOG(INFO, "i40e_aq_add_mirror_rule, aq_status %d," - "rule_id = %u" - " mirror_rules_used = %u, mirror_rules_free = %u,", - hw->aq.asq_last_status, resp->rule_id, - resp->mirror_rules_used, resp->mirror_rules_free); + PMD_DRV_LOG(INFO, + "i40e_aq_add_mirror_rule, aq_status %d, rule_id = %u mirror_rules_used = %u, mirror_rules_free = %u,", + hw->aq.asq_last_status, resp->rule_id, + resp->mirror_rules_used, resp->mirror_rules_free); *rule_id = rte_le_to_cpu_16(resp->rule_id); return status; @@ -8389,8 +8977,8 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, PMD_DRV_LOG(DEBUG, "i40e_mirror_rule_set: sw_id = %d.", sw_id); if (pf->main_vsi->veb == NULL || pf->vfs == NULL) { - PMD_DRV_LOG(ERR, "mirror rule can not be configured" - " without veb or vfs."); + PMD_DRV_LOG(ERR, + "mirror rule can not be configured without veb or vfs."); return -ENOSYS; } if (pf->nb_mirror_rule > I40E_MAX_MIRROR_RULES) { @@ -8422,9 +9010,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, mirr_rule->entries, mirr_rule->num_entries, mirr_rule->id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to remove mirror rule:" - " ret = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to remove mirror rule: ret = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules); @@ -8513,9 +9101,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, mirr_rule->rule_type, mirr_rule->entries, j, &rule_id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to add mirror rule:" - " ret = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to add mirror rule: ret = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); rte_free(mirr_rule); return -ENOSYS; } @@ -8567,9 +9155,9 @@ i40e_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t sw_id) mirr_rule->entries, mirr_rule->num_entries, mirr_rule->id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to remove mirror rule:" - " status = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to remove mirror rule: status = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules); @@ -9001,9 +9589,9 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map) ret = i40e_aq_config_switch_comp_bw_config(hw, veb->seid, &veb_bw, NULL); if 
(ret) { - PMD_INIT_LOG(ERR, "AQ command Config switch_comp BW allocation" - " per TC failed = %d", - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "AQ command Config switch_comp BW allocation per TC failed = %d", + hw->aq.asq_last_status); return ret; } @@ -9011,16 +9599,18 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map) ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid, &ets_query, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to get switch_comp ETS" - " configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "Failed to get switch_comp ETS configuration %u", + hw->aq.asq_last_status); return ret; } memset(&bw_query, 0, sizeof(bw_query)); ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid, &bw_query, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to get switch_comp bandwidth" - " configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "Failed to get switch_comp bandwidth configuration %u", + hw->aq.asq_last_status); return ret; } @@ -9085,8 +9675,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map) } ret = i40e_aq_config_vsi_tc_bw(hw, vsi->seid, &bw_data, NULL); if (ret) { - PMD_INIT_LOG(ERR, "AQ command Config VSI BW allocation" - " per TC failed = %d", + PMD_INIT_LOG(ERR, + "AQ command Config VSI BW allocation per TC failed = %d", hw->aq.asq_last_status); goto out; } @@ -9107,9 +9697,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map) /* Update the VSI after updating the VSI queue-mapping information */ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { - PMD_INIT_LOG(ERR, "Failed to configure " - "TC queue mapping = %d", - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, "Failed to configure TC queue mapping = %d", + hw->aq.asq_last_status); goto out; } /* update the local VSI info with updated queue map */ @@ -9161,8 +9750,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, /* Use the FW API if FW > v4.4*/ if (!(((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver >= 4)) || (hw->aq.fw_maj_ver >= 5))) { - PMD_INIT_LOG(ERR, "FW < v4.4, can not use FW LLDP API" - " to configure DCB"); + PMD_INIT_LOG(ERR, + "FW < v4.4, can not use FW LLDP API to configure DCB"); return I40E_ERR_FIRMWARE_API_VERSION; } @@ -9177,8 +9766,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, old_cfg->etsrec = old_cfg->etscfg; ret = i40e_set_dcb_config(hw); if (ret) { - PMD_INIT_LOG(ERR, - "Set DCB Config failed, err %s aq_err %s\n", + PMD_INIT_LOG(ERR, "Set DCB Config failed, err %s aq_err %s", i40e_stat_str(hw, ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; @@ -9210,7 +9798,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, ret = i40e_config_switch_comp_tc(main_vsi->veb, tc_map); if (ret) PMD_INIT_LOG(WARNING, - "Failed configuring TC for VEB seid=%d\n", + "Failed configuring TC for VEB seid=%d", main_vsi->veb->seid); } /* Update each VSI */ @@ -9228,8 +9816,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, I40E_DEFAULT_TCMAP); if (ret) PMD_INIT_LOG(WARNING, - "Failed configuring TC for VSI seid=%d\n", - vsi_list->vsi->seid); + "Failed configuring TC for VSI seid=%d", + vsi_list->vsi->seid); /* continue */ } } @@ -9243,7 +9831,6 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, * * Returns 0 on success, negative value on failure */ -//TREX_PATCH - changed all ERR to INFO in below func static int i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) { @@ -9252,7 +9839,7 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) int ret = 0; if ((pf->flags & I40E_FLAG_DCB) == 0) { - PMD_INIT_LOG(INFO, "HW 
doesn't support DCB"); + PMD_INIT_LOG(ERR, "HW doesn't support DCB"); return -ENOTSUP; } @@ -9261,29 +9848,21 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) * LLDP MIB change event. */ if (sw_dcb == TRUE) { - ret = i40e_aq_stop_lldp(hw, TRUE, NULL); - if (ret != I40E_SUCCESS) - PMD_INIT_LOG(DEBUG, "Failed to stop lldp"); - ret = i40e_init_dcb(hw); - /* if sw_dcb, lldp agent is stopped, the return from + /* If lldp agent is stopped, the return value from * i40e_init_dcb we expect is failure with I40E_AQ_RC_EPERM - * adminq status. + * adminq status. Otherwise, it should return success. */ - if (ret != I40E_SUCCESS && - hw->aq.asq_last_status == I40E_AQ_RC_EPERM) { + if ((ret == I40E_SUCCESS) || (ret != I40E_SUCCESS && + hw->aq.asq_last_status == I40E_AQ_RC_EPERM)) { memset(&hw->local_dcbx_config, 0, sizeof(struct i40e_dcbx_config)); /* set dcb default configuration */ hw->local_dcbx_config.etscfg.willing = 0; hw->local_dcbx_config.etscfg.maxtcs = 0; hw->local_dcbx_config.etscfg.tcbwtable[0] = 100; - hw->local_dcbx_config.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; -#ifdef TREX_PATCH_LOW_LATENCY - hw->local_dcbx_config.etscfg.tcbwtable[1] = 0; - hw->local_dcbx_config.etscfg.tsatable[1] = I40E_IEEE_TSA_STRICT; - hw->local_dcbx_config.etscfg.prioritytable[1] = 1; -#endif + hw->local_dcbx_config.etscfg.tsatable[0] = + I40E_IEEE_TSA_ETS; hw->local_dcbx_config.etsrec = hw->local_dcbx_config.etscfg; hw->local_dcbx_config.pfc.willing = 0; @@ -9298,22 +9877,15 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) I40E_APP_PROTOID_FCOE; ret = i40e_set_dcb_config(hw); if (ret) { - PMD_INIT_LOG(INFO, "default dcb config fails." - " err = %d, aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "default dcb config fails. err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } -#ifdef TREX_PATCH_LOW_LATENCY - if (i40e_vsi_update_tc_bandwidth_ex(pf->main_vsi) != - I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to update TC bandwidth"); - return -ENOSYS; - } -#endif } else { - PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d," - " aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "DCB initialization in FW fails, err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOTSUP; } } else { @@ -9324,14 +9896,14 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) ret = i40e_init_dcb(hw); if (!ret) { if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) { - PMD_INIT_LOG(INFO, "HW doesn't support" - " DCBX offload."); + PMD_INIT_LOG(ERR, + "HW doesn't support DCBX offload."); return -ENOTSUP; } } else { - PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d," - " aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "DCBX configuration failed, err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOTSUP; } } @@ -9440,7 +10012,8 @@ i40e_dev_get_dcb_info(struct rte_eth_dev *dev, static int i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t interval = i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -9465,7 +10038,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)); I40E_WRITE_FLUSH(hw); - 
rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pci_dev->intr_handle); return 0; } @@ -9473,7 +10046,8 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) static int i40e_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t msix_intr; @@ -9605,8 +10179,7 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) /* mtu setting is forbidden if port is start */ if (dev_data->dev_started) { - PMD_DRV_LOG(ERR, - "port %d must be stopped before configuration\n", + PMD_DRV_LOG(ERR, "port %d must be stopped before configuration", dev_data->port_id); return -EBUSY; } @@ -9620,3 +10193,997 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return ret; } + +/* Restore ethertype filter */ +static void +i40e_ethertype_filter_restore(struct i40e_pf *pf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_filter_list + *ethertype_list = &pf->ethertype.ethertype_list; + struct i40e_ethertype_filter *f; + struct i40e_control_filter_stats stats; + uint16_t flags; + + TAILQ_FOREACH(f, ethertype_list, rules) { + flags = 0; + if (!(f->flags & RTE_ETHTYPE_FLAGS_MAC)) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; + if (f->flags & RTE_ETHTYPE_FLAGS_DROP) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP; + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE; + + memset(&stats, 0, sizeof(stats)); + i40e_aq_add_rem_control_packet_filter(hw, + f->input.mac_addr.addr_bytes, + f->input.ether_type, + flags, pf->main_vsi->seid, + f->queue, 1, &stats, NULL); + } + PMD_DRV_LOG(INFO, "Ethertype filter:" + " mac_etype_used = %u, etype_used = %u," + " mac_etype_free = %u, etype_free = %u", + stats.mac_etype_used, stats.etype_used, + stats.mac_etype_free, stats.etype_free); +} + +/* Restore tunnel filter */ +static void +i40e_tunnel_filter_restore(struct i40e_pf *pf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_vsi *vsi = pf->main_vsi; + struct i40e_tunnel_filter_list + *tunnel_list = &pf->tunnel.tunnel_list; + struct i40e_tunnel_filter *f; + struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter; + + TAILQ_FOREACH(f, tunnel_list, rules) { + memset(&cld_filter, 0, sizeof(cld_filter)); + rte_memcpy(&cld_filter, &f->input, sizeof(f->input)); + cld_filter.queue_number = f->queue; + i40e_aq_add_cloud_filters(hw, vsi->seid, &cld_filter, 1); + } +} + +static void +i40e_filter_restore(struct i40e_pf *pf) +{ + i40e_ethertype_filter_restore(pf); + i40e_tunnel_filter_restore(pf); + i40e_fdir_filter_restore(pf); +} + +static int +is_i40e_pmd(const char *driver_name) +{ + if (!strstr(driver_name, "i40e")) + return -ENOTSUP; + + if (strstr(driver_name, "i40e_vf")) + return -ENOTSUP; + + return 0; +} + +int +rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + i40e_notify_vf_link_status(dev, &pf->vfs[vf]); + + return 0; +} + +int +rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on) 
+{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + /* Check if it has been already on or off */ + if (vsi->info.valid_sections & + rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SECURITY_VALID)) { + if (on) { + if ((vsi->info.sec_flags & + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) == + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) + return 0; /* already on */ + } else { + if ((vsi->info.sec_flags & + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) == 0) + return 0; /* already off */ + } + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID); + if (on) + vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; + else + vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +static int +i40e_add_rm_all_vlan_filter(struct i40e_vsi *vsi, uint8_t add) +{ + uint32_t j, k; + uint16_t vlan_id; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); + struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0}; + int ret; + + for (j = 0; j < I40E_VFTA_SIZE; j++) { + if (!vsi->vfta[j]) + continue; + + for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) { + if (!(vsi->vfta[j] & (1 << k))) + continue; + + vlan_id = j * I40E_UINT32_BIT_SIZE + k; + if (!vlan_id) + continue; + + vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id); + if (add) + ret = i40e_aq_add_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + else + ret = i40e_aq_remove_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, + "Failed to add/rm vlan filter"); + return ret; + } + } + } + + return I40E_SUCCESS; +} + +int +rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + /* Check if it has been already on or off */ + if (vsi->vlan_anti_spoof_on == on) + return 0; /* already on or off */ + + vsi->vlan_anti_spoof_on = on; + ret = i40e_add_rm_all_vlan_filter(vsi, on); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to remove VLAN filters."); + return -ENOTSUP; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID); + if (on) + vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK; + else + vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, 
&vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +static int +i40e_vsi_rm_mac_filter(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct i40e_macvlan_filter *mv_f; + int i, vlan_num; + enum rte_mac_filter_type filter_type; + int ret = I40E_SUCCESS; + void *temp; + + /* remove all the MACs */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) { + vlan_num = vsi->vlan_num; + filter_type = f->mac_info.filter_type; + if (filter_type == RTE_MACVLAN_PERFECT_MATCH || + filter_type == RTE_MACVLAN_HASH_MATCH) { + if (vlan_num == 0) { + PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0"); + return I40E_ERR_PARAM; + } + } else if (filter_type == RTE_MAC_PERFECT_MATCH || + filter_type == RTE_MAC_HASH_MATCH) + vlan_num = 1; + + mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0); + if (!mv_f) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return I40E_ERR_NO_MEMORY; + } + + for (i = 0; i < vlan_num; i++) { + mv_f[i].filter_type = filter_type; + (void)rte_memcpy(&mv_f[i].macaddr, + &f->mac_info.mac_addr, + ETH_ADDR_LEN); + } + if (filter_type == RTE_MACVLAN_PERFECT_MATCH || + filter_type == RTE_MACVLAN_HASH_MATCH) { + ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num, + &f->mac_info.mac_addr); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + } + + ret = i40e_remove_macvlan_filters(vsi, mv_f, vlan_num); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + + rte_free(mv_f); + ret = I40E_SUCCESS; + } + + return ret; +} + +static int +i40e_vsi_restore_mac_filter(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct i40e_macvlan_filter *mv_f; + int i, vlan_num = 0; + int ret = I40E_SUCCESS; + void *temp; + + /* restore all the MACs */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) { + if ((f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH) || + (f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH)) { + /** + * If vlan_num is 0, that's the first time to add mac, + * set mask for vlan_id 0. 
+ */ + if (vsi->vlan_num == 0) { + i40e_set_vlan_filter(vsi, 0, 1); + vsi->vlan_num = 1; + } + vlan_num = vsi->vlan_num; + } else if ((f->mac_info.filter_type == RTE_MAC_PERFECT_MATCH) || + (f->mac_info.filter_type == RTE_MAC_HASH_MATCH)) + vlan_num = 1; + + mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0); + if (!mv_f) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return I40E_ERR_NO_MEMORY; + } + + for (i = 0; i < vlan_num; i++) { + mv_f[i].filter_type = f->mac_info.filter_type; + (void)rte_memcpy(&mv_f[i].macaddr, + &f->mac_info.mac_addr, + ETH_ADDR_LEN); + } + + if (f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH || + f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH) { + ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num, + &f->mac_info.mac_addr); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + } + + ret = i40e_add_macvlan_filters(vsi, mv_f, vlan_num); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + + rte_free(mv_f); + ret = I40E_SUCCESS; + } + + return ret; +} + +static int +i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on) +{ + struct i40e_vsi_context ctxt; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return -EINVAL; + + hw = I40E_VSI_TO_HW(vsi); + + /* Use the FW API if FW >= v5.0 */ + if (hw->aq.fw_maj_ver < 5) { + PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback"); + return -ENOTSUP; + } + + /* Check if it has been already on or off */ + if (vsi->info.valid_sections & + rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SWITCH_VALID)) { + if (on) { + if ((vsi->info.switch_id & + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) == + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) + return 0; /* already on */ + } else { + if ((vsi->info.switch_id & + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) == 0) + return 0; /* already off */ + } + } + + /* remove all the MAC and VLAN first */ + ret = i40e_vsi_rm_mac_filter(vsi); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to remove MAC filters."); + return ret; + } + if (vsi->vlan_anti_spoof_on) { + ret = i40e_add_rm_all_vlan_filter(vsi, 0); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to remove VLAN filters."); + return ret; + } + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + if (on) + vsi->info.switch_id |= I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB; + else + vsi->info.switch_id &= ~I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + return ret; + } + + /* add all the MAC and VLAN back */ + ret = i40e_vsi_restore_mac_filter(vsi); + if (ret) + return ret; + if (vsi->vlan_anti_spoof_on) { + ret = i40e_add_rm_all_vlan_filter(vsi, 1); + if (ret) + return ret; + } + + return ret; +} + +int +rte_pmd_i40e_set_tx_loopback(uint8_t port, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_pf_vf *vf; + struct i40e_vsi *vsi; + uint16_t vf_id; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + /* setup PF TX loopback */ + vsi = pf->main_vsi; + ret = i40e_vsi_set_tx_loopback(vsi, on); + if (ret) + return -ENOTSUP; + + /* setup TX loopback for all the VFs */ + if (!pf->vfs) { + /* if no VF, do nothing. 
*/ + return 0; + } + + for (vf_id = 0; vf_id < pf->vf_num; vf_id++) { + vf = &pf->vfs[vf_id]; + vsi = vf->vsi; + + ret = i40e_vsi_set_tx_loopback(vsi, on); + if (ret) + return -ENOTSUP; + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, + on, NULL, true); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set unicast promiscuous mode"); + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + on, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set multicast promiscuous mode"); + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id, + struct ether_addr *mac_addr) +{ + struct i40e_mac_filter *f; + struct rte_eth_dev *dev; + struct i40e_pf_vf *vf; + struct i40e_vsi *vsi; + struct i40e_pf *pf; + void *temp; + + if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) + return -EINVAL; + + vf = &pf->vfs[vf_id]; + vsi = vf->vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + ether_addr_copy(mac_addr, &vf->mac_addr); + + /* Remove all existing mac */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) + i40e_vsi_delete_mac(vsi, &f->mac_info.mac_addr); + + return 0; +} + +/* Set vlan strip on/off for specific VF from host */ +int +rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + + if (!vsi) + return -EINVAL; + + ret = i40e_vsi_config_vlan_stripping(vsi, !!on); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VLAN 
stripping!"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id, + uint16_t vlan_id) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + struct i40e_vsi *vsi; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (vlan_id > ETHER_MAX_VLAN_ID) { + PMD_DRV_LOG(ERR, "Invalid VLAN ID."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. + */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) + return -ENODEV; + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); + vsi->info.pvid = vlan_id; + if (vlan_id > 0) + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_INSERT_PVID; + else + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_INSERT_PVID; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id, + uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (on > 1) { + PMD_DRV_LOG(ERR, "on should be 0 or 1."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. + */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, on, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VSI broadcast"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + struct i40e_vsi *vsi; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (on > 1) { + PMD_DRV_LOG(ERR, "on should be 0 or 1."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. 
+ */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); + if (on) { + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_TAGGED; + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_UNTAGGED; + } else { + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_UNTAGGED; + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_TAGGED; + } + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + uint16_t vf_idx; + int ret = I40E_SUCCESS; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + if (vlan_id > ETHER_MAX_VLAN_ID) { + PMD_DRV_LOG(ERR, "Invalid VLAN ID."); + return -EINVAL; + } + + if (vf_mask == 0) { + PMD_DRV_LOG(ERR, "No VF."); + return -EINVAL; + } + + if (on > 1) { + PMD_DRV_LOG(ERR, "on should be 0 or 1."); + return -EINVAL; + } + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned.
+ */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + for (vf_idx = 0; vf_idx < 64 && ret == I40E_SUCCESS; vf_idx++) { + if (vf_mask & ((uint64_t)(1ULL << vf_idx))) { + if (on) + ret = i40e_vsi_add_vlan(pf->vfs[vf_idx].vsi, + vlan_id); + else + ret = i40e_vsi_delete_vlan(pf->vfs[vf_idx].vsi, + vlan_id); + } + } + + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VF VLAN filter, on = %d", on); + } + + return ret; +} + +int +rte_pmd_i40e_get_vf_stats(uint8_t port, + uint16_t vf_id, + struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + i40e_update_vsi_stats(vsi); + + stats->ipackets = vsi->eth_stats.rx_unicast + + vsi->eth_stats.rx_multicast + + vsi->eth_stats.rx_broadcast; + stats->opackets = vsi->eth_stats.tx_unicast + + vsi->eth_stats.tx_multicast + + vsi->eth_stats.tx_broadcast; + stats->ibytes = vsi->eth_stats.rx_bytes; + stats->obytes = vsi->eth_stats.tx_bytes; + stats->ierrors = vsi->eth_stats.rx_discards; + stats->oerrors = vsi->eth_stats.tx_errors + vsi->eth_stats.tx_discards; + + return 0; +} + +int +rte_pmd_i40e_reset_vf_stats(uint8_t port, + uint16_t vf_id) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->offset_loaded = false; + i40e_update_vsi_stats(vsi); + + return 0; +} diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev.h b/src/dpdk/drivers/net/i40e/i40e_ethdev.h index 92c8fad0..9e2f7a28 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev.h +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev.h @@ -37,6 +37,8 @@ #include #include #include +#include +#include #define I40E_VLAN_TAG_SIZE 4 @@ -126,6 +128,7 @@ enum i40e_flxpld_layer_idx { #define I40E_FLAG_FDIR (1ULL << 6) #define I40E_FLAG_VXLAN (1ULL << 7) #define I40E_FLAG_RSS_AQ_CAPABLE (1ULL << 8) +#define I40E_FLAG_VF_MAC_BY_PF (1ULL << 9) #define I40E_FLAG_ALL (I40E_FLAG_RSS | \ I40E_FLAG_DCB | \ I40E_FLAG_VMDQ | \ @@ -134,7 +137,8 @@ enum i40e_flxpld_layer_idx { I40E_FLAG_HEADER_SPLIT_ENABLED | \ I40E_FLAG_FDIR | \ I40E_FLAG_VXLAN | \ - I40E_FLAG_RSS_AQ_CAPABLE) + I40E_FLAG_RSS_AQ_CAPABLE | \ + I40E_FLAG_VF_MAC_BY_PF) #define I40E_RSS_OFFLOAD_ALL ( \ ETH_RSS_FRAG_IPV4 | \ @@ -149,6 +153,16 @@ enum i40e_flxpld_layer_idx { ETH_RSS_NONFRAG_IPV6_OTHER | \ ETH_RSS_L2_PAYLOAD) +/* All bits of RSS hash enable for X722*/ +#define I40E_RSS_HENA_ALL_X722 ( \ + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ + (1ULL << 
I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \ + I40E_RSS_HENA_ALL) + /* All bits of RSS hash enable */ #define I40E_RSS_HENA_ALL ( \ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ @@ -178,6 +192,65 @@ enum i40e_flxpld_layer_idx { #define FLOATING_VEB_SUPPORTED_FW_MAJ 5 #define FLOATING_VEB_SUPPORTED_FW_MIN 0 +#define I40E_GL_SWT_L2TAGCTRL(_i) (0x001C0A70 + ((_i) * 4)) +#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16 +#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK \ + I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) + +#define I40E_INSET_NONE 0x00000000000000000ULL + +/* bit0 ~ bit 7 */ +#define I40E_INSET_DMAC 0x0000000000000001ULL +#define I40E_INSET_SMAC 0x0000000000000002ULL +#define I40E_INSET_VLAN_OUTER 0x0000000000000004ULL +#define I40E_INSET_VLAN_INNER 0x0000000000000008ULL +#define I40E_INSET_VLAN_TUNNEL 0x0000000000000010ULL + +/* bit 8 ~ bit 15 */ +#define I40E_INSET_IPV4_SRC 0x0000000000000100ULL +#define I40E_INSET_IPV4_DST 0x0000000000000200ULL +#define I40E_INSET_IPV6_SRC 0x0000000000000400ULL +#define I40E_INSET_IPV6_DST 0x0000000000000800ULL +#define I40E_INSET_SRC_PORT 0x0000000000001000ULL +#define I40E_INSET_DST_PORT 0x0000000000002000ULL +#define I40E_INSET_SCTP_VT 0x0000000000004000ULL + +/* bit 16 ~ bit 31 */ +#define I40E_INSET_IPV4_TOS 0x0000000000010000ULL +#define I40E_INSET_IPV4_PROTO 0x0000000000020000ULL +#define I40E_INSET_IPV4_TTL 0x0000000000040000ULL +#define I40E_INSET_IPV6_TC 0x0000000000080000ULL +#define I40E_INSET_IPV6_FLOW 0x0000000000100000ULL +#define I40E_INSET_IPV6_NEXT_HDR 0x0000000000200000ULL +#define I40E_INSET_IPV6_HOP_LIMIT 0x0000000000400000ULL +#define I40E_INSET_TCP_FLAGS 0x0000000000800000ULL + +/* bit 32 ~ bit 47, tunnel fields */ +#define I40E_INSET_TUNNEL_IPV4_DST 0x0000000100000000ULL +#define I40E_INSET_TUNNEL_IPV6_DST 0x0000000200000000ULL +#define I40E_INSET_TUNNEL_DMAC 0x0000000400000000ULL +#define I40E_INSET_TUNNEL_SRC_PORT 0x0000000800000000ULL +#define I40E_INSET_TUNNEL_DST_PORT 0x0000001000000000ULL +#define I40E_INSET_TUNNEL_ID 0x0000002000000000ULL + +/* bit 48 ~ bit 55 */ +#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL + +/* bit 56 ~ bit 63, Flex Payload */ +#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W3 0x0400000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W4 0x0800000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD \ + (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \ + I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \ + I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \ + I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8) + struct i40e_adapter; /** @@ -290,6 +363,7 @@ struct i40e_vsi { uint16_t msix_intr; /* The MSIX interrupt binds to VSI */ uint16_t nb_msix; /* The max number of msix vector */ uint8_t enabled_tc; /* The traffic class enabled */ + uint8_t vlan_anti_spoof_on; /* The VLAN anti-spoofing enabled */ struct i40e_bw_info bw_info; /* VSI bandwidth information */ }; @@ -366,6 +440,14 @@ struct i40e_fdir_flex_mask { }; #define I40E_FILTER_PCTYPE_MAX 64 +#define I40E_MAX_FDIR_FILTER_NUM (1024 * 8) + +struct i40e_fdir_filter { + TAILQ_ENTRY(i40e_fdir_filter) rules; + 
struct rte_eth_fdir_filter fdir; +}; + +TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter); /* * A structure used to define fields of a FDIR related info. */ @@ -384,6 +466,60 @@ struct i40e_fdir_info { */ struct i40e_fdir_flex_pit flex_set[I40E_MAX_FLXPLD_LAYER * I40E_MAX_FLXPLD_FIED]; struct i40e_fdir_flex_mask flex_mask[I40E_FILTER_PCTYPE_MAX]; + + struct i40e_fdir_filter_list fdir_list; + struct i40e_fdir_filter **hash_map; + struct rte_hash *hash_table; +}; + +/* Ethertype filter number HW supports */ +#define I40E_MAX_ETHERTYPE_FILTER_NUM 768 + +/* Ethertype filter struct */ +struct i40e_ethertype_filter_input { + struct ether_addr mac_addr; /* Mac address to match */ + uint16_t ether_type; /* Ether type to match */ +}; + +struct i40e_ethertype_filter { + TAILQ_ENTRY(i40e_ethertype_filter) rules; + struct i40e_ethertype_filter_input input; + uint16_t flags; /* Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /* Queue assigned to when match */ +}; + +TAILQ_HEAD(i40e_ethertype_filter_list, i40e_ethertype_filter); + +struct i40e_ethertype_rule { + struct i40e_ethertype_filter_list ethertype_list; + struct i40e_ethertype_filter **hash_map; + struct rte_hash *hash_table; +}; + +/* Tunnel filter number HW supports */ +#define I40E_MAX_TUNNEL_FILTER_NUM 400 + +/* Tunnel filter struct */ +struct i40e_tunnel_filter_input { + uint8_t outer_mac[6]; /* Outer mac address to match */ + uint8_t inner_mac[6]; /* Inner mac address to match */ + uint16_t inner_vlan; /* Inner vlan address to match */ + uint16_t flags; /* Filter type flag */ + uint32_t tenant_id; /* Tenant id to match */ +}; + +struct i40e_tunnel_filter { + TAILQ_ENTRY(i40e_tunnel_filter) rules; + struct i40e_tunnel_filter_input input; + uint16_t queue; /* Queue assigned to when match */ +}; + +TAILQ_HEAD(i40e_tunnel_filter_list, i40e_tunnel_filter); + +struct i40e_tunnel_rule { + struct i40e_tunnel_filter_list tunnel_list; + struct i40e_tunnel_filter **hash_map; + struct rte_hash *hash_table; }; #define I40E_MIRROR_MAX_ENTRIES_PER_RULE 64 @@ -407,6 +543,17 @@ struct i40e_mirror_rule { TAILQ_HEAD(i40e_mirror_rule_list, i40e_mirror_rule); +/* + * Struct to store flow created. + */ +struct rte_flow { + TAILQ_ENTRY(rte_flow) node; + enum rte_filter_type filter_type; + void *rule; +}; + +TAILQ_HEAD(i40e_flow_list, rte_flow); + /* * Structure to store private data specific for PF instance. 
*/ @@ -456,12 +603,15 @@ struct i40e_pf { struct i40e_vmdq_info *vmdq; struct i40e_fdir_info fdir; /* flow director info */ + struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */ + struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */ struct i40e_fc_conf fc_conf; /* Flow control conf */ struct i40e_mirror_rule_list mirror_list; uint16_t nb_mirror_rule; /* The number of mirror rules */ bool floating_veb; /* The flag to use the floating VEB */ /* The floating enable flag for the specific VF */ bool floating_veb_list[I40E_MAX_VF]; + struct i40e_flow_list flow_list; }; enum pending_msg { @@ -517,7 +667,7 @@ struct i40e_vf { enum i40e_aq_link_speed link_speed; bool vf_reset; volatile uint32_t pend_cmd; /* pending command not finished yet */ - uint32_t cmd_retval; /* return value of the cmd response from PF */ + int32_t cmd_retval; /* return value of the cmd response from PF */ u16 pend_msg; /* flags indicates events from pf not handled yet */ uint8_t *aq_resp; /* buffer to store the adminq response from PF */ @@ -554,6 +704,25 @@ struct i40e_adapter { struct rte_timecounter tx_tstamp_tc; }; +extern const struct rte_flow_ops i40e_flow_ops; + +union i40e_filter_t { + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_fdir_filter fdir_filter; + struct rte_eth_tunnel_filter_conf tunnel_filter; +}; + +typedef int (*parse_filter_t)(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +struct i40e_valid_pattern { + enum rte_flow_item_type *items; + parse_filter_t parse_filter; +}; + int i40e_dev_switch_queues(struct i40e_pf *pf, bool on); int i40e_vsi_release(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, @@ -577,7 +746,7 @@ int i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi, struct i40e_vsi_vlan_pvid_info *info); int i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on); int i40e_vsi_config_vlan_filter(struct i40e_vsi *vsi, bool on); -uint64_t i40e_config_hena(uint64_t flags); +uint64_t i40e_config_hena(uint64_t flags, enum i40e_mac_type type); uint64_t i40e_parse_hena(uint64_t flags); enum i40e_status_code i40e_fdir_setup_tx_resources(struct i40e_pf *pf); enum i40e_status_code i40e_fdir_setup_rx_resources(struct i40e_pf *pf); @@ -595,15 +764,44 @@ int i40e_fdir_ctrl_func(struct rte_eth_dev *dev, int i40e_select_filter_input_set(struct i40e_hw *hw, struct rte_eth_input_set_conf *conf, enum rte_filter_type filter); +void i40e_fdir_filter_restore(struct i40e_pf *pf); int i40e_hash_filter_inset_select(struct i40e_hw *hw, struct rte_eth_input_set_conf *conf); int i40e_fdir_filter_inset_select(struct i40e_pf *pf, struct rte_eth_input_set_conf *conf); - +int i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, uint32_t opcode, + uint32_t retval, uint8_t *msg, + uint16_t msglen); void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_txq_info *qinfo); +struct i40e_ethertype_filter * +i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule, + const struct i40e_ethertype_filter_input *input); +int i40e_sw_ethertype_filter_del(struct i40e_pf *pf, + struct i40e_ethertype_filter_input *input); +int i40e_sw_fdir_filter_del(struct i40e_pf *pf, + struct rte_eth_fdir_input *input); +struct i40e_tunnel_filter * +i40e_sw_tunnel_filter_lookup(struct 
i40e_tunnel_rule *tunnel_rule, + const struct i40e_tunnel_filter_input *input); +int i40e_sw_tunnel_filter_del(struct i40e_pf *pf, + struct i40e_tunnel_filter_input *input); +uint64_t i40e_get_default_input_set(uint16_t pctype); +int i40e_ethertype_filter_set(struct i40e_pf *pf, + struct rte_eth_ethertype_filter *filter, + bool add); +int i40e_add_del_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *filter, + bool add); +int i40e_dev_tunnel_filter_set(struct i40e_pf *pf, + struct rte_eth_tunnel_filter_conf *tunnel_filter, + uint8_t add); +int i40e_fdir_flush(struct rte_eth_dev *dev); + +#define I40E_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) /* I40E_DEV_PRIVATE_TO */ #define I40E_DEV_PRIVATE_TO_PF(adapter) \ @@ -699,6 +897,25 @@ i40e_calc_itr_interval(int16_t interval) (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV6_OTHER || \ (flow_type) == RTE_ETH_FLOW_L2_PAYLOAD) +#define I40E_VALID_PCTYPE_X722(pctype) \ + ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_SCTP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER || \ + (pctype) == I40E_FILTER_PCTYPE_FRAG_IPV6 || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_SCTP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \ + (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD) + #define I40E_VALID_PCTYPE(pctype) \ ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \ (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \ @@ -712,4 +929,18 @@ i40e_calc_itr_interval(int16_t interval) (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \ (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD) +#define I40E_PHY_TYPE_SUPPORT_40G(phy_type) \ + (((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_KR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_AOC) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_SR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_LR4)) + +#define I40E_PHY_TYPE_SUPPORT_25G(phy_type) \ + (((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_KR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_CR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_SR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_LR)) + #endif /* _I40E_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c index a616ae0b..a606aefe 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c @@ -126,8 +126,6 @@ static void i40evf_dev_promiscuous_enable(struct rte_eth_dev *dev); static void i40evf_dev_promiscuous_disable(struct rte_eth_dev *dev); static void i40evf_dev_allmulticast_enable(struct rte_eth_dev *dev); static void i40evf_dev_allmulticast_disable(struct rte_eth_dev *dev); -static int i40evf_get_link_status(struct rte_eth_dev *dev, - struct rte_eth_link *link); static int i40evf_init_vlan(struct rte_eth_dev *dev); static int i40evf_dev_rx_queue_start(struct rte_eth_dev *dev, 
uint16_t rx_queue_id); @@ -153,6 +151,9 @@ static int i40evf_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); static int i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); +static int i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); +static void i40evf_set_default_mac_addr(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); static int i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id); static int @@ -178,11 +179,11 @@ static const struct rte_i40evf_xstats_name_off rte_i40evf_stats_strings[] = { {"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats, rx_unknown_protocol)}, {"tx_bytes", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, + {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)}, + {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)}, + {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_broadcast)}, + {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_discards)}, + {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_errors)}, }; #define I40EVF_NB_XSTATS (sizeof(rte_i40evf_stats_strings) / \ @@ -227,6 +228,8 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = { .reta_query = i40evf_dev_rss_reta_query, .rss_hash_update = i40evf_dev_rss_hash_update, .rss_hash_conf_get = i40evf_dev_rss_hash_conf_get, + .mtu_set = i40evf_dev_mtu_set, + .mac_addr_set = i40evf_set_default_mac_addr, }; /* @@ -363,6 +366,7 @@ i40evf_execute_vf_cmd(struct rte_eth_dev *dev, struct vf_cmd_info *args) err = -1; do { ret = i40evf_read_pfmsg(dev, &info); + vf->cmd_retval = info.result; if (ret == I40EVF_MSG_CMD) { err = 0; break; @@ -641,7 +645,7 @@ i40evf_configure_vsi_queues(struct rte_eth_dev *dev) ret = i40evf_execute_vf_cmd(dev, &args); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of " - "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES\n"); + "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES"); return ret; } @@ -694,7 +698,7 @@ i40evf_configure_vsi_queues_ext(struct rte_eth_dev *dev) ret = i40evf_execute_vf_cmd(dev, &args); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of " - "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT\n"); + "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT"); return ret; } @@ -720,7 +724,8 @@ i40evf_config_irq_map(struct rte_eth_dev *dev) uint8_t cmd_buffer[sizeof(struct i40e_virtchnl_irq_map_info) + \ sizeof(struct i40e_virtchnl_vector_map)]; struct i40e_virtchnl_irq_map_info *map_info; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t vector_id; int i, err; @@ -888,19 +893,16 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev, } static void -i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) +i40evf_del_mac_addr_by_addr(struct rte_eth_dev *dev, + struct ether_addr *addr) { struct i40e_virtchnl_ether_addr_list *list; struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct rte_eth_dev_data *data = dev->data; - struct ether_addr *addr; uint8_t cmd_buffer[sizeof(struct i40e_virtchnl_ether_addr_list) + \ sizeof(struct 
i40e_virtchnl_ether_addr)]; int err; struct vf_cmd_info args; - addr = &(data->mac_addrs[index]); - if (i40e_validate_mac_addr(addr->addr_bytes) != I40E_SUCCESS) { PMD_DRV_LOG(ERR, "Invalid mac:%x-%x-%x-%x-%x-%x", addr->addr_bytes[0], addr->addr_bytes[1], @@ -927,6 +929,17 @@ i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) return; } +static void +i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) +{ + struct rte_eth_dev_data *data = dev->data; + struct ether_addr *addr; + + addr = &data->mac_addrs[index]; + + i40evf_del_mac_addr_by_addr(dev, addr); +} + static int i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats) { @@ -954,7 +967,7 @@ i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats) } static int -i40evf_get_statics(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +i40evf_get_statistics(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { int ret; struct i40e_eth_stats *pstats = NULL; @@ -1084,37 +1097,11 @@ i40evf_del_vlan(struct rte_eth_dev *dev, uint16_t vlanid) return err; } -static int -i40evf_get_link_status(struct rte_eth_dev *dev, struct rte_eth_link *link) -{ - struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - int err; - struct vf_cmd_info args; - struct rte_eth_link *new_link; - - args.ops = (enum i40e_virtchnl_ops)I40E_VIRTCHNL_OP_GET_LINK_STAT; - args.in_args = NULL; - args.in_args_size = 0; - args.out_buffer = vf->aq_resp; - args.out_size = I40E_AQ_BUF_SZ; - err = i40evf_execute_vf_cmd(dev, &args); - if (err) { - PMD_DRV_LOG(ERR, "fail to execute command OP_GET_LINK_STAT"); - return err; - } - - new_link = (struct rte_eth_link *)args.out_buffer; - (void)rte_memcpy(link, new_link, sizeof(*link)); - - return 0; -} - static const struct rte_pci_id pci_id_i40evf_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF_HV) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_A0_VF) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF) }, - { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF_HV) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -1208,7 +1195,6 @@ i40evf_init_vf(struct rte_eth_dev *dev) int i, err, bufsz; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct ether_addr *p_mac_addr; uint16_t interval = i40e_calc_itr_interval(I40E_QUEUE_ITR_INTERVAL_MAX); @@ -1285,9 +1271,8 @@ i40evf_init_vf(struct rte_eth_dev *dev) vf->vsi.adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); /* Store the MAC address configured by host, or generate random one */ - p_mac_addr = (struct ether_addr *)(vf->vsi_res->default_mac_addr); - if (is_valid_assigned_ether_addr(p_mac_addr)) /* Configured by host */ - ether_addr_copy(p_mac_addr, (struct ether_addr *)hw->mac.addr); + if (is_valid_assigned_ether_addr((struct ether_addr *)hw->mac.addr)) + vf->flags |= I40E_FLAG_VF_MAC_BY_PF; else eth_random_addr(hw->mac.addr); /* Generate a random one */ @@ -1340,16 +1325,16 @@ i40evf_handle_pf_event(__rte_unused struct rte_eth_dev *dev, switch (pf_msg->event) { case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: - PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event\n"); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event"); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); break; case I40E_VIRTCHNL_EVENT_LINK_CHANGE: - 
PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event\n"); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event"); vf->link_up = pf_msg->event_data.link_event.link_status; vf->link_speed = pf_msg->event_data.link_event.link_speed; break; case I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE: - PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event\n"); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event"); break; default: PMD_DRV_LOG(ERR, " unknown event received %u", pf_msg->event); @@ -1363,8 +1348,9 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_arq_event_info info; - struct i40e_virtchnl_msg *v_msg; - uint16_t pending, opcode; + uint16_t pending, aq_opc; + enum i40e_virtchnl_ops msg_opc; + enum i40e_status_code msg_ret; int ret; info.buf_len = I40E_AQ_BUF_SZ; @@ -1373,7 +1359,6 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) return; } info.msg_buf = vf->aq_resp; - v_msg = (struct i40e_virtchnl_msg *)&info.desc; pending = 1; while (pending) { @@ -1384,32 +1369,39 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) "ret: %d", ret); break; } - opcode = rte_le_to_cpu_16(info.desc.opcode); - - switch (opcode) { + aq_opc = rte_le_to_cpu_16(info.desc.opcode); + /* For the message sent from pf to vf, opcode is stored in + * cookie_high of struct i40e_aq_desc, while return error code + * are stored in cookie_low, Which is done by + * i40e_aq_send_msg_to_vf in PF driver.*/ + msg_opc = (enum i40e_virtchnl_ops)rte_le_to_cpu_32( + info.desc.cookie_high); + msg_ret = (enum i40e_status_code)rte_le_to_cpu_32( + info.desc.cookie_low); + switch (aq_opc) { case i40e_aqc_opc_send_msg_to_vf: - if (v_msg->v_opcode == I40E_VIRTCHNL_OP_EVENT) + if (msg_opc == I40E_VIRTCHNL_OP_EVENT) /* process event*/ i40evf_handle_pf_event(dev, info.msg_buf, info.msg_len); else { /* read message and it's expected one */ - if (v_msg->v_opcode == vf->pend_cmd) { - vf->cmd_retval = v_msg->v_retval; + if (msg_opc == vf->pend_cmd) { + vf->cmd_retval = msg_ret; /* prevent compiler reordering */ rte_compiler_barrier(); _clear_cmd(vf); } else PMD_DRV_LOG(ERR, "command mismatch," "expect %u, get %u", - vf->pend_cmd, v_msg->v_opcode); + vf->pend_cmd, msg_opc); PMD_DRV_LOG(DEBUG, "adminq response is received," - " opcode = %d\n", v_msg->v_opcode); + " opcode = %d", msg_opc); } break; default: PMD_DRV_LOG(ERR, "Request %u is not supported yet", - opcode); + aq_opc); break; } } @@ -1428,7 +1420,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) * void */ static void -i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +i40evf_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -1442,31 +1434,31 @@ i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, /* No interrupt event indicated */ if (!(icr0 & I40E_VFINT_ICR01_INTEVENT_MASK)) { - PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do\n"); + PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do"); goto done; } if (icr0 & I40E_VFINT_ICR01_ADMINQ_MASK) { - PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported\n"); + PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported"); i40evf_handle_aq_msg(dev); } /* Link Status Change interrupt */ if (icr0 & I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK) PMD_DRV_LOG(DEBUG, "LINK_STAT_CHANGE is reported," - " do nothing\n"); + " do nothing"); done: i40evf_enable_irq0(hw); - 
rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); } static int i40evf_dev_init(struct rte_eth_dev *eth_dev) { - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(\ - eth_dev->data->dev_private); - struct rte_pci_device *pci_dev = eth_dev->pci_dev; + struct i40e_hw *hw + = I40E_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(eth_dev); PMD_INIT_FUNC_TRACE(); @@ -1485,15 +1477,16 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - rte_eth_copy_pci_info(eth_dev, eth_dev->pci_dev); + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; - hw->vendor_id = eth_dev->pci_dev->id.vendor_id; - hw->device_id = eth_dev->pci_dev->id.device_id; - hw->subsystem_vendor_id = eth_dev->pci_dev->id.subsystem_vendor_id; - hw->subsystem_device_id = eth_dev->pci_dev->id.subsystem_device_id; - hw->bus.device = eth_dev->pci_dev->addr.devid; - hw->bus.func = eth_dev->pci_dev->addr.function; - hw->hw_addr = (void *)eth_dev->pci_dev->mem_resource[0].addr; + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; + hw->bus.device = pci_dev->addr.devid; + hw->bus.func = pci_dev->addr.function; + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->adapter_stopped = 0; if(i40evf_init_vf(eth_dev) != 0) { @@ -1554,38 +1547,19 @@ i40evf_dev_uninit(struct rte_eth_dev *eth_dev) */ static struct eth_driver rte_i40evf_pmd = { .pci_drv = { - .name = "rte_i40evf_pmd", .id_table = pci_id_i40evf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = i40evf_dev_init, .eth_dev_uninit = i40evf_dev_uninit, .dev_private_size = sizeof(struct i40e_adapter), }; -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI Fortville devices. 
- */ -static int -rte_i40evf_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_i40evf_pmd); - - return 0; -} - -static struct rte_driver rte_i40evf_driver = { - .type = PMD_PDEV, - .init = rte_i40evf_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_i40evf_driver, i40evf); -DRIVER_REGISTER_PCI_TABLE(i40evf, pci_id_i40evf_map); +RTE_PMD_REGISTER_PCI(net_i40e_vf, rte_i40evf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_i40e_vf, pci_id_i40evf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_i40e_vf, "* igb_uio | vfio"); static int i40evf_dev_configure(struct rte_eth_dev *dev) @@ -1900,7 +1874,8 @@ i40evf_enable_queues_intr(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; if (!rte_intr_allow_others(intr_handle)) { I40E_WRITE_REG(hw, @@ -1932,7 +1907,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; if (!rte_intr_allow_others(intr_handle)) { I40E_WRITE_REG(hw, I40E_VFINT_DYN_CTL01, @@ -1958,7 +1934,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev) static int i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t interval = i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -1984,7 +1961,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40EVF_WRITE_FLUSH(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pci_dev->intr_handle); return 0; } @@ -1992,7 +1969,8 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) static int i40evf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t msix_intr; @@ -2072,7 +2050,8 @@ i40evf_dev_start(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); @@ -2096,7 +2075,7 @@ i40evf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -2137,7 +2116,8 @@ err_queue: 
static void i40evf_dev_stop(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -2166,35 +2146,33 @@ i40evf_dev_link_update(struct rte_eth_dev *dev, * DPDK pf host provide interfacet to acquire link status * while Linux driver does not */ - if (vf->version_major == I40E_DPDK_VERSION_MAJOR) - i40evf_get_link_status(dev, &new_link); - else { - /* Linux driver PF host */ - switch (vf->link_speed) { - case I40E_LINK_SPEED_100MB: - new_link.link_speed = ETH_SPEED_NUM_100M; - break; - case I40E_LINK_SPEED_1GB: - new_link.link_speed = ETH_SPEED_NUM_1G; - break; - case I40E_LINK_SPEED_10GB: - new_link.link_speed = ETH_SPEED_NUM_10G; - break; - case I40E_LINK_SPEED_20GB: - new_link.link_speed = ETH_SPEED_NUM_20G; - break; - case I40E_LINK_SPEED_40GB: - new_link.link_speed = ETH_SPEED_NUM_40G; - break; - default: - new_link.link_speed = ETH_SPEED_NUM_100M; - break; - } - /* full duplex only */ - new_link.link_duplex = ETH_LINK_FULL_DUPLEX; - new_link.link_status = vf->link_up ? ETH_LINK_UP : - ETH_LINK_DOWN; + + /* Linux driver PF host */ + switch (vf->link_speed) { + case I40E_LINK_SPEED_100MB: + new_link.link_speed = ETH_SPEED_NUM_100M; + break; + case I40E_LINK_SPEED_1GB: + new_link.link_speed = ETH_SPEED_NUM_1G; + break; + case I40E_LINK_SPEED_10GB: + new_link.link_speed = ETH_SPEED_NUM_10G; + break; + case I40E_LINK_SPEED_20GB: + new_link.link_speed = ETH_SPEED_NUM_20G; + break; + case I40E_LINK_SPEED_40GB: + new_link.link_speed = ETH_SPEED_NUM_40G; + break; + default: + new_link.link_speed = ETH_SPEED_NUM_100M; + break; } + /* full duplex only */ + new_link.link_duplex = ETH_LINK_FULL_DUPLEX; + new_link.link_status = vf->link_up ? 
ETH_LINK_UP : + ETH_LINK_DOWN; + i40evf_dev_atomic_write_link_status(dev, &new_link); return 0; @@ -2266,6 +2244,7 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); memset(dev_info, 0, sizeof(*dev_info)); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs; dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs; dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN; @@ -2326,15 +2305,16 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) static void i40evf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { - if (i40evf_get_statics(dev, stats)) - PMD_DRV_LOG(ERR, "Get statics failed"); + if (i40evf_get_statistics(dev, stats)) + PMD_DRV_LOG(ERR, "Get statistics failed"); } static void i40evf_dev_close(struct rte_eth_dev *dev) { struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_pci_device *pci_dev = dev->pci_dev; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; i40evf_dev_stop(dev); hw->adapter_stopped = 1; @@ -2342,11 +2322,11 @@ i40evf_dev_close(struct rte_eth_dev *dev) i40evf_reset_vf(hw); i40e_shutdown_adminq(hw); /* disable uio intr before callback unregister */ - rte_intr_disable(&pci_dev->intr_handle); + rte_intr_disable(intr_handle); /* unregister callback func from eal lib */ - rte_intr_callback_unregister(&pci_dev->intr_handle, - i40evf_dev_interrupt_handler, (void *)dev); + rte_intr_callback_unregister(intr_handle, + i40evf_dev_interrupt_handler, dev); i40evf_disable_irq0(hw); } @@ -2423,7 +2403,7 @@ i40evf_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_64) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number of hardware can " - "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64); + "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64); return -EINVAL; } @@ -2462,7 +2442,7 @@ i40evf_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_64) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number of hardware can " - "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64); + "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64); return -EINVAL; } @@ -2568,8 +2548,11 @@ i40evf_hw_rss_hash_set(struct i40e_vf *vf, struct rte_eth_rss_conf *rss_conf) rss_hf = rss_conf->rss_hf; hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; - hena |= i40e_config_hena(rss_hf); + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; + hena |= i40e_config_hena(rss_hf, hw->mac.type); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32)); I40EVF_WRITE_FLUSH(hw); @@ -2585,7 +2568,10 @@ i40evf_disable_rss(struct i40e_vf *vf) hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32)); I40EVF_WRITE_FLUSH(hw); @@ -2601,7 +2587,7 @@ 
i40evf_config_rss(struct i40e_vf *vf) if (vf->dev_data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) { i40evf_disable_rss(vf); - PMD_DRV_LOG(DEBUG, "RSS not configured\n"); + PMD_DRV_LOG(DEBUG, "RSS not configured"); return 0; } @@ -2618,7 +2604,7 @@ i40evf_config_rss(struct i40e_vf *vf) rss_conf = vf->dev_data->dev_conf.rx_adv_conf.rss_conf; if ((rss_conf.rss_hf & I40E_RSS_OFFLOAD_ALL) == 0) { i40evf_disable_rss(vf); - PMD_DRV_LOG(DEBUG, "No hash flag is set\n"); + PMD_DRV_LOG(DEBUG, "No hash flag is set"); return 0; } @@ -2646,7 +2632,9 @@ i40evf_dev_rss_hash_update(struct rte_eth_dev *dev, hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */ + if (!(hena & ((hw->mac.type == I40E_MAC_X722) + ? I40E_RSS_HENA_ALL_X722 + : I40E_RSS_HENA_ALL))) { /* RSS disabled */ if (rss_hf != 0) /* Enable RSS */ return -EINVAL; return 0; @@ -2676,3 +2664,55 @@ i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } + +static int +i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + struct rte_eth_dev_data *dev_data = vf->dev_data; + uint32_t frame_size = mtu + ETHER_HDR_LEN + + ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE; + int ret = 0; + + /* check if mtu is within the allowed range */ + if ((mtu < ETHER_MIN_MTU) || (frame_size > I40E_FRAME_SIZE_MAX)) + return -EINVAL; + + /* mtu setting is forbidden if port is start */ + if (dev_data->dev_started) { + PMD_DRV_LOG(ERR, "port %d must be stopped before configuration", + dev_data->port_id); + return -EBUSY; + } + + if (frame_size > ETHER_MAX_LEN) + dev_data->dev_conf.rxmode.jumbo_frame = 1; + else + dev_data->dev_conf.rxmode.jumbo_frame = 0; + + dev_data->dev_conf.rxmode.max_rx_pkt_len = frame_size; + + return ret; +} + +static void +i40evf_set_default_mac_addr(struct rte_eth_dev *dev, + struct ether_addr *mac_addr) +{ + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + + if (!is_valid_assigned_ether_addr(mac_addr)) { + PMD_DRV_LOG(ERR, "Tried to set invalid MAC address."); + return; + } + + if (is_same_ether_addr(mac_addr, dev->data->mac_addrs)) + return; + + if (vf->flags & I40E_FLAG_VF_MAC_BY_PF) + return; + + i40evf_del_mac_addr_by_addr(dev, dev->data->mac_addrs); + + i40evf_add_mac_addr(dev, mac_addr, 0, 0); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_fdir.c b/src/dpdk/drivers/net/i40e/i40e_fdir.c index 33cb6dab..0700253b 100644 --- a/src/dpdk/drivers/net/i40e/i40e_fdir.c +++ b/src/dpdk/drivers/net/i40e/i40e_fdir.c @@ -74,11 +74,8 @@ #define I40E_FDIR_UDP_DEFAULT_LEN 400 /* Wait count and interval for fdir filter programming */ -#define TREX_PATCH -// TREX_PATCH - Values were 10 and 1000. 
These numbers give much better performance when -// configuring large amount of rules -#define I40E_FDIR_WAIT_COUNT 100 -#define I40E_FDIR_WAIT_INTERVAL_US 100 +#define I40E_FDIR_WAIT_COUNT 10 +#define I40E_FDIR_WAIT_INTERVAL_US 1000 /* Wait count and interval for fdir filter flush */ #define I40E_FDIR_FLUSH_RETRY 50 @@ -122,7 +119,13 @@ static int i40e_fdir_filter_programming(struct i40e_pf *pf, enum i40e_filter_pctype pctype, const struct rte_eth_fdir_filter *filter, bool add); -static int i40e_fdir_flush(struct rte_eth_dev *dev); +static int i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input, + struct i40e_fdir_filter *filter); +static struct i40e_fdir_filter * +i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info, + const struct rte_eth_fdir_input *input); +static int i40e_sw_fdir_filter_insert(struct i40e_pf *pf, + struct i40e_fdir_filter *filter); static int i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq) @@ -254,7 +257,7 @@ i40e_fdir_setup(struct i40e_pf *pf) /* reserve memory for the fdir programming packet */ snprintf(z_name, sizeof(z_name), "%s_%s_%d", - eth_dev->driver->pci_drv.name, + eth_dev->driver->pci_drv.driver.name, I40E_FDIR_MZ_NAME, eth_dev->data->port_id); mz = i40e_memzone_reserve(z_name, I40E_FDIR_PKT_LEN, SOCKET_ID_ANY); @@ -356,8 +359,15 @@ i40e_init_flx_pld(struct i40e_pf *pf) /* initialize the masks */ for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) { - if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)pctype)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722( + (enum i40e_filter_pctype)pctype)) + continue; + } else { + if (!I40E_VALID_PCTYPE( + (enum i40e_filter_pctype)pctype)) + continue; + } pf->fdir.flex_mask[pctype].word_mask = 0; i40e_write_rx_ctl(hw, I40E_PRTQF_FD_FLXINSET(pctype), 0); for (i = 0; i < I40E_FDIR_BITMASK_NUM_WORD; i++) { @@ -667,7 +677,16 @@ i40e_fdir_configure(struct rte_eth_dev *dev) i40e_set_flx_pld_cfg(pf, &conf->flex_set[i]); /* configure flex mask*/ for (i = 0; i < conf->nb_flexmasks; i++) { - pctype = i40e_flowtype_to_pctype(conf->flex_mask[i].flow_type); + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl( + hw, I40E_GLQF_FD_PCTYPES( + (int)i40e_flowtype_to_pctype( + conf->flex_mask[i].flow_type))); + } else + pctype = i40e_flowtype_to_pctype( + conf->flex_mask[i].flow_type); + i40e_set_flex_mask_on_pctype(pf, pctype, &conf->flex_mask[i]); } @@ -732,9 +751,6 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, fdir_input->flow.ip4_flow.ttl : I40E_FDIR_IP_DEFAULT_TTL; ip->type_of_service = fdir_input->flow.ip4_flow.tos; -#ifdef TREX_PATCH - ip->packet_id = rte_cpu_to_be_16(fdir_input->flow.ip4_flow.ip_id); -#endif /* * The source and destination fields in the transmitted packet * need to be presented in a reversed order with respect @@ -755,11 +771,7 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, ip6->vtc_flow = rte_cpu_to_be_32(I40E_FDIR_IPv6_DEFAULT_VTC_FLOW | (fdir_input->flow.ipv6_flow.tc << - I40E_FDIR_IPv6_TC_OFFSET) -#ifdef TREX_PATCH - | (fdir_input->flow.ipv6_flow.flow_label & 0x000fffff) -#endif - ); + I40E_FDIR_IPv6_TC_OFFSET)); ip6->payload_len = rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN); ip6->proto = fdir_input->flow.ipv6_flow.proto ? 
@@ -1011,20 +1023,92 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 	return ret;
 }
 
+static int
+i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input,
+			 struct i40e_fdir_filter *filter)
+{
+	rte_memcpy(&filter->fdir, input, sizeof(struct rte_eth_fdir_filter));
+	return 0;
+}
+
+/* Check if there exists the flow director filter */
+static struct i40e_fdir_filter *
+i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info,
+			   const struct rte_eth_fdir_input *input)
+{
+	int ret;
+
+	ret = rte_hash_lookup(fdir_info->hash_table, (const void *)input);
+	if (ret < 0)
+		return NULL;
+
+	return fdir_info->hash_map[ret];
+}
+
+/* Add a flow director filter into the SW list */
+static int
+i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
+{
+	struct i40e_fdir_info *fdir_info = &pf->fdir;
+	int ret;
+
+	ret = rte_hash_add_key(fdir_info->hash_table,
+			       &filter->fdir.input);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to insert fdir filter to hash table %d!",
+			    ret);
+		return ret;
+	}
+	fdir_info->hash_map[ret] = filter;
+
+	TAILQ_INSERT_TAIL(&fdir_info->fdir_list, filter, rules);
+
+	return 0;
+}
+
+/* Delete a flow director filter from the SW list */
+int
+i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct rte_eth_fdir_input *input)
+{
+	struct i40e_fdir_info *fdir_info = &pf->fdir;
+	struct i40e_fdir_filter *filter;
+	int ret;
+
+	ret = rte_hash_del_key(fdir_info->hash_table, input);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to delete fdir filter to hash table %d!",
+			    ret);
+		return ret;
+	}
+	filter = fdir_info->hash_map[ret];
+	fdir_info->hash_map[ret] = NULL;
+
+	TAILQ_REMOVE(&fdir_info->fdir_list, filter, rules);
+	rte_free(filter);
+
+	return 0;
+}
+
 /*
  * i40e_add_del_fdir_filter - add or remove a flow director filter.
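The helpers above keep a software shadow of every programmed rule: rte_hash_add_key() returns a slot index that doubles as an index into hash_map[], and the same entries are linked on a TAILQ so they can be walked in order (as i40e_fdir_filter_restore() does later). Below is a small self-contained sketch of that pattern; rule_key, rule and the array size are invented stand-ins, only the rte_hash calls are the real API.

#include <stdint.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
#include <rte_memory.h>

struct rule_key { uint32_t a; uint32_t b; };   /* stand-in for rte_eth_fdir_input */
struct rule { struct rule_key key; void *hw_state; };

#define MAX_RULES 1024
static struct rule *rule_map[MAX_RULES];       /* plays the role of hash_map[] */

static struct rte_hash *
rule_table_create(void)
{
    struct rte_hash_parameters p = {
        .name = "rule_table",
        .entries = MAX_RULES,
        .key_len = sizeof(struct rule_key),
        .hash_func = rte_hash_crc,
        .hash_func_init_val = 0,
        .socket_id = SOCKET_ID_ANY,
    };
    return rte_hash_create(&p);
}

static int
rule_insert(struct rte_hash *h, struct rule *r)
{
    int pos = rte_hash_add_key(h, &r->key);   /* slot index, reused as map index */
    if (pos < 0)
        return pos;
    rule_map[pos] = r;
    return 0;
}

static struct rule *
rule_lookup(struct rte_hash *h, const struct rule_key *key)
{
    int pos = rte_hash_lookup(h, key);
    return (pos < 0) ? NULL : rule_map[pos];
}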
* @pf: board private structure * @filter: fdir filter entry * @add: 0 - delete, 1 - add */ -static int +int i40e_add_del_fdir_filter(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *filter, bool add) { + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt; enum i40e_filter_pctype pctype; + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *fdir_filter, *node; + struct i40e_fdir_filter check_filter; /* Check if the filter exists */ int ret = 0; if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) { @@ -1047,6 +1131,22 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev, return -EINVAL; } + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_fdir_filter_convert(filter, &check_filter); + node = i40e_sw_fdir_filter_lookup(fdir_info, &check_filter.fdir.input); + if (add && node) { + PMD_DRV_LOG(ERR, + "Conflict with existing flow director rules!"); + return -EINVAL; + } + + if (!add && !node) { + PMD_DRV_LOG(ERR, + "There's no corresponding flow firector filter!"); + return -EINVAL; + } + memset(pkt, 0, I40E_FDIR_PKT_LEN); ret = i40e_fdir_construct_pkt(pf, &filter->input, pkt); @@ -1054,13 +1154,32 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev, PMD_DRV_LOG(ERR, "construct packet for fdir fails."); return ret; } - pctype = i40e_flowtype_to_pctype(filter->input.flow_type); + + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl( + hw, I40E_GLQF_FD_PCTYPES( + (int)i40e_flowtype_to_pctype( + filter->input.flow_type))); + } else + pctype = i40e_flowtype_to_pctype(filter->input.flow_type); + ret = i40e_fdir_filter_programming(pf, pctype, filter, add); if (ret < 0) { PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).", pctype); return ret; } + + if (add) { + fdir_filter = rte_zmalloc("fdir_filter", + sizeof(*fdir_filter), 0); + rte_memcpy(fdir_filter, &check_filter, sizeof(check_filter)); + ret = i40e_sw_fdir_filter_insert(pf, fdir_filter); + } else { + ret = i40e_sw_fdir_filter_del(pf, &node->fdir.input); + } + return ret; } @@ -1153,12 +1272,8 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, fdirdp->dtype_cmd_cntindex |= rte_cpu_to_le_32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK); fdirdp->dtype_cmd_cntindex |= -#ifdef TREX_PATCH - rte_cpu_to_le_32((fdir_action->stat_count_index << -#else rte_cpu_to_le_32( ((uint32_t)pf->fdir.match_counter_index << -#endif I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK); @@ -1182,17 +1297,11 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); for (i = 0; i < I40E_FDIR_WAIT_COUNT; i++) { -#ifndef TREX_PATCH - /* itay: moved this delay after the check to avoid first check */ rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US); -#endif if ((txdp->cmd_type_offset_bsz & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) == rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) break; -#ifdef TREX_PATCH - rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US); -#endif } if (i >= I40E_FDIR_WAIT_COUNT) { PMD_DRV_LOG(ERR, "Failed to program FDIR filter:" @@ -1200,10 +1309,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, return -ETIMEDOUT; } /* totally delay 10 ms to check programming status*/ -#ifndef TREX_PATCH - /* itay: tests show this is not needed */ rte_delay_us((I40E_FDIR_WAIT_COUNT - i) * 
I40E_FDIR_WAIT_INTERVAL_US); -#endif if (i40e_check_fdir_programming_status(rxq) < 0) { PMD_DRV_LOG(ERR, "Failed to program FDIR filter:" " programming status reported."); @@ -1217,7 +1323,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, * i40e_fdir_flush - clear all filters of Flow Director table * @pf: board private structure */ -static int +int i40e_fdir_flush(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); @@ -1296,6 +1402,7 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf, { struct i40e_fdir_flex_mask *mask; struct rte_eth_fdir_flex_mask *ptr = flex_mask; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); uint16_t flow_type; uint8_t i, j; uint16_t off_bytes, mask_tmp; @@ -1304,8 +1411,13 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf, i <= I40E_FILTER_PCTYPE_L2_PAYLOAD; i++) { mask = &pf->fdir.flex_mask[i]; - if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722((enum i40e_filter_pctype)i)) + continue; + } else { + if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i)) + continue; + } flow_type = i40e_pctype_to_flowtype((enum i40e_filter_pctype)i); for (j = 0; j < I40E_FDIR_MAX_FLEXWORD_NUM; j++) { if (mask->word_mask & I40E_FLEX_WORD_MASK(j)) { @@ -1472,3 +1584,34 @@ i40e_fdir_ctrl_func(struct rte_eth_dev *dev, } return ret; } + +/* Restore flow director filter */ +void +i40e_fdir_filter_restore(struct i40e_pf *pf) +{ + struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(pf->main_vsi); + struct i40e_fdir_filter_list *fdir_list = &pf->fdir.fdir_list; + struct i40e_fdir_filter *f; +#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + uint32_t fdstat; + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ + + TAILQ_FOREACH(f, fdir_list, rules) + i40e_add_del_fdir_filter(dev, &f->fdir, TRUE); + +#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER + fdstat = I40E_READ_REG(hw, I40E_PFQF_FDSTAT); + guarant_cnt = + (uint32_t)((fdstat & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) >> + I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT); + best_cnt = + (uint32_t)((fdstat & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >> + I40E_PFQF_FDSTAT_BEST_CNT_SHIFT); +#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ + + PMD_DRV_LOG(INFO, "FDIR: Guarant count: %d, Best count: %d", + guarant_cnt, best_cnt); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_flow.c b/src/dpdk/drivers/net/i40e/i40e_flow.c new file mode 100644 index 00000000..76bb3320 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_flow.c @@ -0,0 +1,1849 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i40e_logs.h" +#include "base/i40e_type.h" +#include "base/i40e_prototype.h" +#include "i40e_ethdev.h" + +#define I40E_IPV4_TC_SHIFT 4 +#define I40E_IPV6_TC_MASK (0x00FF << I40E_IPV4_TC_SHIFT) +#define I40E_IPV6_FRAG_HEADER 44 +#define I40E_TENANT_ARRAY_NUM 3 +#define I40E_TCI_MASK 0xFFFF + +static int i40e_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static struct rte_flow *i40e_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static int i40e_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error); +static int i40e_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error); +static int +i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter); +static int i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter); +static int i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter); +static int i40e_flow_parse_fdir_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter); +static int i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter); +static int i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter); +static int i40e_flow_parse_attr(const struct rte_flow_attr *attr, + struct rte_flow_error *error); +static int i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +static int i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct 
rte_flow_error *error, + union i40e_filter_t *filter); +static int i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter); +static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf, + struct i40e_tunnel_filter *filter); +static int i40e_flow_flush_fdir_filter(struct i40e_pf *pf); +static int i40e_flow_flush_ethertype_filter(struct i40e_pf *pf); +static int i40e_flow_flush_tunnel_filter(struct i40e_pf *pf); + +const struct rte_flow_ops i40e_flow_ops = { + .validate = i40e_flow_validate, + .create = i40e_flow_create, + .destroy = i40e_flow_destroy, + .flush = i40e_flow_flush, +}; + +union i40e_filter_t cons_filter; +enum rte_filter_type cons_filter_type = RTE_ETH_FILTER_NONE; + +/* Pattern matched ethertype filter */ +static enum rte_flow_item_type pattern_ethertype[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern matched flow director filter */ +static enum rte_flow_item_type pattern_fdir_ipv4[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_udp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_udp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_tcp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_tcp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_sctp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_sctp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_udp[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_udp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_tcp[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_tcp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_sctp[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_sctp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + 
RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern matched tunnel filter */ +static enum rte_flow_item_type pattern_vxlan_1[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_2[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_3[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_4[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, +}; + +static struct i40e_valid_pattern i40e_supported_patterns[] = { + /* Ethertype */ + { pattern_ethertype, i40e_flow_parse_ethertype_filter }, + /* FDIR */ + { pattern_fdir_ipv4, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_udp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_udp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_tcp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_tcp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_sctp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_sctp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_udp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_udp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_tcp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_tcp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_sctp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_sctp_ext, i40e_flow_parse_fdir_filter }, + /* tunnel */ + { pattern_vxlan_1, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_2, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_3, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_4, i40e_flow_parse_tunnel_filter }, +}; + +#define NEXT_ITEM_OF_ACTION(act, actions, index) \ + do { \ + act = actions + index; \ + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { \ + index++; \ + act = actions + index; \ + } \ + } while (0) + +/* Find the first VOID or non-VOID item pointer */ +static const struct rte_flow_item * +i40e_find_first_item(const struct rte_flow_item *item, bool is_void) +{ + bool is_find; + + while (item->type != RTE_FLOW_ITEM_TYPE_END) { + if (is_void) + is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID; + else + is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID; + if (is_find) + break; + item++; + } + return item; +} + +/* Skip all VOID items of the pattern */ +static void +i40e_pattern_skip_void_item(struct rte_flow_item *items, + const struct rte_flow_item *pattern) +{ + uint32_t cpy_count = 0; + const struct rte_flow_item *pb = pattern, *pe = pattern; + + for (;;) { + /* Find a non-void item first */ + pb = i40e_find_first_item(pb, false); + if (pb->type == RTE_FLOW_ITEM_TYPE_END) { + pe = pb; + break; + } + + /* Find a void item */ + pe = i40e_find_first_item(pb + 1, true); + + cpy_count = pe - pb; + rte_memcpy(items, pb, sizeof(struct rte_flow_item) * 
cpy_count); + + items += cpy_count; + + if (pe->type == RTE_FLOW_ITEM_TYPE_END) { + pb = pe; + break; + } + + pb = pe + 1; + } + /* Copy the END item. */ + rte_memcpy(items, pe, sizeof(struct rte_flow_item)); +} + +/* Check if the pattern matches a supported item type array */ +static bool +i40e_match_pattern(enum rte_flow_item_type *item_array, + struct rte_flow_item *pattern) +{ + struct rte_flow_item *item = pattern; + + while ((*item_array == item->type) && + (*item_array != RTE_FLOW_ITEM_TYPE_END)) { + item_array++; + item++; + } + + return (*item_array == RTE_FLOW_ITEM_TYPE_END && + item->type == RTE_FLOW_ITEM_TYPE_END); +} + +/* Find if there's parse filter function matched */ +static parse_filter_t +i40e_find_parse_filter_func(struct rte_flow_item *pattern) +{ + parse_filter_t parse_filter = NULL; + uint8_t i = 0; + + for (; i < RTE_DIM(i40e_supported_patterns); i++) { + if (i40e_match_pattern(i40e_supported_patterns[i].items, + pattern)) { + parse_filter = i40e_supported_patterns[i].parse_filter; + break; + } + } + + return parse_filter; +} + +/* Parse attributes */ +static int +i40e_flow_parse_attr(const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + /* Must be input direction */ + if (!attr->ingress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->egress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->priority) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* Not supported */ + if (attr->group) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + attr, "Not support group."); + return -rte_errno; + } + + return 0; +} + +static uint16_t +i40e_get_outer_vlan(struct rte_eth_dev *dev) +{ + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + int qinq = dev->data->dev_conf.rxmode.hw_vlan_extend; + uint64_t reg_r = 0; + uint16_t reg_id; + uint16_t tpid; + + if (qinq) + reg_id = 2; + else + reg_id = 3; + + i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id), + ®_r, NULL); + + tpid = (reg_r >> I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) & 0xFFFF; + + return tpid; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported filter types: MAC_ETHTYPE and ETHTYPE. + * 3. SRC mac_addr mask should be 00:00:00:00:00:00. + * 4. DST mac_addr mask should be 00:00:00:00:00:00 or + * FF:FF:FF:FF:FF:FF + * 5. Ether_type mask should be 0xFFFF. + */ +static int +i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter) +{ + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + enum rte_flow_item_type item_type; + uint16_t outer_tpid; + + outer_tpid = i40e_get_outer_vlan(dev); + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + /* Get the MAC info. 
*/ + if (!eth_spec || !eth_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL ETH spec/mask"); + return -rte_errno; + } + + /* Mask bits of source MAC address must be full of 0. + * Mask bits of destination MAC address must be full + * of 1 or full of 0. + */ + if (!is_zero_ether_addr(ð_mask->src) || + (!is_zero_ether_addr(ð_mask->dst) && + !is_broadcast_ether_addr(ð_mask->dst))) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid MAC_addr mask"); + return -rte_errno; + } + + if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ethertype mask"); + return -rte_errno; + } + + /* If mask bits of destination MAC address + * are full of 1, set RTE_ETHTYPE_FLAGS_MAC. + */ + if (is_broadcast_ether_addr(ð_mask->dst)) { + filter->mac_addr = eth_spec->dst; + filter->flags |= RTE_ETHTYPE_FLAGS_MAC; + } else { + filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC; + } + filter->ether_type = rte_be_to_cpu_16(eth_spec->type); + + if (filter->ether_type == ETHER_TYPE_IPv4 || + filter->ether_type == ETHER_TYPE_IPv6 || + filter->ether_type == outer_tpid) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Unsupported ether_type in" + " control packet filter."); + return -rte_errno; + } + break; + default: + break; + } + } + + return 0; +} + +/* Ethertype action only supports QUEUE or DROP. */ +static int +i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE or DROP. 
*/ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + if (filter->queue >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid queue ID for" + " ethertype_filter."); + return -rte_errno; + } + } else { + filter->flags |= RTE_ETHTYPE_FLAGS_DROP; + } + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_ethertype_filter *ethertype_filter = + &filter->ethertype_filter; + int ret; + + ret = i40e_flow_parse_ethertype_pattern(dev, pattern, error, + ethertype_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_ethertype_action(dev, actions, error, + ethertype_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_ETHERTYPE; + + return ret; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported flow type and input set: refer to array + * default_inset_table in i40e_ethdev.c. + * 3. Mask of fields which need to be matched should be + * filled with 1. + * 4. Mask of fields which needn't to be matched should be + * filled with 0. 
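The rules listed in the comment above translate directly into how an application builds an rte_flow pattern for the flow director: every field it wants matched carries an all-ones mask, everything else stays zero, and item->last must be left NULL. A hedged sketch of an IPv4/UDP rule with a QUEUE plus MARK action follows; the addresses, ports, queue index and mark id are invented, and whether the rule is accepted still depends on the input set matching the driver's default_inset_table.

#include <rte_byteorder.h>
#include <rte_flow.h>
#include <rte_ip.h>
#include <rte_udp.h>

static int
add_fdir_udp_rule(uint8_t port_id)   /* port_id is uint8_t in DPDK 17.02 */
{
    struct rte_flow_error err;
    struct rte_flow_attr attr = { .ingress = 1 };

    struct rte_flow_item_ipv4 ip_spec = {
        .hdr = { .src_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
                 .dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 2)) } };
    struct rte_flow_item_ipv4 ip_mask = {
        .hdr = { .src_addr = UINT32_MAX, .dst_addr = UINT32_MAX } };
    struct rte_flow_item_udp udp_spec = {
        .hdr = { .src_port = rte_cpu_to_be_16(32768),
                 .dst_port = rte_cpu_to_be_16(53) } };
    struct rte_flow_item_udp udp_mask = {
        .hdr = { .src_port = UINT16_MAX, .dst_port = UINT16_MAX } };

    struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_IPV4,
          .spec = &ip_spec, .mask = &ip_mask },   /* .last stays NULL */
        { .type = RTE_FLOW_ITEM_TYPE_UDP,
          .spec = &udp_spec, .mask = &udp_mask },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };

    struct rte_flow_action_queue queue = { .index = 1 };
    struct rte_flow_action_mark mark = { .id = 0x1234 };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    if (rte_flow_validate(port_id, &attr, pattern, actions, &err) != 0)
        return -1;
    return rte_flow_create(port_id, &attr, pattern, actions, &err) ? 0 : -1;
}

The port also has to be configured with fdir_conf.mode = RTE_FDIR_MODE_PERFECT; otherwise the parse step rejects the rule.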
+ */ +static int +i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec, *eth_mask; + const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_mask; + const struct rte_flow_item_ipv6 *ipv6_spec, *ipv6_mask; + const struct rte_flow_item_tcp *tcp_spec, *tcp_mask; + const struct rte_flow_item_udp *udp_spec, *udp_mask; + const struct rte_flow_item_sctp *sctp_spec, *sctp_mask; + const struct rte_flow_item_vf *vf_spec; + uint32_t flow_type = RTE_ETH_FLOW_UNKNOWN; + enum i40e_filter_pctype pctype; + uint64_t input_set = I40E_INSET_NONE; + uint16_t flag_offset; + enum rte_flow_item_type item_type; + enum rte_flow_item_type l3 = RTE_FLOW_ITEM_TYPE_END; + uint32_t j; + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + if (eth_spec || eth_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ETH spec/mask"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + l3 = RTE_FLOW_ITEM_TYPE_IPV4; + ipv4_spec = + (const struct rte_flow_item_ipv4 *)item->spec; + ipv4_mask = + (const struct rte_flow_item_ipv4 *)item->mask; + if (!ipv4_spec || !ipv4_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL IPv4 spec/mask"); + return -rte_errno; + } + + /* Check IPv4 mask and update input set */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv4 mask."); + return -rte_errno; + } + + if (ipv4_mask->hdr.src_addr == UINT32_MAX) + input_set |= I40E_INSET_IPV4_SRC; + if (ipv4_mask->hdr.dst_addr == UINT32_MAX) + input_set |= I40E_INSET_IPV4_DST; + if (ipv4_mask->hdr.type_of_service == UINT8_MAX) + input_set |= I40E_INSET_IPV4_TOS; + if (ipv4_mask->hdr.time_to_live == UINT8_MAX) + input_set |= I40E_INSET_IPV4_TTL; + if (ipv4_mask->hdr.next_proto_id == UINT8_MAX) + input_set |= I40E_INSET_IPV4_PROTO; + + /* Get filter info */ + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_OTHER; + /* Check if it is fragment. 
*/ + flag_offset = + rte_be_to_cpu_16(ipv4_spec->hdr.fragment_offset); + if (flag_offset & IPV4_HDR_OFFSET_MASK || + flag_offset & IPV4_HDR_MF_FLAG) + flow_type = RTE_ETH_FLOW_FRAG_IPV4; + + /* Get the filter info */ + filter->input.flow.ip4_flow.proto = + ipv4_spec->hdr.next_proto_id; + filter->input.flow.ip4_flow.tos = + ipv4_spec->hdr.type_of_service; + filter->input.flow.ip4_flow.ttl = + ipv4_spec->hdr.time_to_live; + filter->input.flow.ip4_flow.src_ip = + ipv4_spec->hdr.src_addr; + filter->input.flow.ip4_flow.dst_ip = + ipv4_spec->hdr.dst_addr; + + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + l3 = RTE_FLOW_ITEM_TYPE_IPV6; + ipv6_spec = + (const struct rte_flow_item_ipv6 *)item->spec; + ipv6_mask = + (const struct rte_flow_item_ipv6 *)item->mask; + if (!ipv6_spec || !ipv6_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL IPv6 spec/mask"); + return -rte_errno; + } + + /* Check IPv6 mask and update input set */ + if (ipv6_mask->hdr.payload_len) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv6 mask"); + return -rte_errno; + } + + /* SCR and DST address of IPv6 shouldn't be masked */ + for (j = 0; j < RTE_DIM(ipv6_mask->hdr.src_addr); j++) { + if (ipv6_mask->hdr.src_addr[j] != UINT8_MAX || + ipv6_mask->hdr.dst_addr[j] != UINT8_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv6 mask"); + return -rte_errno; + } + } + + input_set |= I40E_INSET_IPV6_SRC; + input_set |= I40E_INSET_IPV6_DST; + + if ((ipv6_mask->hdr.vtc_flow & + rte_cpu_to_be_16(I40E_IPV6_TC_MASK)) + == rte_cpu_to_be_16(I40E_IPV6_TC_MASK)) + input_set |= I40E_INSET_IPV6_TC; + if (ipv6_mask->hdr.proto == UINT8_MAX) + input_set |= I40E_INSET_IPV6_NEXT_HDR; + if (ipv6_mask->hdr.hop_limits == UINT8_MAX) + input_set |= I40E_INSET_IPV6_HOP_LIMIT; + + /* Get filter info */ + filter->input.flow.ipv6_flow.tc = + (uint8_t)(ipv6_spec->hdr.vtc_flow << + I40E_IPV4_TC_SHIFT); + filter->input.flow.ipv6_flow.proto = + ipv6_spec->hdr.proto; + filter->input.flow.ipv6_flow.hop_limits = + ipv6_spec->hdr.hop_limits; + + rte_memcpy(filter->input.flow.ipv6_flow.src_ip, + ipv6_spec->hdr.src_addr, 16); + rte_memcpy(filter->input.flow.ipv6_flow.dst_ip, + ipv6_spec->hdr.dst_addr, 16); + + /* Check if it is fragment. 
*/ + if (ipv6_spec->hdr.proto == I40E_IPV6_FRAG_HEADER) + flow_type = RTE_ETH_FLOW_FRAG_IPV6; + else + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_OTHER; + break; + case RTE_FLOW_ITEM_TYPE_TCP: + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (!tcp_spec || !tcp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL TCP spec/mask"); + return -rte_errno; + } + + /* Check TCP mask and update input set */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid TCP mask"); + return -rte_errno; + } + + if (tcp_mask->hdr.src_port != UINT16_MAX || + tcp_mask->hdr.dst_port != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid TCP mask"); + return -rte_errno; + } + + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_TCP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.tcp4_flow.src_port = + tcp_spec->hdr.src_port; + filter->input.flow.tcp4_flow.dst_port = + tcp_spec->hdr.dst_port; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.tcp6_flow.src_port = + tcp_spec->hdr.src_port; + filter->input.flow.tcp6_flow.dst_port = + tcp_spec->hdr.dst_port; + } + break; + case RTE_FLOW_ITEM_TYPE_UDP: + udp_spec = (const struct rte_flow_item_udp *)item->spec; + udp_mask = (const struct rte_flow_item_udp *)item->mask; + if (!udp_spec || !udp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL UDP spec/mask"); + return -rte_errno; + } + + /* Check UDP mask and update input set*/ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + + if (udp_mask->hdr.src_port != UINT16_MAX || + udp_mask->hdr.dst_port != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.udp4_flow.src_port = + udp_spec->hdr.src_port; + filter->input.flow.udp4_flow.dst_port = + udp_spec->hdr.dst_port; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.udp6_flow.src_port = + udp_spec->hdr.src_port; + filter->input.flow.udp6_flow.dst_port = + udp_spec->hdr.dst_port; + } + break; + case RTE_FLOW_ITEM_TYPE_SCTP: + sctp_spec = + (const struct rte_flow_item_sctp *)item->spec; + sctp_mask = + (const struct rte_flow_item_sctp *)item->mask; + if (!sctp_spec || !sctp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL SCTP spec/mask"); + return -rte_errno; + } + + /* Check SCTP mask and update input set */ + if (sctp_mask->hdr.cksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return 
-rte_errno; + } + + if (sctp_mask->hdr.src_port != UINT16_MAX || + sctp_mask->hdr.dst_port != UINT16_MAX || + sctp_mask->hdr.tag != UINT32_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + input_set |= I40E_INSET_SCTP_VT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_SCTP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_SCTP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.sctp4_flow.src_port = + sctp_spec->hdr.src_port; + filter->input.flow.sctp4_flow.dst_port = + sctp_spec->hdr.dst_port; + filter->input.flow.sctp4_flow.verify_tag = + sctp_spec->hdr.tag; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.sctp6_flow.src_port = + sctp_spec->hdr.src_port; + filter->input.flow.sctp6_flow.dst_port = + sctp_spec->hdr.dst_port; + filter->input.flow.sctp6_flow.verify_tag = + sctp_spec->hdr.tag; + } + break; + case RTE_FLOW_ITEM_TYPE_VF: + vf_spec = (const struct rte_flow_item_vf *)item->spec; + filter->input.flow_ext.is_vf = 1; + filter->input.flow_ext.dst_id = vf_spec->id; + if (filter->input.flow_ext.is_vf && + filter->input.flow_ext.dst_id >= pf->vf_num) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VF ID for FDIR."); + return -rte_errno; + } + break; + default: + break; + } + } + + pctype = i40e_flowtype_to_pctype(flow_type); + if (pctype == 0 || pctype > I40E_FILTER_PCTYPE_L2_PAYLOAD) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Unsupported flow type"); + return -rte_errno; + } + + if (input_set != i40e_get_default_input_set(pctype)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Invalid input set."); + return -rte_errno; + } + filter->input.flow_type = flow_type; + + return 0; +} + +/* Parse to get the action info of a FDIR filter. + * FDIR action supports QUEUE or (QUEUE + MARK). + */ +static int +i40e_flow_parse_fdir_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + const struct rte_flow_action_mark *mark_spec; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE or DROP. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->action.flex_off = 0; + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) + filter->action.behavior = RTE_ETH_FDIR_ACCEPT; + else + filter->action.behavior = RTE_ETH_FDIR_REJECT; + + filter->action.report_status = RTE_ETH_FDIR_REPORT_ID; + filter->action.rx_queue = act_q->index; + + if (filter->action.rx_queue >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, act, + "Invalid queue ID for FDIR."); + return -rte_errno; + } + + /* Check if the next non-void item is MARK or END. 
*/ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_MARK && + act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { + mark_spec = (const struct rte_flow_action_mark *)act->conf; + filter->soft_id = mark_spec->id; + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + } + + return 0; +} + +static int +i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_fdir_filter *fdir_filter = + &filter->fdir_filter; + int ret; + + ret = i40e_flow_parse_fdir_pattern(dev, pattern, error, fdir_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_fdir_action(dev, actions, error, fdir_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_FDIR; + + if (dev->data->dev_conf.fdir_conf.mode != + RTE_FDIR_MODE_PERFECT) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "Check the mode in fdir_conf."); + return -rte_errno; + } + + return 0; +} + +/* Parse to get the action info of a tunnle filter + * Tunnel action only supports QUEUE. + */ +static int +i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue_id = act_q->index; + if (filter->queue_id >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid queue ID for tunnel filter"); + return -rte_errno; + } + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +i40e_check_tenant_id_mask(const uint8_t *mask) +{ + uint32_t j; + int is_masked = 0; + + for (j = 0; j < I40E_TENANT_ARRAY_NUM; j++) { + if (*(mask + j) == UINT8_MAX) { + if (j > 0 && (*(mask + j) != *(mask + j - 1))) + return -EINVAL; + is_masked = 0; + } else if (*(mask + j) == 0) { + if (j > 0 && (*(mask + j) != *(mask + j - 1))) + return -EINVAL; + is_masked = 1; + } else { + return -EINVAL; + } + } + + return is_masked; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported filter types: IMAC_IVLAN_TENID, IMAC_IVLAN, + * IMAC_TENID, OMAC_TENID_IMAC and IMAC. + * 3. 
Mask of fields which need to be matched should be + * filled with 1. + * 4. Mask of fields which needn't to be matched should be + * filled with 0. + */ +static int +i40e_flow_parse_vxlan_pattern(const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_eth *o_eth_spec = NULL; + const struct rte_flow_item_eth *o_eth_mask = NULL; + const struct rte_flow_item_vxlan *vxlan_spec = NULL; + const struct rte_flow_item_vxlan *vxlan_mask = NULL; + const struct rte_flow_item_eth *i_eth_spec = NULL; + const struct rte_flow_item_eth *i_eth_mask = NULL; + const struct rte_flow_item_vlan *vlan_spec = NULL; + const struct rte_flow_item_vlan *vlan_mask = NULL; + bool is_vni_masked = 0; + enum rte_flow_item_type item_type; + bool vxlan_flag = 0; + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + if ((!eth_spec && eth_mask) || + (eth_spec && !eth_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ether spec/mask"); + return -rte_errno; + } + + if (eth_spec && eth_mask) { + /* DST address of inner MAC shouldn't be masked. + * SRC address of Inner MAC should be masked. + */ + if (!is_broadcast_ether_addr(ð_mask->dst) || + !is_zero_ether_addr(ð_mask->src) || + eth_mask->type) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ether spec/mask"); + return -rte_errno; + } + + if (!vxlan_flag) + rte_memcpy(&filter->outer_mac, + ð_spec->dst, + ETHER_ADDR_LEN); + else + rte_memcpy(&filter->inner_mac, + ð_spec->dst, + ETHER_ADDR_LEN); + } + + if (!vxlan_flag) { + o_eth_spec = eth_spec; + o_eth_mask = eth_mask; + } else { + i_eth_spec = eth_spec; + i_eth_mask = eth_mask; + } + + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + vlan_spec = + (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = + (const struct rte_flow_item_vlan *)item->mask; + if (vxlan_flag) { + vlan_spec = + (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = + (const struct rte_flow_item_vlan *)item->mask; + if (!(vlan_spec && vlan_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid vlan item"); + return -rte_errno; + } + } else { + if (vlan_spec || vlan_mask) + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid vlan item"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + case RTE_FLOW_ITEM_TYPE_IPV6: + case RTE_FLOW_ITEM_TYPE_UDP: + /* IPv4/IPv6/UDP are used to describe protocol, + * spec amd mask should be NULL. + */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv4 item"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + vxlan_spec = + (const struct rte_flow_item_vxlan *)item->spec; + vxlan_mask = + (const struct rte_flow_item_vxlan *)item->mask; + /* Check if VXLAN item is used to describe protocol. + * If yes, both spec and mask should be NULL. + * If no, either spec or mask shouldn't be NULL. 
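For the VXLAN tunnel case the parser expects the outer ETH/IPv4/UDP items to describe protocol only (NULL spec and mask), the VXLAN item to carry a fully unmasked 24-bit VNI, and the inner ETH item to match the inner destination MAC with an all-ones mask. A skeleton of such a pattern is sketched below, assuming pattern_vxlan_1 (ETH / IPV4 / UDP / VXLAN / ETH / END); the MAC address and VNI are invented values.

#include <rte_ether.h>
#include <rte_flow.h>

static void
build_vxlan_pattern(struct rte_flow_item pattern[6])
{
    /* static so the spec/mask pointers stay valid after this function returns */
    static struct rte_flow_item_vxlan vxlan_spec = { .vni = { 0x00, 0x12, 0x34 } };
    static struct rte_flow_item_vxlan vxlan_mask = { .vni = { 0xff, 0xff, 0xff } };
    static struct rte_flow_item_eth inner_spec = {
        .dst.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
    static struct rte_flow_item_eth inner_mask = {
        .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } };

    pattern[0] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH };  /* outer, protocol only */
    pattern[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4 }; /* spec/mask left NULL */
    pattern[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP };
    pattern[3] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_VXLAN,
                                         .spec = &vxlan_spec, .mask = &vxlan_mask };
    pattern[4] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH,
                                         .spec = &inner_spec, .mask = &inner_mask };
    pattern[5] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}

With no outer ETH spec/mask and an unmasked VNI, this combination maps to the IMAC_TENID tunnel filter type in the classification at the end of the parser.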
+ */ + if ((!vxlan_spec && vxlan_mask) || + (vxlan_spec && !vxlan_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VXLAN item"); + return -rte_errno; + } + + /* Check if VNI is masked. */ + if (vxlan_mask) { + is_vni_masked = + i40e_check_tenant_id_mask(vxlan_mask->vni); + if (is_vni_masked < 0) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VNI mask"); + return -rte_errno; + } + } + vxlan_flag = 1; + break; + default: + break; + } + } + + /* Check specification and mask to get the filter type */ + if (vlan_spec && vlan_mask && + (vlan_mask->tci == rte_cpu_to_be_16(I40E_TCI_MASK))) { + /* If there's inner vlan */ + filter->inner_vlan = rte_be_to_cpu_16(vlan_spec->tci) + & I40E_TCI_MASK; + if (vxlan_spec && vxlan_mask && !is_vni_masked) { + /* If there's vxlan */ + rte_memcpy(&filter->tenant_id, vxlan_spec->vni, + RTE_DIM(vxlan_spec->vni)); + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID; + else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else if (!vxlan_spec && !vxlan_mask) { + /* If there's no vxlan */ + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_IVLAN; + else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else if ((!vlan_spec && !vlan_mask) || + (vlan_spec && vlan_mask && vlan_mask->tci == 0x0)) { + /* If there's no inner vlan */ + if (vxlan_spec && vxlan_mask && !is_vni_masked) { + /* If there's vxlan */ + rte_memcpy(&filter->tenant_id, vxlan_spec->vni, + RTE_DIM(vxlan_spec->vni)); + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_TENID; + else if (o_eth_spec && o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_OMAC_TENID_IMAC; + } else if (!vxlan_spec && !vxlan_mask) { + /* If there's no vxlan */ + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) { + filter->filter_type = ETH_TUNNEL_FILTER_IMAC; + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Not supported by tunnel filter."); + return -rte_errno; + } + + filter->tunnel_type = RTE_TUNNEL_TYPE_VXLAN; + + return 0; +} + +static int +i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + int ret; + + ret = i40e_flow_parse_vxlan_pattern(pattern, error, filter); + + return ret; +} + +static int +i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_tunnel_filter_conf *tunnel_filter = + &filter->tunnel_filter; + int ret; + + ret = i40e_flow_parse_tunnel_pattern(dev, pattern, + error, 
tunnel_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_tunnel_action(dev, actions, error, tunnel_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_TUNNEL; + + return ret; +} + +static int +i40e_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow_item *items; /* internal pattern w/o VOID items */ + parse_filter_t parse_filter; + uint32_t item_num = 0; /* non-void item number of pattern*/ + uint32_t i = 0; + int ret; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + memset(&cons_filter, 0, sizeof(cons_filter)); + + /* Get the non-void item number of pattern */ + while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) { + if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID) + item_num++; + i++; + } + item_num++; + + items = rte_zmalloc("i40e_pattern", + item_num * sizeof(struct rte_flow_item), 0); + if (!items) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "No memory for PMD internal items."); + return -ENOMEM; + } + + i40e_pattern_skip_void_item(items, pattern); + + /* Find if there's matched parse filter function */ + parse_filter = i40e_find_parse_filter_func(items); + if (!parse_filter) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + pattern, "Unsupported pattern"); + return -rte_errno; + } + + ret = parse_filter(dev, attr, items, actions, error, &cons_filter); + + rte_free(items); + + return ret; +} + +static struct rte_flow * +i40e_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct rte_flow *flow; + int ret; + + flow = rte_zmalloc("i40e_flow", sizeof(struct rte_flow), 0); + if (!flow) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to allocate memory"); + return flow; + } + + ret = i40e_flow_validate(dev, attr, pattern, actions, error); + if (ret < 0) + return NULL; + + switch (cons_filter_type) { + case RTE_ETH_FILTER_ETHERTYPE: + ret = i40e_ethertype_filter_set(pf, + &cons_filter.ethertype_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->ethertype.ethertype_list, + i40e_ethertype_filter_list); + break; + case RTE_ETH_FILTER_FDIR: + ret = i40e_add_del_fdir_filter(dev, + &cons_filter.fdir_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->fdir.fdir_list, + i40e_fdir_filter_list); + break; + case RTE_ETH_FILTER_TUNNEL: + ret = i40e_dev_tunnel_filter_set(pf, + &cons_filter.tunnel_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->tunnel.tunnel_list, + i40e_tunnel_filter_list); + break; + default: + goto free_flow; + } + + flow->filter_type = cons_filter_type; + TAILQ_INSERT_TAIL(&pf->flow_list, flow, node); + return flow; + +free_flow: + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed 
to create flow."); + rte_free(flow); + return NULL; +} + +static int +i40e_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + enum rte_filter_type filter_type = flow->filter_type; + int ret = 0; + + switch (filter_type) { + case RTE_ETH_FILTER_ETHERTYPE: + ret = i40e_flow_destroy_ethertype_filter(pf, + (struct i40e_ethertype_filter *)flow->rule); + break; + case RTE_ETH_FILTER_TUNNEL: + ret = i40e_flow_destroy_tunnel_filter(pf, + (struct i40e_tunnel_filter *)flow->rule); + break; + case RTE_ETH_FILTER_FDIR: + ret = i40e_add_del_fdir_filter(dev, + &((struct i40e_fdir_filter *)flow->rule)->fdir, 0); + break; + default: + PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", + filter_type); + ret = -EINVAL; + break; + } + + if (!ret) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } else + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to destroy flow."); + + return ret; +} + +static int +i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + struct i40e_ethertype_filter *node; + struct i40e_control_filter_stats stats; + uint16_t flags = 0; + int ret = 0; + + if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC)) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; + if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP; + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE; + + memset(&stats, 0, sizeof(stats)); + ret = i40e_aq_add_rem_control_packet_filter(hw, + filter->input.mac_addr.addr_bytes, + filter->input.ether_type, + flags, pf->main_vsi->seid, + filter->queue, 0, &stats, NULL); + if (ret < 0) + return ret; + + node = i40e_sw_ethertype_filter_lookup(ethertype_rule, &filter->input); + if (!node) + return -EINVAL; + + ret = i40e_sw_ethertype_filter_del(pf, &node->input); + + return ret; +} + +static int +i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf, + struct i40e_tunnel_filter *filter) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_vsi *vsi = pf->main_vsi; + struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter; + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + struct i40e_tunnel_filter *node; + int ret = 0; + + memset(&cld_filter, 0, sizeof(cld_filter)); + ether_addr_copy((struct ether_addr *)&filter->input.outer_mac, + (struct ether_addr *)&cld_filter.outer_mac); + ether_addr_copy((struct ether_addr *)&filter->input.inner_mac, + (struct ether_addr *)&cld_filter.inner_mac); + cld_filter.inner_vlan = filter->input.inner_vlan; + cld_filter.flags = filter->input.flags; + cld_filter.tenant_id = filter->input.tenant_id; + cld_filter.queue_number = filter->queue; + + ret = i40e_aq_remove_cloud_filters(hw, vsi->seid, + &cld_filter, 1); + if (ret < 0) + return ret; + + node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &filter->input); + if (!node) + return -EINVAL; + + ret = i40e_sw_tunnel_filter_del(pf, &node->input); + + return ret; +} + +static int +i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + int ret; + + ret = i40e_flow_flush_fdir_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to flush FDIR flows."); + return -rte_errno; + } + + ret = 
i40e_flow_flush_ethertype_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to ethertype flush flows."); + return -rte_errno; + } + + ret = i40e_flow_flush_tunnel_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to flush tunnel flows."); + return -rte_errno; + } + + return ret; +} + +static int +i40e_flow_flush_fdir_filter(struct i40e_pf *pf) +{ + struct rte_eth_dev *dev = pf->adapter->eth_dev; + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *fdir_filter; + struct rte_flow *flow; + void *temp; + int ret; + + ret = i40e_fdir_flush(dev); + if (!ret) { + /* Delete FDIR filters in FDIR list. */ + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + ret = i40e_sw_fdir_filter_del(pf, + &fdir_filter->fdir.input); + if (ret < 0) + return ret; + } + + /* Delete FDIR flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_FDIR) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + } + + return ret; +} + +/* Flush all ethertype filters */ +static int +i40e_flow_flush_ethertype_filter(struct i40e_pf *pf) +{ + struct i40e_ethertype_filter_list + *ethertype_list = &pf->ethertype.ethertype_list; + struct i40e_ethertype_filter *filter; + struct rte_flow *flow; + void *temp; + int ret = 0; + + while ((filter = TAILQ_FIRST(ethertype_list))) { + ret = i40e_flow_destroy_ethertype_filter(pf, filter); + if (ret) + return ret; + } + + /* Delete ethertype flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_ETHERTYPE) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + + return ret; +} + +/* Flush all tunnel filters */ +static int +i40e_flow_flush_tunnel_filter(struct i40e_pf *pf) +{ + struct i40e_tunnel_filter_list + *tunnel_list = &pf->tunnel.tunnel_list; + struct i40e_tunnel_filter *filter; + struct rte_flow *flow; + void *temp; + int ret = 0; + + while ((filter = TAILQ_FIRST(tunnel_list))) { + ret = i40e_flow_destroy_tunnel_filter(pf, filter); + if (ret) + return ret; + } + + /* Delete tunnel flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_TUNNEL) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + + return ret; +} diff --git a/src/dpdk/drivers/net/i40e/i40e_pf.c b/src/dpdk/drivers/net/i40e/i40e_pf.c index d5b2d450..f771dfb6 100644 --- a/src/dpdk/drivers/net/i40e/i40e_pf.c +++ b/src/dpdk/drivers/net/i40e/i40e_pf.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. 
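i40e_flow_flush() above is what backs the generic rte_flow_flush() call: it drops FDIR, ethertype and tunnel rules both in hardware and from the software lists hanging off pf->flow_list. From the application side that whole cleanup is a single call, roughly as sketched here; the port number and the printf reporting are arbitrary.

#include <stdio.h>
#include <rte_flow.h>

/* Remove every rte_flow rule previously created on the port. */
static void
drop_all_flows(uint8_t port_id)
{
    struct rte_flow_error err;

    if (rte_flow_flush(port_id, &err) != 0)
        /* err.message is filled in by the PMD, e.g. "Failed to flush FDIR flows." */
        printf("flush failed on port %u: %s\n", port_id,
               err.message ? err.message : "unknown");
}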
* * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ #include "i40e_ethdev.h" #include "i40e_rxtx.h" #include "i40e_pf.h" +#include "rte_pmd_i40e.h" #define I40E_CFG_CRCSTRIP_DEFAULT 1 @@ -138,7 +139,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) abs_vf_id = vf_id + hw->func_caps.vf_base_id; /* Notify VF that we are in VFR progress */ - I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_INPROGRESS); + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_INPROGRESS); /* * If require a SW VF reset, a VFLR interrupt will be generated, @@ -219,7 +220,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) } /* Reset done, Set COMPLETE flag and clear reset bit */ - I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_COMPLETED); + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_COMPLETED); val = I40E_READ_REG(hw, I40E_VPGEN_VFRTRIG(vf_id)); val &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK; I40E_WRITE_REG(hw, I40E_VPGEN_VFRTRIG(vf_id), val); @@ -247,10 +248,12 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) return -EFAULT; } + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_VFACTIVE); + return ret; } -static int +int i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, uint32_t opcode, uint32_t retval, @@ -272,14 +275,30 @@ i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, } static void -i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf, bool b_op) { struct i40e_virtchnl_version_info info; - info.major = I40E_DPDK_VERSION_MAJOR; - info.minor = I40E_DPDK_VERSION_MINOR; - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, - I40E_SUCCESS, (uint8_t *)&info, sizeof(info)); + /* Respond like a Linux PF host in order to support both DPDK VF and + * Linux VF driver. The expense is original DPDK host specific feature + * like CFG_VLAN_PVID and CONFIG_VSI_QUEUES_EXT will not available. + * + * DPDK VF also can't identify host driver by version number returned. + * It always assume talking with Linux PF. 
+ */ + info.major = I40E_VIRTCHNL_VERSION_MAJOR; + info.minor = I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS; + + if (b_op) + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, + I40E_SUCCESS, + (uint8_t *)&info, + sizeof(info)); + else + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, + I40E_NOT_SUPPORTED, + (uint8_t *)&info, + sizeof(info)); } static int @@ -292,13 +311,20 @@ i40e_pf_host_process_cmd_reset_vf(struct i40e_pf_vf *vf) } static int -i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf, bool b_op) { struct i40e_virtchnl_vf_resource *vf_res = NULL; struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); uint32_t len = 0; int ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf(vf, + I40E_VIRTCHNL_OP_GET_VF_RESOURCES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + /* only have 1 VSI by default */ len = sizeof(struct i40e_virtchnl_vf_resource) + I40E_DEFAULT_VF_VSI_NUM * @@ -321,8 +347,7 @@ i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf) /* Change below setting if PF host can support more VSIs for VF */ vf_res->vsi_res[0].vsi_type = I40E_VSI_SRIOV; - /* As assume Vf only has single VSI now, always return 0 */ - vf_res->vsi_res[0].vsi_id = 0; + vf_res->vsi_res[0].vsi_id = vf->vsi->vsi_id; vf_res->vsi_res[0].num_queue_pairs = vf->vsi->nb_qps; ether_addr_copy(&vf->mac_addr, (struct ether_addr *)vf_res->vsi_res[0].default_mac_addr); @@ -393,10 +418,12 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw, /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(tx_ctx)); - tx_ctx.new_context = 1; tx_ctx.base = txq->dma_ring_addr / I40E_QUEUE_BASE_ADDR_UNIT; tx_ctx.qlen = txq->ring_len; tx_ctx.rdylist = rte_le_to_cpu_16(vf->vsi->info.qs_handle[0]); + tx_ctx.head_wb_ena = txq->headwb_enabled; + tx_ctx.head_wb_addr = txq->dma_headwb_addr; + err = i40e_clear_lan_tx_queue_context(hw, abs_queue_id); if (err != I40E_SUCCESS) return err; @@ -423,7 +450,8 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw, static int i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_vsi *vsi = vf->vsi; @@ -432,11 +460,18 @@ i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf, struct i40e_virtchnl_queue_pair_info *vc_qpi; int i, ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf(vf, + I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (!msg || vc_vqci->num_queue_pairs > vsi->nb_qps || vc_vqci->num_queue_pairs > I40E_MAX_VSI_QP || msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqci, vc_vqci->num_queue_pairs)) { - PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong\n"); + PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong"); ret = I40E_ERR_PARAM; goto send_msg; } @@ -482,7 +517,8 @@ send_msg: static int i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_vsi *vsi = vf->vsi; @@ -491,11 +527,19 @@ i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf, struct i40e_virtchnl_queue_pair_ext_info *vc_qpei; int i, ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (!msg || vc_vqcei->num_queue_pairs > vsi->nb_qps || 
vc_vqcei->num_queue_pairs > I40E_MAX_VSI_QP || msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqcei, vc_vqcei->num_queue_pairs)) { - PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong\n"); + PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong"); ret = I40E_ERR_PARAM; goto send_msg; } @@ -537,13 +581,125 @@ send_msg: return ret; } +static void +i40e_pf_config_irq_link_list(struct i40e_pf_vf *vf, + struct i40e_virtchnl_vector_map *vvm) +{ +#define BITS_PER_CHAR 8 + uint64_t linklistmap = 0, tempmap; + struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); + uint16_t qid; + bool b_first_q = true; + enum i40e_queue_type qtype; + uint16_t vector_id; + uint32_t reg, reg_idx; + uint16_t itr_idx = 0, i; + + vector_id = vvm->vector_id; + /* setup the head */ + if (!vector_id) + reg_idx = I40E_VPINT_LNKLST0(vf->vf_idx); + else + reg_idx = I40E_VPINT_LNKLSTN( + ((hw->func_caps.num_msix_vectors_vf - 1) * vf->vf_idx) + + (vector_id - 1)); + + if (vvm->rxq_map == 0 && vvm->txq_map == 0) { + I40E_WRITE_REG(hw, reg_idx, + I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK); + goto cfg_irq_done; + } + + /* sort all rx and tx queues */ + tempmap = vvm->rxq_map; + for (i = 0; i < sizeof(vvm->rxq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) + linklistmap |= (1 << (2 * i)); + tempmap >>= 1; + } + + tempmap = vvm->txq_map; + for (i = 0; i < sizeof(vvm->txq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) + linklistmap |= (1 << (2 * i + 1)); + tempmap >>= 1; + } + + /* Link all rx and tx queues into a chained list */ + tempmap = linklistmap; + i = 0; + b_first_q = true; + do { + if (tempmap & 0x1) { + qtype = (enum i40e_queue_type)(i % 2); + qid = vf->vsi->base_queue + i / 2; + if (b_first_q) { + /* This is header */ + b_first_q = false; + reg = ((qtype << + I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) + | qid); + } else { + /* element in the link list */ + reg = (vector_id) | + (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | + (qid << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) | + (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT); + } + I40E_WRITE_REG(hw, reg_idx, reg); + /* find next register to program */ + switch (qtype) { + case I40E_QUEUE_TYPE_RX: + reg_idx = I40E_QINT_RQCTL(qid); + itr_idx = vvm->rxitr_idx; + break; + case I40E_QUEUE_TYPE_TX: + reg_idx = I40E_QINT_TQCTL(qid); + itr_idx = vvm->txitr_idx; + break; + default: + break; + } + } + i++; + tempmap >>= 1; + } while (tempmap); + + /* Terminate the link list */ + reg = (vector_id) | + (0 << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | + (0x7FF << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) | + (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT); + I40E_WRITE_REG(hw, reg_idx, reg); + +cfg_irq_done: + I40E_WRITE_FLUSH(hw); +} + static int i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf, - uint8_t *msg, uint16_t msglen) + uint8_t *msg, uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_virtchnl_irq_map_info *irqmap = (struct i40e_virtchnl_irq_map_info *)msg; + struct i40e_virtchnl_vector_map *map; + int i; + uint16_t vector_id; + unsigned long qbit_max; + + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } if (msg == NULL || msglen < sizeof(struct i40e_virtchnl_irq_map_info)) { PMD_DRV_LOG(ERR, "buffer too short"); @@ -551,23 +707,46 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf, goto send_msg; } - /* Assume VF only 
have 1 vector to bind all queues */ - if (irqmap->num_vectors != 1) { - PMD_DRV_LOG(ERR, "DKDK host only support 1 vector"); - ret = I40E_ERR_PARAM; + /* PF host will support both DPDK VF or Linux VF driver, identify by + * number of vectors requested. + */ + + /* DPDK VF only requires single vector */ + if (irqmap->num_vectors == 1) { + /* This MSIX intr store the intr in VF range */ + vf->vsi->msix_intr = irqmap->vecmap[0].vector_id; + vf->vsi->nb_msix = irqmap->num_vectors; + vf->vsi->nb_used_qps = vf->vsi->nb_qps; + + /* Don't care how the TX/RX queue mapping with this vector. + * Link all VF RX queues together. Only did mapping work. + * VF can disable/enable the intr by itself. + */ + i40e_vsi_queues_bind_intr(vf->vsi); goto send_msg; } - /* This MSIX intr store the intr in VF range */ - vf->vsi->msix_intr = irqmap->vecmap[0].vector_id; - vf->vsi->nb_msix = irqmap->num_vectors; - vf->vsi->nb_used_qps = vf->vsi->nb_qps; + /* Then, it's Linux VF driver */ + qbit_max = 1 << pf->vf_nb_qp_max; + for (i = 0; i < irqmap->num_vectors; i++) { + map = &irqmap->vecmap[i]; + + vector_id = map->vector_id; + /* validate msg params */ + if (vector_id >= hw->func_caps.num_msix_vectors_vf) { + ret = I40E_ERR_PARAM; + goto send_msg; + } + + if ((map->rxq_map < qbit_max) && (map->txq_map < qbit_max)) { + i40e_pf_config_irq_link_list(vf, map); + } else { + /* configured queue size excceed limit */ + ret = I40E_ERR_PARAM; + goto send_msg; + } + } - /* Don't care how the TX/RX queue mapping with this vector. - * Link all VF RX queues together. Only did mapping work. - * VF can disable/enable the intr by itself. - */ - i40e_vsi_queues_bind_intr(vf->vsi); send_msg: i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, ret, NULL, 0); @@ -646,12 +825,21 @@ send_msg: static int i40e_pf_host_process_cmd_disable_queues(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_queue_select *q_sel = (struct i40e_virtchnl_queue_select *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DISABLE_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*q_sel)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -669,7 +857,8 @@ send_msg: static int i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_ether_addr_list *addr_list = @@ -678,6 +867,14 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, int i; struct ether_addr *mac; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + memset(&filter, 0 , sizeof(struct i40e_mac_filter_info)); if (msg == NULL || msglen <= sizeof(*addr_list)) { @@ -690,8 +887,8 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, mac = (struct ether_addr *)(addr_list->list[i].addr); (void)rte_memcpy(&filter.mac_addr, mac, ETHER_ADDR_LEN); filter.filter_type = RTE_MACVLAN_PERFECT_MATCH; - if(!is_valid_assigned_ether_addr(mac) || - i40e_vsi_add_mac(vf->vsi, &filter)) { + if (is_zero_ether_addr(mac) || + i40e_vsi_add_mac(vf->vsi, &filter)) { ret = I40E_ERR_INVALID_MAC_ADDR; goto send_msg; } @@ -707,7 +904,8 @@ send_msg: static int i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct 
i40e_virtchnl_ether_addr_list *addr_list = @@ -715,6 +913,14 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, int i; struct ether_addr *mac; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*addr_list)) { PMD_DRV_LOG(ERR, "delete_ether_address argument too short"); ret = I40E_ERR_PARAM; @@ -723,7 +929,7 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, for (i = 0; i < addr_list->num_elements; i++) { mac = (struct ether_addr *)(addr_list->list[i].addr); - if(!is_valid_assigned_ether_addr(mac) || + if(is_zero_ether_addr(mac) || i40e_vsi_delete_mac(vf->vsi, mac)) { ret = I40E_ERR_INVALID_MAC_ADDR; goto send_msg; @@ -739,7 +945,8 @@ send_msg: static int i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf, - uint8_t *msg, uint16_t msglen) + uint8_t *msg, uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_filter_list *vlan_filter_list = @@ -747,6 +954,14 @@ i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf, int i; uint16_t *vid; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_ADD_VLAN, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) { PMD_DRV_LOG(ERR, "add_vlan argument too short"); ret = I40E_ERR_PARAM; @@ -771,7 +986,8 @@ send_msg: static int i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_filter_list *vlan_filter_list = @@ -779,6 +995,14 @@ i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf, int i; uint16_t *vid; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DEL_VLAN, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) { PMD_DRV_LOG(ERR, "delete_vlan argument too short"); ret = I40E_ERR_PARAM; @@ -803,7 +1027,8 @@ static int i40e_pf_host_process_cmd_config_promisc_mode( struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_promisc_info *promisc = @@ -811,6 +1036,14 @@ i40e_pf_host_process_cmd_config_promisc_mode( struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); bool unicast = FALSE, multicast = FALSE; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*promisc)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -836,39 +1069,43 @@ send_msg: } static int -i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf, bool b_op) { i40e_update_vsi_stats(vf->vsi); - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, - I40E_SUCCESS, (uint8_t *)&vf->vsi->eth_stats, - sizeof(vf->vsi->eth_stats)); + if (b_op) + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, + I40E_SUCCESS, + (uint8_t *)&vf->vsi->eth_stats, + sizeof(vf->vsi->eth_stats)); + else + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, + I40E_NOT_SUPPORTED, + (uint8_t *)&vf->vsi->eth_stats, + sizeof(vf->vsi->eth_stats)); return I40E_SUCCESS; } -static void -i40e_pf_host_process_cmd_get_link_status(struct i40e_pf_vf *vf) -{ - struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vf->pf->main_vsi); - - /* Update link status first to acquire latest link change */ - 
i40e_dev_link_update(dev, 1); - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_LINK_STAT, - I40E_SUCCESS, (uint8_t *)&dev->data->dev_link, - sizeof(struct rte_eth_link)); -} - static int i40e_pf_host_process_cmd_cfg_vlan_offload( struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_offload_info *offload = (struct i40e_virtchnl_vlan_offload_info *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*offload)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -889,12 +1126,21 @@ send_msg: static int i40e_pf_host_process_cmd_cfg_pvid(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_pvid_info *tpid_info = (struct i40e_virtchnl_pvid_info *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CFG_VLAN_PVID, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*tpid_info)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -909,6 +1155,20 @@ send_msg: return ret; } +void +i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf) +{ + struct i40e_virtchnl_pf_event event; + + event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; + event.event_data.link_event.link_status = + dev->data->dev_link.link_status; + event.event_data.link_event.link_speed = + (enum i40e_aq_link_speed)dev->data->dev_link.link_speed; + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_EVENT, + I40E_SUCCESS, (uint8_t *)&event, sizeof(event)); +} + void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint16_t abs_vf_id, uint32_t opcode, @@ -921,6 +1181,8 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, struct i40e_pf_vf *vf; /* AdminQ will pass absolute VF id, transfer to internal vf id */ uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id; + struct rte_pmd_i40e_mb_event_param cb_param; + bool b_op = TRUE; if (vf_id > pf->vf_num - 1 || !pf->vfs) { PMD_DRV_LOG(ERR, "invalid argument"); @@ -935,10 +1197,35 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, return; } + /** + * initialise structure to send to user application + * will return response from user in retval field + */ + cb_param.retval = RTE_PMD_I40E_MB_EVENT_PROCEED; + cb_param.vfid = vf_id; + cb_param.msg_type = opcode; + cb_param.msg = (void *)msg; + cb_param.msglen = msglen; + + /** + * Ask user application if we're allowed to perform those functions. + * If we get cb_param.retval == RTE_PMD_I40E_MB_EVENT_PROCEED, + * then business as usual. + * If RTE_PMD_I40E_MB_EVENT_NOOP_ACK or RTE_PMD_I40E_MB_EVENT_NOOP_NACK, + * do nothing and send not_supported to VF. As PF must send a response + * to VF and ACK/NACK is not defined. 
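
The comment above introduces the new pre-dispatch hook: every VF mailbox message is first announced to the application through RTE_ETH_EVENT_VF_MBOX, and the application's verdict decides whether the PF services the request or answers I40E_NOT_SUPPORTED. A rough application-side sketch, assuming the three-argument callback prototype of this DPDK release (where the PMD hands the rte_pmd_i40e_mb_event_param in place of the registered callback argument) and assuming the virtchnl opcode constants are visible to the application:

#include <stdint.h>
#include <rte_ethdev.h>
#include "rte_pmd_i40e.h"

/* Hypothetical policy: refuse VF attempts to add MAC addresses, allow
 * everything else.  Field names follow the cb_param usage above. */
static void
vf_mbox_cb(uint8_t port_id __rte_unused, enum rte_eth_event_type type,
	   void *param)
{
	struct rte_pmd_i40e_mb_event_param *p = param;

	if (type != RTE_ETH_EVENT_VF_MBOX || p == NULL)
		return;

	/* opcode constant from the driver's i40e_virtchnl.h (assumed
	 * reachable from the application here) */
	if (p->msg_type == I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS)
		p->retval = RTE_PMD_I40E_MB_EVENT_NOOP_NACK;
	else
		p->retval = RTE_PMD_I40E_MB_EVENT_PROCEED;
}

/* Registration, e.g. right after rte_eth_dev_configure():
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_VF_MBOX,
 *				      vf_mbox_cb, NULL);
 */
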
+ */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); + if (cb_param.retval != RTE_PMD_I40E_MB_EVENT_PROCEED) { + PMD_DRV_LOG(WARNING, "VF to PF message(%d) is not permitted!", + opcode); + b_op = FALSE; + } + switch (opcode) { case I40E_VIRTCHNL_OP_VERSION : PMD_DRV_LOG(INFO, "OP_VERSION received"); - i40e_pf_host_process_cmd_version(vf); + i40e_pf_host_process_cmd_version(vf, b_op); break; case I40E_VIRTCHNL_OP_RESET_VF : PMD_DRV_LOG(INFO, "OP_RESET_VF received"); @@ -946,64 +1233,72 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: PMD_DRV_LOG(INFO, "OP_GET_VF_RESOURCES received"); - i40e_pf_host_process_cmd_get_vf_resource(vf); + i40e_pf_host_process_cmd_get_vf_resource(vf, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES received"); - i40e_pf_host_process_cmd_config_vsi_queues(vf, msg, msglen); + i40e_pf_host_process_cmd_config_vsi_queues(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT: PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES_EXT received"); i40e_pf_host_process_cmd_config_vsi_queues_ext(vf, msg, - msglen); + msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: PMD_DRV_LOG(INFO, "OP_CONFIG_IRQ_MAP received"); - i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen); + i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: PMD_DRV_LOG(INFO, "OP_ENABLE_QUEUES received"); - i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen); + if (b_op) { + i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen); + i40e_notify_vf_link_status(dev, vf); + } else { + i40e_pf_host_send_msg_to_vf( + vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + } break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: PMD_DRV_LOG(INFO, "OP_DISABLE_QUEUE received"); - i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen); + i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: PMD_DRV_LOG(INFO, "OP_ADD_ETHER_ADDRESS received"); - i40e_pf_host_process_cmd_add_ether_address(vf, msg, msglen); + i40e_pf_host_process_cmd_add_ether_address(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: PMD_DRV_LOG(INFO, "OP_DEL_ETHER_ADDRESS received"); - i40e_pf_host_process_cmd_del_ether_address(vf, msg, msglen); + i40e_pf_host_process_cmd_del_ether_address(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_ADD_VLAN: PMD_DRV_LOG(INFO, "OP_ADD_VLAN received"); - i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen); + i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_DEL_VLAN: PMD_DRV_LOG(INFO, "OP_DEL_VLAN received"); - i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen); + i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: PMD_DRV_LOG(INFO, "OP_CONFIG_PROMISCUOUS_MODE received"); - i40e_pf_host_process_cmd_config_promisc_mode(vf, msg, msglen); + i40e_pf_host_process_cmd_config_promisc_mode(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_GET_STATS: PMD_DRV_LOG(INFO, "OP_GET_STATS received"); - i40e_pf_host_process_cmd_get_stats(vf); - break; - case I40E_VIRTCHNL_OP_GET_LINK_STAT: - PMD_DRV_LOG(INFO, "OP_GET_LINK_STAT received"); - i40e_pf_host_process_cmd_get_link_status(vf); + i40e_pf_host_process_cmd_get_stats(vf, b_op); break; case I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD: PMD_DRV_LOG(INFO, "OP_CFG_VLAN_OFFLOAD received"); - 
i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg, msglen); + i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_CFG_VLAN_PVID: PMD_DRV_LOG(INFO, "OP_CFG_VLAN_PVID received"); - i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen); + i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen, b_op); break; /* Don't add command supported below, which will * return an error code. diff --git a/src/dpdk/drivers/net/i40e/i40e_pf.h b/src/dpdk/drivers/net/i40e/i40e_pf.h index 9c01829a..b4c22876 100644 --- a/src/dpdk/drivers/net/i40e/i40e_pf.h +++ b/src/dpdk/drivers/net/i40e/i40e_pf.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,20 +48,14 @@ #define I40E_DPDK_OFFSET 0x100 -enum i40e_pf_vfr_state { - I40E_PF_VFR_INPROGRESS = 0, - I40E_PF_VFR_COMPLETED = 1, -}; - /* DPDK pf driver specific command to VF */ enum i40e_virtchnl_ops_dpdk { /* * Keep some gap between Linux PF commands and * DPDK PF extended commands. */ - I40E_VIRTCHNL_OP_GET_LINK_STAT = I40E_VIRTCHNL_OP_VERSION + + I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD = I40E_VIRTCHNL_OP_VERSION + I40E_DPDK_OFFSET, - I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD, I40E_VIRTCHNL_OP_CFG_VLAN_PVID, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT, }; @@ -124,5 +118,7 @@ void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint8_t *msg, uint16_t msglen); int i40e_pf_host_init(struct rte_eth_dev *dev); int i40e_pf_host_uninit(struct rte_eth_dev *dev); +void i40e_notify_vf_link_status(struct rte_eth_dev *dev, + struct i40e_pf_vf *vf); #endif /* _I40E_PF_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx.c b/src/dpdk/drivers/net/i40e/i40e_rxtx.c index 19b431c3..608685fa 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx.c +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG | \ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -138,12 +151,21 @@ i40e_rxd_error_to_pkt_flags(uint64_t qword) uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT); #define I40E_RX_ERR_BITS 0x3f - if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) + if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) { + flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); return flags; + } + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))) flags |= PKT_RX_IP_CKSUM_BAD; + else + flags |= PKT_RX_IP_CKSUM_GOOD; + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) flags |= PKT_RX_L4_CKSUM_BAD; + else + flags |= PKT_RX_L4_CKSUM_GOOD; + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) flags |= PKT_RX_EIP_CKSUM_BAD; @@ -174,569 +196,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. - */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 
- RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN */ - [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT, - - /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ - [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [47] reserved */ - [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ - [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [54] reserved */ - [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ - [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ - [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [62] reserved */ - [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ - [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [69] reserved */ - [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ - [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ - [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [77] reserved */ - [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ - [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - 
RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [84] reserved */ - [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* Non tunneled IPv6 */ - [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [91] reserved */ - [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv6 --> IPv4 */ - [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [98] reserved */ - [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> IPv6 */ - [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [105] reserved */ - [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - 
RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN */ - [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT, - - /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ - [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [113] reserved */ - [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */ - [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [120] reserved */ - [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ - [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ - [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [128] reserved */ - [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> 
GRE/Teredo/VXLAN --> MAC --> IPv6 */ - [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [135] reserved */ - [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ - [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ - [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [143] reserved */ - [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ - [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [150] reserved */ - [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* L2 NSH packet type */ - [154] = RTE_PTYPE_L2_ETHER_NSH, - [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* All others reserved */ - }; - - return type_table[ptype]; -} - #define I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK 0x03 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX 0x02 @@ -779,33 +238,65 @@ i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb) #endif return flags; } + +static inline void +i40e_parse_tunneling_params(uint64_t ol_flags, + union i40e_tx_offload tx_offload, + uint32_t *cd_tunneling) +{ + /* EIPT: External (outer) IP header type */ + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; + else if (ol_flags & PKT_TX_OUTER_IPV4) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + else if (ol_flags & PKT_TX_OUTER_IPV6) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + + /* EIPLEN: External (outer) IP header length, in DWords */ + *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* L4TUNT: L4 Tunneling Type */ + switch (ol_flags & PKT_TX_TUNNEL_MASK) { + case PKT_TX_TUNNEL_IPIP: + /* for non UDP / GRE tunneling, set to 00b */ + break; + case PKT_TX_TUNNEL_VXLAN: + case PKT_TX_TUNNEL_GENEVE: + *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING; + break; + case PKT_TX_TUNNEL_GRE: + *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING; + break; + default: + PMD_TX_LOG(ERR, "Tunnel type not supported"); + return; + } + + /* L4TUNLEN: L4 Tunneling Length, in Words + * + * We depend on app to set rte_mbuf.l2_len correctly. + * For IP in GRE it should be set to the length of the GRE + * header; + * for MAC in GRE or MAC in UDP it should be set to the length + * of the GRE or UDP headers plus the inner MAC up to including + * its last Ethertype. 
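
The new i40e_parse_tunneling_params() above depends entirely on the application filling the mbuf length fields the way the comment describes. A hedged sketch of that contract for a VXLAN-encapsulated IPv4/TCP packet (header lengths are illustrative and must match the headers the application actually builds):

#include <stdint.h>
#include <rte_mbuf.h>

/* Illustrative values only: untagged outer and inner Ethernet, IPv4 with
 * no options, standard VXLAN framing. */
static void
vxlan_tx_offload_setup(struct rte_mbuf *m)
{
	m->ol_flags |= PKT_TX_OUTER_IP_CKSUM | PKT_TX_OUTER_IPV4 |
		       PKT_TX_TUNNEL_VXLAN |
		       PKT_TX_IP_CKSUM | PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;

	m->outer_l2_len = 14;		/* outer Ethernet (MACLEN)        */
	m->outer_l3_len = 20;		/* outer IPv4 (EIPLEN)            */
	/* L4TUNLEN from the comment above: outer UDP (8) + VXLAN (8) +
	 * inner Ethernet up to and including its last Ethertype (14). */
	m->l2_len = 8 + 8 + 14;
	m->l3_len = 20;			/* inner IPv4                     */
	m->l4_len = 20;			/* inner TCP                      */
}
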
+ */ + *cd_tunneling |= (tx_offload.l2_len >> 1) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; +} + static inline void i40e_txd_enable_checksum(uint64_t ol_flags, uint32_t *td_cmd, uint32_t *td_offset, - union i40e_tx_offload tx_offload, - uint32_t *cd_tunneling) + union i40e_tx_offload tx_offload) { - /* UDP tunneling packet TX checksum offload */ - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) { - + /* Set MACLEN */ + if (ol_flags & PKT_TX_TUNNEL_MASK) *td_offset |= (tx_offload.outer_l2_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; - - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - else if (ol_flags & PKT_TX_OUTER_IPV4) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - else if (ol_flags & PKT_TX_OUTER_IPV6) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; - - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - (tx_offload.l2_len >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - - } else + else *td_offset |= (tx_offload.l2_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; @@ -934,15 +425,6 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq) "rxq->rx_free_thresh=%d", rxq->nb_rx_desc, rxq->rx_free_thresh); ret = -EINVAL; - } else if (!(rxq->nb_rx_desc < (I40E_MAX_RING_DESC - - RTE_PMD_I40E_RX_MAX_BURST))) { - PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: " - "rxq->nb_rx_desc=%d, " - "I40E_MAX_RING_DESC=%d, " - "RTE_PMD_I40E_RX_MAX_BURST=%d", - rxq->nb_rx_desc, I40E_MAX_RING_DESC, - RTE_PMD_I40E_RX_MAX_BURST); - ret = -EINVAL; } #else ret = -EINVAL; @@ -994,6 +476,8 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq) I40E_RXD_QW1_STATUS_SHIFT; } + rte_smp_rmb(); + /* Compute how many status bits were set */ for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) nb_dd += s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT); @@ -1104,7 +588,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq) /* Update rx tail regsiter */ rte_wmb(); - I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger); + I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger); rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh); @@ -1484,7 +968,8 @@ i40e_calc_context_desc(uint64_t flags) { static uint64_t mask = PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG | - PKT_TX_QINQ_PKT; + PKT_TX_QINQ_PKT | + PKT_TX_TUNNEL_MASK; #ifdef RTE_LIBRTE_IEEE1588 mask |= PKT_TX_IEEE1588_TMST; @@ -1506,7 +991,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload) } /** - * in case of tunneling packet, the outer_l2_len and + * in case of non tunneling packet, the outer_l2_len and * outer_l3_len must be 0. 
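
The TSO context descriptor built by i40e_set_tso_ctx() expects the complementary mbuf contract; per the corrected comment above, a non-tunnelled request leaves the outer lengths at zero. A hedged sketch:

#include <stdint.h>
#include <rte_mbuf.h>

/* Hedged sketch: TSO request for a plain (non-tunnelled) IPv4/TCP mbuf
 * chain.  Per the corrected comment above, the outer lengths stay 0. */
static void
tso_tx_setup(struct rte_mbuf *m, uint16_t mss)
{
	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IP_CKSUM | PKT_TX_IPV4;
	m->l2_len = 14;		/* Ethernet                            */
	m->l3_len = 20;		/* IPv4, no options                    */
	m->l4_len = 20;		/* TCP, no options                     */
	m->tso_segsz = mss;	/* payload bytes per produced segment  */
	m->outer_l2_len = 0;
	m->outer_l3_len = 0;
}
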
*/ hdr_len = tx_offload.outer_l2_len + @@ -1623,12 +1108,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) /* Always enable CRC offload insertion */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Enable checksum offloading */ + /* Fill in tunneling parameters if necessary */ cd_tunneling_params = 0; - if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) { - i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset, - tx_offload, &cd_tunneling_params); - } + if (ol_flags & PKT_TX_TUNNEL_MASK) + i40e_parse_tunneling_params(ol_flags, tx_offload, + &cd_tunneling_params); + /* Enable checksum offloading */ + if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) + i40e_txd_enable_checksum(ol_flags, &td_cmd, + &td_offset, tx_offload); if (nb_ctx) { /* Setup TX context descriptor if required */ @@ -1747,7 +1235,7 @@ end_of_tx: (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id); + I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id); txq->tx_tail = tx_id; return nb_tx; @@ -1899,7 +1387,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq, /* Update the tx tail register */ rte_wmb(); - I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, txq->tx_tail); return nb_pkts; } @@ -1930,6 +1418,63 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/********************************************************************* + * + * TX prep functions + * + **********************************************************************/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** + * m->nb_segs is uint8_t, so nb_segs is always less than + * I40E_TX_MAX_SEG. + * We check only a condition for nb_segs > I40E_TX_MAX_MTU_SEG. + */ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered + * malicious + */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. 
But from driver's @@ -2136,7 +1681,9 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev) #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc || #endif - dev->rx_pkt_burst == i40e_recv_scattered_pkts) + dev->rx_pkt_burst == i40e_recv_scattered_pkts || + dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec || + dev->rx_pkt_burst == i40e_recv_pkts_vec) return ptypes; return NULL; } @@ -2161,21 +1708,12 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t base, bsf, tc_mapping; int use_def_burst_func = 1; -#define TREX_PATCH_LOW_LATENCY -#ifdef TREX_PATCH_LOW_LATENCY - int is_vf = 0; -#endif - if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; -#ifdef TREX_PATCH_LOW_LATENCY - is_vf = 1; -#endif - } else { + } else vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); - } if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI not available or queue " @@ -2224,8 +1762,19 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, rxq->rx_deferred_start = rx_conf->rx_deferred_start; /* Allocate the maximun number of RX ring hardware descriptor. */ - ring_size = sizeof(union i40e_rx_desc) * I40E_MAX_RING_DESC; - ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN); + len = I40E_MAX_RING_DESC; + +#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC + /** + * Allocating a little more memory because vectorized/bulk_alloc Rx + * functions doesn't check boundaries each time. + */ + len += RTE_PMD_I40E_RX_MAX_BURST; +#endif + + ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc), + I40E_DMA_MEM_ALIGN); + rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, ring_size, I40E_RING_BASE_ALIGN, socket_id); if (!rz) { @@ -2280,12 +1829,6 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, ad->rx_bulk_alloc_allowed = false; } -#ifdef TREX_PATCH_LOW_LATENCY - if (! is_vf) - rxq->dcb_tc =0; - else // The entire for below is in the else -#endif - for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) continue; @@ -2393,25 +1936,12 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_rs_thresh, tx_free_thresh; uint16_t i, base, bsf, tc_mapping; -#ifdef TREX_PATCH_LOW_LATENCY - u8 low_latency = 0; - int is_vf = 1; -#endif - if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; - } else { + } else vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); -#ifdef TREX_PATCH_LOW_LATENCY - if (queue_idx == pf->dev_data->nb_tx_queues-1) { - low_latency = 1; - } - is_vf = 0; -#endif - } - if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) " @@ -2461,8 +1991,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, return I40E_ERR_PARAM; } if (tx_free_thresh >= (nb_desc - 3)) { - PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the " - "tx_free_thresh must be less than the " + PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the " "number of TX descriptors minus 3. " "(tx_free_thresh=%u port=%d queue=%d)", (unsigned int)tx_free_thresh, @@ -2567,15 +2096,6 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, /* Use a simple TX queue without offloads or multi segs if possible */ i40e_set_tx_function_flag(dev, txq); -#ifdef TREX_PATCH_LOW_LATENCY - if (! 
is_vf) { - if (low_latency) { - txq->dcb_tc=1; - }else{ - txq->dcb_tc=0; - } - } else // The entire for below is in the else -#endif for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) continue; @@ -2985,11 +2505,15 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (!dev->data->tx_queues[i]) + continue; i40e_tx_queue_release_mbufs(dev->data->tx_queues[i]); i40e_reset_tx_queue(dev->data->tx_queues[i]); } for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (!dev->data->rx_queues[i]) + continue; i40e_rx_queue_release_mbufs(dev->data->rx_queues[i]); i40e_reset_rx_queue(dev->data->rx_queues[i]); } @@ -3003,12 +2527,16 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (!dev->data->rx_queues[i]) + continue; i40e_dev_rx_queue_release(dev->data->rx_queues[i]); dev->data->rx_queues[i] = NULL; } dev->data->nb_rx_queues = 0; for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (!dev->data->tx_queues[i]) + continue; i40e_dev_tx_queue_release(dev->data->tx_queues[i]); dev->data->tx_queues[i] = NULL; } @@ -3191,7 +2719,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev) struct i40e_rx_queue *rxq = dev->data->rx_queues[i]; - if (i40e_rxq_vec_setup(rxq)) { + if (rxq && i40e_rxq_vec_setup(rxq)) { ad->rx_vec_allowed = false; break; } @@ -3253,7 +2781,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev) for (i = 0; i < dev->data->nb_rx_queues; i++) { struct i40e_rx_queue *rxq = dev->data->rx_queues[i]; - rxq->rx_using_sse = rx_using_sse; + if (rxq) + rxq->rx_using_sse = rx_using_sse; } } } @@ -3292,7 +2821,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev) struct i40e_tx_queue *txq = dev->data->tx_queues[i]; - if (i40e_txq_vec_setup(txq)) { + if (txq && i40e_txq_vec_setup(txq)) { ad->tx_vec_allowed = false; break; } @@ -3308,9 +2837,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prepare = NULL; } else { PMD_INIT_LOG(DEBUG, "Xmit tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; } } diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx.h b/src/dpdk/drivers/net/i40e/i40e_rxtx.h index 98179f00..9df8a56f 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx.h +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx.h @@ -63,6 +63,12 @@ #define I40E_MIN_RING_DESC 64 #define I40E_MAX_RING_DESC 4096 +#define I40E_MIN_TSO_MSS 256 +#define I40E_MAX_TSO_MSS 9674 + +#define I40E_TX_MAX_SEG UINT8_MAX +#define I40E_TX_MAX_MTU_SEG 8 + #undef container_of #define container_of(ptr, type, member) ({ \ typeof(((type *)0)->member)(*__mptr) = (ptr); \ @@ -223,6 +229,8 @@ uint16_t i40e_recv_scattered_pkts(void *rx_queue, uint16_t i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); int i40e_tx_queue_init(struct i40e_tx_queue *txq); int i40e_rx_queue_init(struct i40e_rx_queue *rxq); void i40e_free_tx_resources(struct i40e_tx_queue *txq); @@ -255,4 +263,567 @@ void i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq); void i40e_set_tx_function(struct rte_eth_dev *dev); +/* For each value it means, datasheet of hardware can tell more details + * + * @note: fix i40e_dev_supported_ptypes_get() if any change here. 
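The i40e_rxd_pkt_type_mapping() table defined just below translates the 8-bit hardware ptype into the generic RTE_PTYPE_* encoding stored in mbuf->packet_type by the RX paths. A hedged sketch of how an application can use that field instead of reparsing headers (the helper name is illustrative):

#include <rte_mbuf.h>

/* Returns non-zero for tunneled packets whose inner payload is IPv4/TCP. */
static int
is_tunneled_inner_ipv4_tcp(const struct rte_mbuf *m)
{
	return (m->packet_type & RTE_PTYPE_TUNNEL_MASK) != 0 &&
	       (m->packet_type & RTE_PTYPE_INNER_L3_MASK) ==
			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN &&
	       (m->packet_type & RTE_PTYPE_INNER_L4_MASK) ==
			RTE_PTYPE_INNER_L4_TCP;
}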
+ */ +static inline uint32_t +i40e_rxd_pkt_type_mapping(uint8_t ptype) +{ + static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { + /* L2 types */ + /* [0] reserved */ + [1] = RTE_PTYPE_L2_ETHER, + [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, + /* [3] - [5] reserved */ + [6] = RTE_PTYPE_L2_ETHER_LLDP, + /* [7] - [10] reserved */ + [11] = RTE_PTYPE_L2_ETHER_ARP, + /* [12] - [21] reserved */ + + /* Non tunneled IPv4 */ + [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + /* [25] reserved */ + [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* IPv4 --> IPv4 */ + [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [32] reserved */ + [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> IPv6 */ + [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [39] reserved */ + [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN */ + [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT, + + /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ + [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [47] reserved */ + [48] = 
RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ + [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [54] reserved */ + [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ + [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ + [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [62] reserved */ + [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ + [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [69] reserved */ + [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER 
| + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ + [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ + [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [77] reserved */ + [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ + [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [84] reserved */ + [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* Non tunneled IPv6 */ + [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + /* [91] reserved */ + [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [94] = 
RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* IPv6 --> IPv4 */ + [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [98] reserved */ + [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> IPv6 */ + [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [105] reserved */ + [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN */ + [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT, + + /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ + [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [113] reserved */ + [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */ + [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [119] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [120] reserved */ + [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ + [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ + [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [128] reserved */ + [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ + [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [135] reserved */ + [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ + [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ + [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + 
RTE_PTYPE_INNER_L4_FRAG, + [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [143] reserved */ + [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ + [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [150] reserved */ + [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* L2 NSH packet type */ + [154] = RTE_PTYPE_L2_ETHER_NSH, + [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* All others reserved */ + }; + + return type_table[ptype]; +} + #endif /* _I40E_RXTX_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c 
b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c deleted file mode 100644 index 51fb282a..00000000 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c +++ /dev/null @@ -1,761 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include "base/i40e_prototype.h" -#include "base/i40e_type.h" -#include "i40e_ethdev.h" -#include "i40e_rxtx.h" - -#include - -#ifndef __INTEL_COMPILER -#pragma GCC diagnostic ignored "-Wcast-qual" -#endif - -static inline void -i40e_rxq_rearm(struct i40e_rx_queue *rxq) -{ - int i; - uint16_t rx_id; - volatile union i40e_rx_desc *rxdp; - struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; - struct rte_mbuf *mb0, *mb1; - __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, - RTE_PKTMBUF_HEADROOM); - __m128i dma_addr0, dma_addr1; - - rxdp = rxq->rx_ring + rxq->rxrearm_start; - - /* Pull 'n' more MBUFs into the software ring */ - if (rte_mempool_get_bulk(rxq->mp, - (void *)rxep, - RTE_I40E_RXQ_REARM_THRESH) < 0) { - if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= - rxq->nb_rx_desc) { - dma_addr0 = _mm_setzero_si128(); - for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { - rxep[i].mbuf = &rxq->fake_mbuf; - _mm_store_si128((__m128i *)&rxdp[i].read, - dma_addr0); - } - } - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += - RTE_I40E_RXQ_REARM_THRESH; - return; - } - - /* Initialize the mbufs in vector, process 2 mbufs in one loop */ - for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { - __m128i vaddr0, vaddr1; - uintptr_t p0, p1; - - mb0 = rxep[0].mbuf; - mb1 = rxep[1].mbuf; - - /* Flush mbuf with pkt template. - * Data to be rearmed is 6 bytes long. - * Though, RX will overwrite ol_flags that are coming next - * anyway. So overwrite whole 8 bytes with one load: - * 6 bytes of rearm_data plus first 2 bytes of ol_flags. 
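The re-arm loop above (from the removed SSE file; the same pattern reappears in the new NEON file) programs each RX descriptor with the DMA address of the mbuf data area. A scalar equivalent of that address computation, shown only for illustration:

#include <rte_mbuf.h>

/* DMA address written into an RX descriptor: the buffer's physical
 * address plus the configured headroom (buf_physaddr is the DPDK 17.02
 * field name used throughout this patch). */
static uint64_t
rx_desc_dma_addr(const struct rte_mbuf *m)
{
	return m->buf_physaddr + RTE_PKTMBUF_HEADROOM;
}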
- */ - p0 = (uintptr_t)&mb0->rearm_data; - *(uint64_t *)p0 = rxq->mbuf_initializer; - p1 = (uintptr_t)&mb1->rearm_data; - *(uint64_t *)p1 = rxq->mbuf_initializer; - - /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ - vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); - vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); - - /* convert pa to dma_addr hdr/data */ - dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); - dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); - - /* add headroom to pa values */ - dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); - dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); - - /* flush desc with pa dma_addr */ - _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); - _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); - } - - rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH; - if (rxq->rxrearm_start >= rxq->nb_rx_desc) - rxq->rxrearm_start = 0; - - rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH; - - rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? - (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); - - /* Update the tail pointer on the NIC */ - I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); -} - -/* Handling the offload flags (olflags) field takes computation - * time when receiving packets. Therefore we provide a flag to disable - * the processing of the olflags field when they are not needed. This - * gives improved performance, at the cost of losing the offload info - * in the received packet - */ -#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE - -static inline void -desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) -{ - __m128i vlan0, vlan1, rss; - union { - uint16_t e[4]; - uint64_t dword; - } vol; - - /* mask everything except RSS, flow director and VLAN flags - * bit2 is for VLAN tag, bit11 for flow director indication - * bit13:12 for RSS indication. 
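A quick sanity check of the 0x3804 mask described in that comment (VLAN in bit 2, flow director in bit 11, RSS in bits 13:12); standalone and only for verification:

#include <assert.h>

int main(void)
{
	unsigned int mask = (1u << 2) | (1u << 11) | (3u << 12);

	assert(mask == 0x3804);
	return 0;
}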
- */ - const __m128i rss_vlan_msk = _mm_set_epi16( - 0x0000, 0x0000, 0x0000, 0x0000, - 0x3804, 0x3804, 0x3804, 0x3804); - - /* map rss and vlan type to rss hash and vlan flag */ - const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, - 0, 0, 0, 0); - - const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0, - 0, 0, 0, 0, - PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, - 0, 0, PKT_RX_FDIR, 0); - - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); - - vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); - vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - - rss = _mm_srli_epi16(vlan1, 11); - rss = _mm_shuffle_epi8(rss_flags, rss); - - vlan0 = _mm_or_si128(vlan0, rss); - vol.dword = _mm_cvtsi128_si64(vlan0); - - rx_pkts[0]->ol_flags = vol.e[0]; - rx_pkts[1]->ol_flags = vol.e[1]; - rx_pkts[2]->ol_flags = vol.e[2]; - rx_pkts[3]->ol_flags = vol.e[3]; -} -#else -#define desc_to_olflags_v(desc, rx_pkts) do {} while (0) -#endif - -#define PKTLEN_SHIFT 10 - - /* - * Notice: - * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet - * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST - * numbers of DD bits - */ -static inline uint16_t -_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts, uint8_t *split_packet) -{ - volatile union i40e_rx_desc *rxdp; - struct i40e_rx_entry *sw_ring; - uint16_t nb_pkts_recd; - int pos; - uint64_t var; - __m128i shuf_msk; - - __m128i crc_adjust = _mm_set_epi16( - 0, 0, 0, /* ignore non-length fields */ - -rxq->crc_len, /* sub crc on data_len */ - 0, /* ignore high-16bits of pkt_len */ - -rxq->crc_len, /* sub crc on pkt_len */ - 0, 0 /* ignore pkt_type field */ - ); - __m128i dd_check, eop_check; - - /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ - nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); - - /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ - nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); - - /* Just the act of getting into the function from the application is - * going to cost about 7 cycles - */ - rxdp = rxq->rx_ring + rxq->rx_tail; - - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); - - /* See if we need to rearm the RX queue - gives the prefetch a bit - * of time to act - */ - if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) - i40e_rxq_rearm(rxq); - - /* Before we start moving massive data around, check to see if - * there is actually a packet available - */ - if (!(rxdp->wb.qword1.status_error_len & - rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) - return 0; - - /* 4 packets DD mask */ - dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); - - /* 4 packets EOP mask */ - eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); - - /* mask to shuffle from desc. to mbuf */ - shuf_msk = _mm_set_epi8( - 7, 6, 5, 4, /* octet 4~7, 32bits rss */ - 3, 2, /* octet 2~3, low 16 bits vlan_macip */ - 15, 14, /* octet 15~14, 16 bits data_len */ - 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ - 15, 14, /* octet 15~14, low 16 bits pkt_len */ - 0xFF, 0xFF, /* pkt_type set as unknown */ - 0xFF, 0xFF /*pkt_type set as unknown */ - ); - - /* Cache is empty -> need to scan the buffer rings, but first move - * the next 'n' mbufs into the cache - */ - sw_ring = &rxq->sw_ring[rxq->rx_tail]; - - /* A. load 4 packet in one loop - * [A*. mask out 4 unused dirty field in desc] - * B. 
copy 4 mbuf point from swring to rx_pkts - * C. calc the number of DD bits among the 4 packets - * [C*. extract the end-of-packet bit, if requested] - * D. fill info. from desc to mbuf - */ - - for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; - pos += RTE_I40E_DESCS_PER_LOOP, - rxdp += RTE_I40E_DESCS_PER_LOOP) { - __m128i descs[RTE_I40E_DESCS_PER_LOOP]; - __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; - __m128i zero, staterr, sterr_tmp1, sterr_tmp2; - __m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */ - - /* B.1 load 1 mbuf point */ - mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); - /* Read desc statuses backwards to avoid race condition */ - /* A.1 load 4 pkts desc */ - descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); - - /* B.2 copy 2 mbuf point into rx_pkts */ - _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); - - /* B.1 load 1 mbuf point */ - mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); - - descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); - /* B.1 load 2 mbuf point */ - descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); - descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); - - /* B.2 copy 2 mbuf point into rx_pkts */ - _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); - - if (split_packet) { - rte_mbuf_prefetch_part2(rx_pkts[pos]); - rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); - rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); - rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); - } - - /* avoid compiler reorder optimization */ - rte_compiler_barrier(); - - /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ - const __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT); - const __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT); - - /* merge the now-aligned packet length fields back in */ - descs[3] = _mm_blend_epi16(descs[3], len3, 0x80); - descs[2] = _mm_blend_epi16(descs[2], len2, 0x80); - - /* D.1 pkt 3,4 convert format from desc to pktmbuf */ - pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); - pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); - - /* C.1 4=>2 filter staterr info only */ - sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); - /* C.1 4=>2 filter staterr info only */ - sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); - - desc_to_olflags_v(descs, &rx_pkts[pos]); - - /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ - pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); - pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); - - /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ - const __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT); - const __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT); - - /* merge the now-aligned packet length fields back in */ - descs[1] = _mm_blend_epi16(descs[1], len1, 0x80); - descs[0] = _mm_blend_epi16(descs[0], len0, 0x80); - - /* D.1 pkt 1,2 convert format from desc to pktmbuf */ - pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); - pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); - - /* C.2 get 4 pkts staterr value */ - zero = _mm_xor_si128(dd_check, dd_check); - staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); - - /* D.3 copy final 3,4 data to rx_pkts */ - _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, - pkt_mb4); - _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, - pkt_mb3); - - /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ - pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); - pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); - - /* C* extract and record EOP bit */ - if (split_packet) { - __m128i eop_shuf_mask = _mm_set_epi8( - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 
0xFF, 0xFF, 0xFF, - 0x04, 0x0C, 0x00, 0x08 - ); - - /* and with mask to extract bits, flipping 1-0 */ - __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); - /* the staterr values are not in order, as the count - * count of dd bits doesn't care. However, for end of - * packet tracking, we do care, so shuffle. This also - * compresses the 32-bit values to 8-bit - */ - eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); - /* store the resulting 32-bit value */ - *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); - split_packet += RTE_I40E_DESCS_PER_LOOP; - - /* zero-out next pointers */ - rx_pkts[pos]->next = NULL; - rx_pkts[pos + 1]->next = NULL; - rx_pkts[pos + 2]->next = NULL; - rx_pkts[pos + 3]->next = NULL; - } - - /* C.3 calc available number of desc */ - staterr = _mm_and_si128(staterr, dd_check); - staterr = _mm_packs_epi32(staterr, zero); - - /* D.3 copy final 1,2 data to rx_pkts */ - _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, - pkt_mb2); - _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, - pkt_mb1); - /* C.4 calc avaialbe number of desc */ - var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); - nb_pkts_recd += var; - if (likely(var != RTE_I40E_DESCS_PER_LOOP)) - break; - } - - /* Update our internal tail pointer */ - rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); - rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); - rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); - - return nb_pkts_recd; -} - - /* - * Notice: - * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet - * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST - * numbers of DD bits - */ -uint16_t -i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); -} - -static inline uint16_t -reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, - uint16_t nb_bufs, uint8_t *split_flags) -{ - struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/ - struct rte_mbuf *start = rxq->pkt_first_seg; - struct rte_mbuf *end = rxq->pkt_last_seg; - unsigned pkt_idx, buf_idx; - - for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) { - if (end != NULL) { - /* processing a split packet */ - end->next = rx_bufs[buf_idx]; - rx_bufs[buf_idx]->data_len += rxq->crc_len; - - start->nb_segs++; - start->pkt_len += rx_bufs[buf_idx]->data_len; - end = end->next; - - if (!split_flags[buf_idx]) { - /* it's the last packet of the set */ - start->hash = end->hash; - start->ol_flags = end->ol_flags; - /* we need to strip crc for the whole packet */ - start->pkt_len -= rxq->crc_len; - if (end->data_len > rxq->crc_len) { - end->data_len -= rxq->crc_len; - } else { - /* free up last mbuf */ - struct rte_mbuf *secondlast = start; - - while (secondlast->next != end) - secondlast = secondlast->next; - secondlast->data_len -= (rxq->crc_len - - end->data_len); - secondlast->next = NULL; - rte_pktmbuf_free_seg(end); - end = secondlast; - } - pkts[pkt_idx++] = start; - start = end = NULL; - } - } else { - /* not processing a split packet */ - if (!split_flags[buf_idx]) { - /* not a split packet, save and skip */ - pkts[pkt_idx++] = rx_bufs[buf_idx]; - continue; - } - end = start = rx_bufs[buf_idx]; - rx_bufs[buf_idx]->data_len += rxq->crc_len; - rx_bufs[buf_idx]->pkt_len += rxq->crc_len; - } - } - - /* save the partial packet for next time */ - rxq->pkt_first_seg = start; - rxq->pkt_last_seg = end; - memcpy(rx_bufs, pkts, pkt_idx * 
(sizeof(*pkts))); - return pkt_idx; -} - - /* vPMD receive routine that reassembles scattered packets - * Notice: - * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet - * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST - * numbers of DD bits - */ -uint16_t -i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - - struct i40e_rx_queue *rxq = rx_queue; - uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0}; - - /* get some new buffers */ - uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts, - split_flags); - if (nb_bufs == 0) - return 0; - - /* happy day case, full burst + no packets to be joined */ - const uint64_t *split_fl64 = (uint64_t *)split_flags; - - if (rxq->pkt_first_seg == NULL && - split_fl64[0] == 0 && split_fl64[1] == 0 && - split_fl64[2] == 0 && split_fl64[3] == 0) - return nb_bufs; - - /* reassemble any packets that need reassembly*/ - unsigned i = 0; - - if (rxq->pkt_first_seg == NULL) { - /* find the first split flag, and only reassemble then*/ - while (i < nb_bufs && !split_flags[i]) - i++; - if (i == nb_bufs) - return nb_bufs; - } - return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, - &split_flags[i]); -} - -static inline void -vtx1(volatile struct i40e_tx_desc *txdp, - struct rte_mbuf *pkt, uint64_t flags) -{ - uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA | - ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) | - ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)); - - __m128i descriptor = _mm_set_epi64x(high_qw, - pkt->buf_physaddr + pkt->data_off); - _mm_store_si128((__m128i *)txdp, descriptor); -} - -static inline void -vtx(volatile struct i40e_tx_desc *txdp, - struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) -{ - int i; - - for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) - vtx1(txdp, *pkt, flags); -} - -static inline int __attribute__((always_inline)) -i40e_tx_free_bufs(struct i40e_tx_queue *txq) -{ - struct i40e_tx_entry *txep; - uint32_t n; - uint32_t i; - int nb_free = 0; - struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ]; - - /* check DD bits on threshold descriptor */ - if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & - rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != - rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) - return 0; - - n = txq->tx_rs_thresh; - - /* first buffer to free from S/W ring is at index - * tx_next_dd - (tx_rs_thresh-1) - */ - txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; - m = __rte_pktmbuf_prefree_seg(txep[0].mbuf); - if (likely(m != NULL)) { - free[0] = m; - nb_free = 1; - for (i = 1; i < n; i++) { - m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); - if (likely(m != NULL)) { - if (likely(m->pool == free[0]->pool)) { - free[nb_free++] = m; - } else { - rte_mempool_put_bulk(free[0]->pool, - (void *)free, - nb_free); - free[0] = m; - nb_free = 1; - } - } - } - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); - } else { - for (i = 1; i < n; i++) { - m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); - if (m != NULL) - rte_mempool_put(m->pool, m); - } - } - - /* buffers were freed, update counters */ - txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh); - txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh); - if (txq->tx_next_dd >= txq->nb_tx_desc) - txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1); - - return txq->tx_rs_thresh; -} - -static inline void __attribute__((always_inline)) -tx_backlog_entry(struct i40e_tx_entry *txep, - struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - int i; - - for (i = 0; i 
< (int)nb_pkts; ++i) - txep[i].mbuf = tx_pkts[i]; -} - -uint16_t -i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) -{ - struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue; - volatile struct i40e_tx_desc *txdp; - struct i40e_tx_entry *txep; - uint16_t n, nb_commit, tx_id; - uint64_t flags = I40E_TD_CMD; - uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD; - int i; - - /* cross rx_thresh boundary is not allowed */ - nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); - - if (txq->nb_tx_free < txq->tx_free_thresh) - i40e_tx_free_bufs(txq); - - nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); - if (unlikely(nb_pkts == 0)) - return 0; - - tx_id = txq->tx_tail; - txdp = &txq->tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; - - txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); - - n = (uint16_t)(txq->nb_tx_desc - tx_id); - if (nb_commit >= n) { - tx_backlog_entry(txep, tx_pkts, n); - - for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) - vtx1(txdp, *tx_pkts, flags); - - vtx1(txdp, *tx_pkts++, rs); - - nb_commit = (uint16_t)(nb_commit - n); - - tx_id = 0; - txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1); - - /* avoid reach the end of ring */ - txdp = &txq->tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; - } - - tx_backlog_entry(txep, tx_pkts, nb_commit); - - vtx(txdp, tx_pkts, nb_commit, flags); - - tx_id = (uint16_t)(tx_id + nb_commit); - if (tx_id > txq->tx_next_rs) { - txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |= - rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) << - I40E_TXD_QW1_CMD_SHIFT); - txq->tx_next_rs = - (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh); - } - - txq->tx_tail = tx_id; - - I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); - - return nb_pkts; -} - -void __attribute__((cold)) -i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) -{ - const unsigned mask = rxq->nb_rx_desc - 1; - unsigned i; - - if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) - return; - - /* free all mbufs that are valid in the ring */ - for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); - rxq->rxrearm_nb = rxq->nb_rx_desc; - - /* set all entries to NULL */ - memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); -} - -int __attribute__((cold)) -i40e_rxq_vec_setup(struct i40e_rx_queue *rxq) -{ - uintptr_t p; - struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ - - mb_def.nb_segs = 1; - mb_def.data_off = RTE_PKTMBUF_HEADROOM; - mb_def.port = rxq->port_id; - rte_mbuf_refcnt_set(&mb_def, 1); - - /* prevent compiler reordering: rearm_data covers previous fields */ - rte_compiler_barrier(); - p = (uintptr_t)&mb_def.rearm_data; - rxq->mbuf_initializer = *(uint64_t *)p; - return 0; -} - -int __attribute__((cold)) -i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq) -{ - return 0; -} - -int __attribute__((cold)) -i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) -{ -#ifndef RTE_LIBRTE_IEEE1588 - struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; - struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; - - /* need SSE4.1 support */ - if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) - return -1; - -#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE - /* whithout rx ol_flags, no VP flag report */ - if (rxmode->hw_vlan_strip != 0 || - rxmode->hw_vlan_extend != 0) - return -1; -#endif - - /* no fdir support */ - if (fconf->mode != RTE_FDIR_MODE_NONE) - return -1; - - /* - no csum error report support - * - no header 
split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) - return -1; - - return 0; -#else - RTE_SET_USED(dev); - return -1; -#endif -} diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h new file mode 100644 index 00000000..37455589 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h @@ -0,0 +1,251 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _I40E_RXTX_VEC_COMMON_H_ +#define _I40E_RXTX_VEC_COMMON_H_ +#include +#include +#include + +#include "i40e_ethdev.h" +#include "i40e_rxtx.h" + +static inline uint16_t +reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, + uint16_t nb_bufs, uint8_t *split_flags) +{ + struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/ + struct rte_mbuf *start = rxq->pkt_first_seg; + struct rte_mbuf *end = rxq->pkt_last_seg; + unsigned pkt_idx, buf_idx; + + for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) { + if (end != NULL) { + /* processing a split packet */ + end->next = rx_bufs[buf_idx]; + rx_bufs[buf_idx]->data_len += rxq->crc_len; + + start->nb_segs++; + start->pkt_len += rx_bufs[buf_idx]->data_len; + end = end->next; + + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; + if (end->data_len > rxq->crc_len) + end->data_len -= rxq->crc_len; + else { + /* free up last mbuf */ + struct rte_mbuf *secondlast = start; + + start->nb_segs--; + while (secondlast->next != end) + secondlast = secondlast->next; + secondlast->data_len -= (rxq->crc_len - + end->data_len); + secondlast->next = NULL; + rte_pktmbuf_free_seg(end); + } + pkts[pkt_idx++] = start; + start = end = NULL; + } + } else { + /* not processing a split packet */ + if (!split_flags[buf_idx]) { + /* not a split packet, save and skip */ + pkts[pkt_idx++] = rx_bufs[buf_idx]; + continue; + } + end = start = rx_bufs[buf_idx]; + rx_bufs[buf_idx]->data_len += rxq->crc_len; + rx_bufs[buf_idx]->pkt_len += rxq->crc_len; + } + } + + /* save the partial packet for next time */ + rxq->pkt_first_seg = start; + rxq->pkt_last_seg = end; + memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts))); + return pkt_idx; +} + +static inline int __attribute__((always_inline)) +i40e_tx_free_bufs(struct i40e_tx_queue *txq) +{ + struct i40e_tx_entry *txep; + uint32_t n; + uint32_t i; + int nb_free = 0; + struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ]; + + /* check DD bits on threshold descriptor */ + if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & + rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != + rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) + return 0; + + n = txq->tx_rs_thresh; + + /* first buffer to free from S/W ring is at index + * tx_next_dd - (tx_rs_thresh-1) + */ + txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; + m = __rte_pktmbuf_prefree_seg(txep[0].mbuf); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) { + free[nb_free++] = m; + } else { + rte_mempool_put_bulk(free[0]->pool, + (void *)free, + nb_free); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + } else { + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + /* buffers were freed, update counters */ + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh); + txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh); + if (txq->tx_next_dd >= txq->nb_tx_desc) + txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1); + + return txq->tx_rs_thresh; +} + +static inline void __attribute__((always_inline)) +tx_backlog_entry(struct i40e_tx_entry *txep, + struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts) +{ + int i; + + for (i = 0; i < (int)nb_pkts; ++i) + txep[i].mbuf = tx_pkts[i]; +} + +static inline void +_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) +{ + const unsigned mask = rxq->nb_rx_desc - 1; + unsigned i; + + if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) + return; + + /* free all mbufs that are valid in the ring */ + if (rxq->rxrearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; + i = (i + 1) & mask) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } + + rxq->rxrearm_nb = rxq->nb_rx_desc; + + /* set all entries to NULL */ + memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); +} + +static inline int +i40e_rxq_vec_setup_default(struct i40e_rx_queue *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer = *(uint64_t *)p; + return 0; +} + +static inline int +i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) +{ +#ifndef RTE_LIBRTE_IEEE1588 + struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; + +#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE + /* whithout rx ol_flags, no VP flag report */ + if (rxmode->hw_vlan_strip != 0 || + rxmode->hw_vlan_extend != 0 || + rxmode->hw_ip_checksum != 0) + return -1; +#endif + + /* no fdir support */ + if (fconf->mode != RTE_FDIR_MODE_NONE) + return -1; + + /* - no csum error report support + * - no header split support + */ + if (rxmode->header_split == 1) + return -1; + + return 0; +#else + RTE_SET_USED(dev); + return -1; +#endif +} +#endif diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c new file mode 100644 index 00000000..011c54e0 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -0,0 +1,614 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
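i40e_rx_vec_dev_conf_condition_check_default() above keeps the vector RX path eligible only when flow director and header split are left off. A hedged application-side sketch of a configuration that satisfies those two unconditional checks (field names per DPDK 17.02):

#include <rte_ethdev.h>

static void
configure_for_vector_rx(struct rte_eth_conf *conf)
{
	/* Flow director must stay disabled ... */
	conf->fdir_conf.mode = RTE_FDIR_MODE_NONE;
	/* ... and header split must not be requested. */
	conf->rxmode.header_split = 0;
}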
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "base/i40e_prototype.h" +#include "base/i40e_type.h" +#include "i40e_ethdev.h" +#include "i40e_rxtx.h" +#include "i40e_rxtx_vec_common.h" + +#include + +#pragma GCC diagnostic ignored "-Wcast-qual" + +static inline void +i40e_rxq_rearm(struct i40e_rx_queue *rxq) +{ + int i; + uint16_t rx_id; + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf *mb0, *mb1; + uint64x2_t dma_addr0, dma_addr1; + uint64x2_t zero = vdupq_n_u64(0); + uint64_t paddr; + uint8x8_t p; + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + + /* Pull 'n' more MBUFs into the software ring */ + if (unlikely(rte_mempool_get_bulk(rxq->mp, + (void *)rxep, + RTE_I40E_RXQ_REARM_THRESH) < 0)) { + if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= + rxq->nb_rx_desc) { + for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { + rxep[i].mbuf = &rxq->fake_mbuf; + vst1q_u64((uint64_t *)&rxdp[i].read, zero); + } + } + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += + RTE_I40E_RXQ_REARM_THRESH; + return; + } + + p = vld1_u8((uint8_t *)&rxq->mbuf_initializer); + + /* Initialize the mbufs in vector, process 2 mbufs in one loop */ + for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { + mb0 = rxep[0].mbuf; + mb1 = rxep[1].mbuf; + + /* Flush mbuf with pkt template. + * Data to be rearmed is 6 bytes long. + * Though, RX will overwrite ol_flags that are coming next + * anyway. So overwrite whole 8 bytes with one load: + * 6 bytes of rearm_data plus first 2 bytes of ol_flags. + */ + vst1_u8((uint8_t *)&mb0->rearm_data, p); + paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM; + dma_addr0 = vdupq_n_u64(paddr); + + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); + + vst1_u8((uint8_t *)&mb1->rearm_data, p); + paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM; + dma_addr1 = vdupq_n_u64(paddr); + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1); + } + + rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH; + if (rxq->rxrearm_start >= rxq->nb_rx_desc) + rxq->rxrearm_start = 0; + + rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH; + + rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? + (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); + + /* Update the tail pointer on the NIC */ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); +} + +/* Handling the offload flags (olflags) field takes computation + * time when receiving packets. Therefore we provide a flag to disable + * the processing of the olflags field when they are not needed. This + * gives improved performance, at the cost of losing the offload info + * in the received packet + */ +#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE + +static inline void +desc_to_olflags_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts) +{ + uint32x4_t vlan0, vlan1, rss, l3_l4e; + + /* mask everything except RSS, flow director and VLAN flags + * bit2 is for VLAN tag, bit11 for flow director indication + * bit13:12 for RSS indication. 
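Besides the VLAN, FDIR and RSS bits, the NEON variant's 0x1c03804 mask below also keeps the L3/L4/outer-IP checksum error bits (22-24), which desc_to_olflags_v() translates into PKT_RX_*_CKSUM_BAD flags. A small sketch of how a receiver can consume those flags ('m' is any received mbuf; the helper is illustrative):

#include <rte_mbuf.h>

static int
rx_cksum_ok(const struct rte_mbuf *m)
{
	/* Set by desc_to_olflags_v() when the NIC reports a bad checksum. */
	return !(m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD));
}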
+ */ + const uint32x4_t rss_vlan_msk = { + 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804}; + + /* map rss and vlan type to rss hash and vlan flag */ + const uint8x16_t vlan_flags = { + 0, 0, 0, 0, + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}; + + const uint8x16_t rss_flags = { + 0, PKT_RX_FDIR, 0, 0, + 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR, + 0, 0, 0, 0, + 0, 0, 0, 0}; + + const uint8x16_t l3_l4e_flags = { + 0, + PKT_RX_IP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + 0, 0, 0, 0, 0, 0, 0, 0}; + + vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]), + vreinterpretq_u32_u64(descs[2])).val[1]; + vlan1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]), + vreinterpretq_u32_u64(descs[3])).val[1]; + vlan0 = vzipq_u32(vlan0, vlan1).val[0]; + + vlan1 = vandq_u32(vlan0, rss_vlan_msk); + vlan0 = vreinterpretq_u32_u8(vqtbl1q_u8(vlan_flags, + vreinterpretq_u8_u32(vlan1))); + + rss = vshrq_n_u32(vlan1, 11); + rss = vreinterpretq_u32_u8(vqtbl1q_u8(rss_flags, + vreinterpretq_u8_u32(rss))); + + l3_l4e = vshrq_n_u32(vlan1, 22); + l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags, + vreinterpretq_u8_u32(l3_l4e))); + + + vlan0 = vorrq_u32(vlan0, rss); + vlan0 = vorrq_u32(vlan0, l3_l4e); + + rx_pkts[0]->ol_flags = vgetq_lane_u32(vlan0, 0); + rx_pkts[1]->ol_flags = vgetq_lane_u32(vlan0, 1); + rx_pkts[2]->ol_flags = vgetq_lane_u32(vlan0, 2); + rx_pkts[3]->ol_flags = vgetq_lane_u32(vlan0, 3); +} +#else +#define desc_to_olflags_v(descs, rx_pkts) do {} while (0) +#endif + +#define PKTLEN_SHIFT 10 + +#define I40E_VPMD_DESC_DD_MASK 0x0001000100010001ULL + +static inline void +desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts) +{ + int i; + uint8_t ptype; + uint8x16_t tmp; + + for (i = 0; i < 4; i++) { + tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30)); + ptype = vgetq_lane_u8(tmp, 8); + rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(ptype); + } + +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +static inline uint16_t +_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) +{ + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *sw_ring; + uint16_t nb_pkts_recd; + int pos; + uint64_t var; + + /* mask to shuffle from desc. 
to mbuf */ + uint8x16_t shuf_msk = { + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 14, 15, /* octet 15~14, low 16 bits pkt_len */ + 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ + 14, 15, /* octet 15~14, 16 bits data_len */ + 2, 3, /* octet 2~3, low 16 bits vlan_macip */ + 4, 5, 6, 7 /* octet 4~7, 32bits rss */ + }; + + uint8x16_t eop_check = { + 0x02, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + + uint16x8_t crc_adjust = { + 0, 0, /* ignore pkt_type field */ + rxq->crc_len, /* sub crc on pkt_len */ + 0, /* ignore high-16bits of pkt_len */ + rxq->crc_len, /* sub crc on data_len */ + 0, 0, 0 /* ignore non-length fields */ + }; + + /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); + + /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); + + /* Just the act of getting into the function from the application is + * going to cost about 7 cycles + */ + rxdp = rxq->rx_ring + rxq->rx_tail; + + rte_prefetch_non_temporal(rxdp); + + /* See if we need to rearm the RX queue - gives the prefetch a bit + * of time to act + */ + if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) + i40e_rxq_rearm(rxq); + + /* Before we start moving massive data around, check to see if + * there is actually a packet available + */ + if (!(rxdp->wb.qword1.status_error_len & + rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) + return 0; + + /* Cache is empty -> need to scan the buffer rings, but first move + * the next 'n' mbufs into the cache + */ + sw_ring = &rxq->sw_ring[rxq->rx_tail]; + + /* A. load 4 packet in one loop + * [A*. mask out 4 unused dirty field in desc] + * B. copy 4 mbuf point from swring to rx_pkts + * C. calc the number of DD bits among the 4 packets + * [C*. extract the end-of-packet bit, if requested] + * D. fill info. 
from desc to mbuf + */ + + for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; + pos += RTE_I40E_DESCS_PER_LOOP, + rxdp += RTE_I40E_DESCS_PER_LOOP) { + uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP]; + uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; + uint16x8x2_t sterr_tmp1, sterr_tmp2; + uint64x2_t mbp1, mbp2; + uint16x8_t staterr; + uint16x8_t tmp; + uint64_t stat; + + int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT}; + + /* B.1 load 1 mbuf point */ + mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ + /* A.1 load 4 pkts desc */ + descs[3] = vld1q_u64((uint64_t *)(rxdp + 3)); + rte_rmb(); + + /* B.2 copy 2 mbuf point into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1); + + /* B.1 load 1 mbuf point */ + mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); + + descs[2] = vld1q_u64((uint64_t *)(rxdp + 2)); + /* B.1 load 2 mbuf point */ + descs[1] = vld1q_u64((uint64_t *)(rxdp + 1)); + descs[0] = vld1q_u64((uint64_t *)(rxdp)); + + /* B.2 copy 2 mbuf point into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); + + if (split_packet) { + rte_mbuf_prefetch_part2(rx_pkts[pos]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); + } + + /* avoid compiler reorder optimization */ + rte_compiler_barrier(); + + /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ + uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]), + len_shl); + descs[3] = vreinterpretq_u64_u32(len3); + uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]), + len_shl); + descs[2] = vreinterpretq_u64_u32(len2); + + /* D.1 pkt 3,4 convert format from desc to pktmbuf */ + pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk); + pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk); + + /* C.1 4=>2 filter staterr info only */ + sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]), + vreinterpretq_u16_u64(descs[3])); + /* C.1 4=>2 filter staterr info only */ + sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]), + vreinterpretq_u16_u64(descs[2])); + + /* C.2 get 4 pkts staterr value */ + staterr = vzipq_u16(sterr_tmp1.val[1], + sterr_tmp2.val[1]).val[0]; + stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0); + + desc_to_olflags_v(descs, &rx_pkts[pos]); + + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); + pkt_mb4 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); + pkt_mb3 = vreinterpretq_u8_u16(tmp); + + /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ + uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]), + len_shl); + descs[1] = vreinterpretq_u64_u32(len1); + uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]), + len_shl); + descs[0] = vreinterpretq_u64_u32(len0); + + /* D.1 pkt 1,2 convert format from desc to pktmbuf */ + pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk); + pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk); + + /* D.3 copy final 3,4 data to rx_pkts */ + vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1, + pkt_mb4); + vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1, + pkt_mb3); + + /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust); + pkt_mb2 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust); + pkt_mb1 = vreinterpretq_u8_u16(tmp); + + /* C* extract and record EOP 
bit */ + if (split_packet) { + uint8x16_t eop_shuf_mask = { + 0x00, 0x02, 0x04, 0x06, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF}; + uint8x16_t eop_bits; + + /* and with mask to extract bits, flipping 1-0 */ + eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr)); + eop_bits = vandq_u8(eop_bits, eop_check); + /* the staterr values are not in order, as the count + * count of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ + eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask); + + /* store the resulting 32-bit value */ + vst1q_lane_u32((uint32_t *)split_packet, + vreinterpretq_u32_u8(eop_bits), 0); + split_packet += RTE_I40E_DESCS_PER_LOOP; + + /* zero-out next pointers */ + rx_pkts[pos]->next = NULL; + rx_pkts[pos + 1]->next = NULL; + rx_pkts[pos + 2]->next = NULL; + rx_pkts[pos + 3]->next = NULL; + } + + rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP); + + /* D.3 copy final 1,2 data to rx_pkts */ + vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1, + pkt_mb2); + vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos]); + /* C.4 calc avaialbe number of desc */ + var = __builtin_popcountll(stat & I40E_VPMD_DESC_DD_MASK); + nb_pkts_recd += var; + if (likely(var != RTE_I40E_DESCS_PER_LOOP)) + break; + } + + /* Update our internal tail pointer */ + rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); + rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); + rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); + + return nb_pkts_recd; +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); +} + + /* vPMD receive routine that reassembles scattered packets + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + + struct i40e_rx_queue *rxq = rx_queue; + uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0}; + + /* get some new buffers */ + uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts, + split_flags); + if (nb_bufs == 0) + return 0; + + /* happy day case, full burst + no packets to be joined */ + const uint64_t *split_fl64 = (uint64_t *)split_flags; + + if (rxq->pkt_first_seg == NULL && + split_fl64[0] == 0 && split_fl64[1] == 0 && + split_fl64[2] == 0 && split_fl64[3] == 0) + return nb_bufs; + + /* reassemble any packets that need reassembly*/ + unsigned i = 0; + + if (rxq->pkt_first_seg == NULL) { + /* find the first split flag, and only reassemble then*/ + while (i < nb_bufs && !split_flags[i]) + i++; + if (i == nb_bufs) + return nb_bufs; + } + return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, + &split_flags[i]); +} + +static inline void +vtx1(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf *pkt, uint64_t flags) +{ + uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA | + ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) | + ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)); + + uint64x2_t descriptor = {pkt->buf_physaddr + pkt->data_off, high_qw}; + 
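	/* a single 128-bit store fills both quadwords (DMA address and cmd/size) of the Tx descriptor */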
vst1q_u64((uint64_t *)txdp, descriptor); +} + +static inline void +vtx(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) +{ + int i; + + for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) + vtx1(txdp, *pkt, flags); +} + +uint16_t +i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue; + volatile struct i40e_tx_desc *txdp; + struct i40e_tx_entry *txep; + uint16_t n, nb_commit, tx_id; + uint64_t flags = I40E_TD_CMD; + uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD; + int i; + + /* cross rx_thresh boundary is not allowed */ + nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); + + if (txq->nb_tx_free < txq->tx_free_thresh) + i40e_tx_free_bufs(txq); + + nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); + if (unlikely(nb_pkts == 0)) + return 0; + + tx_id = txq->tx_tail; + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); + + n = (uint16_t)(txq->nb_tx_desc - tx_id); + if (nb_commit >= n) { + tx_backlog_entry(txep, tx_pkts, n); + + for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) + vtx1(txdp, *tx_pkts, flags); + + vtx1(txdp, *tx_pkts++, rs); + + nb_commit = (uint16_t)(nb_commit - n); + + tx_id = 0; + txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1); + + /* avoid reach the end of ring */ + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + } + + tx_backlog_entry(txep, tx_pkts, nb_commit); + + vtx(txdp, tx_pkts, nb_commit, flags); + + tx_id = (uint16_t)(tx_id + nb_commit); + if (tx_id > txq->tx_next_rs) { + txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |= + rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) << + I40E_TXD_QW1_CMD_SHIFT); + txq->tx_next_rs = + (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh); + } + + txq->tx_tail = tx_id; + + I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + + return nb_pkts; +} + +void __attribute__((cold)) +i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) +{ + _i40e_rx_queue_release_mbufs_vec(rxq); +} + +int __attribute__((cold)) +i40e_rxq_vec_setup(struct i40e_rx_queue *rxq) +{ + return i40e_rxq_vec_setup_default(rxq); +} + +int __attribute__((cold)) +i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq) +{ + return 0; +} + +int __attribute__((cold)) +i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) +{ + return i40e_rx_vec_dev_conf_condition_check_default(dev); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c new file mode 100644 index 00000000..b95cc8e1 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c @@ -0,0 +1,633 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "base/i40e_prototype.h" +#include "base/i40e_type.h" +#include "i40e_ethdev.h" +#include "i40e_rxtx.h" +#include "i40e_rxtx_vec_common.h" + +#include + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +static inline void +i40e_rxq_rearm(struct i40e_rx_queue *rxq) +{ + int i; + uint16_t rx_id; + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf *mb0, *mb1; + __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, + RTE_PKTMBUF_HEADROOM); + __m128i dma_addr0, dma_addr1; + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + + /* Pull 'n' more MBUFs into the software ring */ + if (rte_mempool_get_bulk(rxq->mp, + (void *)rxep, + RTE_I40E_RXQ_REARM_THRESH) < 0) { + if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= + rxq->nb_rx_desc) { + dma_addr0 = _mm_setzero_si128(); + for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { + rxep[i].mbuf = &rxq->fake_mbuf; + _mm_store_si128((__m128i *)&rxdp[i].read, + dma_addr0); + } + } + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += + RTE_I40E_RXQ_REARM_THRESH; + return; + } + + /* Initialize the mbufs in vector, process 2 mbufs in one loop */ + for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { + __m128i vaddr0, vaddr1; + uintptr_t p0, p1; + + mb0 = rxep[0].mbuf; + mb1 = rxep[1].mbuf; + + /* Flush mbuf with pkt template. + * Data to be rearmed is 6 bytes long. + * Though, RX will overwrite ol_flags that are coming next + * anyway. So overwrite whole 8 bytes with one load: + * 6 bytes of rearm_data plus first 2 bytes of ol_flags. 
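+ * The 8-byte template written below is rxq->mbuf_initializer, prepared
+ * once per queue by i40e_rxq_vec_setup_default().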
+ */ + p0 = (uintptr_t)&mb0->rearm_data; + *(uint64_t *)p0 = rxq->mbuf_initializer; + p1 = (uintptr_t)&mb1->rearm_data; + *(uint64_t *)p1 = rxq->mbuf_initializer; + + /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ + vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); + vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); + + /* convert pa to dma_addr hdr/data */ + dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); + dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); + + /* add headroom to pa values */ + dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); + dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); + + /* flush desc with pa dma_addr */ + _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); + _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); + } + + rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH; + if (rxq->rxrearm_start >= rxq->nb_rx_desc) + rxq->rxrearm_start = 0; + + rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH; + + rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? + (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); + + /* Update the tail pointer on the NIC */ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); +} + +/* Handling the offload flags (olflags) field takes computation + * time when receiving packets. Therefore we provide a flag to disable + * the processing of the olflags field when they are not needed. This + * gives improved performance, at the cost of losing the offload info + * in the received packet + */ +#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE + +static inline void +desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) +{ + __m128i vlan0, vlan1, rss, l3_l4e; + + /* mask everything except RSS, flow director and VLAN flags + * bit2 is for VLAN tag, bit11 for flow director indication + * bit13:12 for RSS indication. + */ + const __m128i rss_vlan_msk = _mm_set_epi32( + 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804); + + const __m128i cksum_mask = _mm_set_epi32( + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD); + + /* map rss and vlan type to rss hash and vlan flag */ + const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, + 0, 0, 0, 0); + + const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, + 0, 0, PKT_RX_FDIR, 0); + + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + /* shift right 1 bit to make sure it not exceed 255 */ + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, + PKT_RX_IP_CKSUM_BAD >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1); + + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); + vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]); + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); + + vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); + vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); + 
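+	/* shift bits 13:11 (RSS/FDIR status) down to index the rss_flags table */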
+ rss = _mm_srli_epi32(vlan1, 11); + rss = _mm_shuffle_epi8(rss_flags, rss); + + l3_l4e = _mm_srli_epi32(vlan1, 22); + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + /* then we shift left 1 bit */ + l3_l4e = _mm_slli_epi32(l3_l4e, 1); + /* we need to mask out the reduntant bits */ + l3_l4e = _mm_and_si128(l3_l4e, cksum_mask); + + vlan0 = _mm_or_si128(vlan0, rss); + vlan0 = _mm_or_si128(vlan0, l3_l4e); + + rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); + rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); + rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); + rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); +} +#else +#define desc_to_olflags_v(desc, rx_pkts) do {} while (0) +#endif + +#define PKTLEN_SHIFT 10 + +static inline void +desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts) +{ + __m128i ptype0 = _mm_unpackhi_epi64(descs[0], descs[1]); + __m128i ptype1 = _mm_unpackhi_epi64(descs[2], descs[3]); + + ptype0 = _mm_srli_epi64(ptype0, 30); + ptype1 = _mm_srli_epi64(ptype1, 30); + + rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 0)); + rx_pkts[1]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 8)); + rx_pkts[2]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 0)); + rx_pkts[3]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 8)); +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +static inline uint16_t +_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) +{ + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *sw_ring; + uint16_t nb_pkts_recd; + int pos; + uint64_t var; + __m128i shuf_msk; + + __m128i crc_adjust = _mm_set_epi16( + 0, 0, 0, /* ignore non-length fields */ + -rxq->crc_len, /* sub crc on data_len */ + 0, /* ignore high-16bits of pkt_len */ + -rxq->crc_len, /* sub crc on pkt_len */ + 0, 0 /* ignore pkt_type field */ + ); + __m128i dd_check, eop_check; + + /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); + + /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); + + /* Just the act of getting into the function from the application is + * going to cost about 7 cycles + */ + rxdp = rxq->rx_ring + rxq->rx_tail; + + rte_prefetch0(rxdp); + + /* See if we need to rearm the RX queue - gives the prefetch a bit + * of time to act + */ + if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) + i40e_rxq_rearm(rxq); + + /* Before we start moving massive data around, check to see if + * there is actually a packet available + */ + if (!(rxdp->wb.qword1.status_error_len & + rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) + return 0; + + /* 4 packets DD mask */ + dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); + + /* 4 packets EOP mask */ + eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); + + /* mask to shuffle from desc. 
to mbuf */ + shuf_msk = _mm_set_epi8( + 7, 6, 5, 4, /* octet 4~7, 32bits rss */ + 3, 2, /* octet 2~3, low 16 bits vlan_macip */ + 15, 14, /* octet 15~14, 16 bits data_len */ + 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ + 15, 14, /* octet 15~14, low 16 bits pkt_len */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF /*pkt_type set as unknown */ + ); + + /* Cache is empty -> need to scan the buffer rings, but first move + * the next 'n' mbufs into the cache + */ + sw_ring = &rxq->sw_ring[rxq->rx_tail]; + + /* A. load 4 packet in one loop + * [A*. mask out 4 unused dirty field in desc] + * B. copy 4 mbuf point from swring to rx_pkts + * C. calc the number of DD bits among the 4 packets + * [C*. extract the end-of-packet bit, if requested] + * D. fill info. from desc to mbuf + */ + + for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; + pos += RTE_I40E_DESCS_PER_LOOP, + rxdp += RTE_I40E_DESCS_PER_LOOP) { + __m128i descs[RTE_I40E_DESCS_PER_LOOP]; + __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; + __m128i zero, staterr, sterr_tmp1, sterr_tmp2; + __m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */ + + /* B.1 load 1 mbuf point */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ + /* A.1 load 4 pkts desc */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + + /* B.2 copy 2 mbuf point into rx_pkts */ + _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); + + /* B.1 load 1 mbuf point */ + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); + + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); + /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); + + /* B.2 copy 2 mbuf point into rx_pkts */ + _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); + + if (split_packet) { + rte_mbuf_prefetch_part2(rx_pkts[pos]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); + } + + /* avoid compiler reorder optimization */ + rte_compiler_barrier(); + + /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ + const __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT); + const __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT); + + /* merge the now-aligned packet length fields back in */ + descs[3] = _mm_blend_epi16(descs[3], len3, 0x80); + descs[2] = _mm_blend_epi16(descs[2], len2, 0x80); + + /* D.1 pkt 3,4 convert format from desc to pktmbuf */ + pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); + pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); + + /* C.1 4=>2 filter staterr info only */ + sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); + /* C.1 4=>2 filter staterr info only */ + sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); + + desc_to_olflags_v(descs, &rx_pkts[pos]); + + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ + pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); + pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); + + /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ + const __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT); + const __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT); + + /* merge the now-aligned packet length fields back in */ + descs[1] = _mm_blend_epi16(descs[1], len1, 0x80); + descs[0] = _mm_blend_epi16(descs[0], len0, 0x80); + + /* D.1 pkt 1,2 convert format from desc to pktmbuf */ + pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); + pkt_mb1 = 
_mm_shuffle_epi8(descs[0], shuf_msk); + + /* C.2 get 4 pkts staterr value */ + zero = _mm_xor_si128(dd_check, dd_check); + staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); + + /* D.3 copy final 3,4 data to rx_pkts */ + _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, + pkt_mb4); + _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, + pkt_mb3); + + /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ + pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); + pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); + + /* C* extract and record EOP bit */ + if (split_packet) { + __m128i eop_shuf_mask = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0x04, 0x0C, 0x00, 0x08 + ); + + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count + * count of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ + eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); + /* store the resulting 32-bit value */ + *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); + split_packet += RTE_I40E_DESCS_PER_LOOP; + + /* zero-out next pointers */ + rx_pkts[pos]->next = NULL; + rx_pkts[pos + 1]->next = NULL; + rx_pkts[pos + 2]->next = NULL; + rx_pkts[pos + 3]->next = NULL; + } + + /* C.3 calc available number of desc */ + staterr = _mm_and_si128(staterr, dd_check); + staterr = _mm_packs_epi32(staterr, zero); + + /* D.3 copy final 1,2 data to rx_pkts */ + _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, + pkt_mb2); + _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos]); + /* C.4 calc avaialbe number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != RTE_I40E_DESCS_PER_LOOP)) + break; + } + + /* Update our internal tail pointer */ + rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); + rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); + rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); + + return nb_pkts_recd; +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); +} + + /* vPMD receive routine that reassembles scattered packets + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + + struct i40e_rx_queue *rxq = rx_queue; + uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0}; + + /* get some new buffers */ + uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts, + split_flags); + if (nb_bufs == 0) + return 0; + + /* happy day case, full burst + no packets to be joined */ + const uint64_t *split_fl64 = (uint64_t *)split_flags; + + if (rxq->pkt_first_seg == NULL && + split_fl64[0] == 0 && split_fl64[1] == 0 && + split_fl64[2] == 0 && split_fl64[3] == 0) + return nb_bufs; + + /* reassemble any packets that need reassembly*/ + unsigned i = 0; + + if (rxq->pkt_first_seg == NULL) { + /* find 
the first split flag, and only reassemble then*/ + while (i < nb_bufs && !split_flags[i]) + i++; + if (i == nb_bufs) + return nb_bufs; + } + return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, + &split_flags[i]); +} + +static inline void +vtx1(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf *pkt, uint64_t flags) +{ + uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA | + ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) | + ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)); + + __m128i descriptor = _mm_set_epi64x(high_qw, + pkt->buf_physaddr + pkt->data_off); + _mm_store_si128((__m128i *)txdp, descriptor); +} + +static inline void +vtx(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) +{ + int i; + + for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) + vtx1(txdp, *pkt, flags); +} + +uint16_t +i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue; + volatile struct i40e_tx_desc *txdp; + struct i40e_tx_entry *txep; + uint16_t n, nb_commit, tx_id; + uint64_t flags = I40E_TD_CMD; + uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD; + int i; + + /* cross rx_thresh boundary is not allowed */ + nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); + + if (txq->nb_tx_free < txq->tx_free_thresh) + i40e_tx_free_bufs(txq); + + nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); + if (unlikely(nb_pkts == 0)) + return 0; + + tx_id = txq->tx_tail; + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); + + n = (uint16_t)(txq->nb_tx_desc - tx_id); + if (nb_commit >= n) { + tx_backlog_entry(txep, tx_pkts, n); + + for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) + vtx1(txdp, *tx_pkts, flags); + + vtx1(txdp, *tx_pkts++, rs); + + nb_commit = (uint16_t)(nb_commit - n); + + tx_id = 0; + txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1); + + /* avoid reach the end of ring */ + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + } + + tx_backlog_entry(txep, tx_pkts, nb_commit); + + vtx(txdp, tx_pkts, nb_commit, flags); + + tx_id = (uint16_t)(tx_id + nb_commit); + if (tx_id > txq->tx_next_rs) { + txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |= + rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) << + I40E_TXD_QW1_CMD_SHIFT); + txq->tx_next_rs = + (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh); + } + + txq->tx_tail = tx_id; + + I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + + return nb_pkts; +} + +void __attribute__((cold)) +i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) +{ + _i40e_rx_queue_release_mbufs_vec(rxq); +} + +int __attribute__((cold)) +i40e_rxq_vec_setup(struct i40e_rx_queue *rxq) +{ + return i40e_rxq_vec_setup_default(rxq); +} + +int __attribute__((cold)) +i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq) +{ + return 0; +} + +int __attribute__((cold)) +i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) +{ +#ifndef RTE_LIBRTE_IEEE1588 + /* need SSE4.1 support */ + if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) + return -1; +#endif + + return i40e_rx_vec_dev_conf_condition_check_default(dev); +} diff --git a/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h b/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h new file mode 100644 index 00000000..a0ad88c6 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h @@ -0,0 +1,335 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PMD_I40E_H_ +#define _PMD_I40E_H_ + +/** + * @file rte_pmd_i40e.h + * + * i40e PMD specific functions. + * + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + */ + +#include + +/** + * Response sent back to i40e driver from user app after callback + */ +enum rte_pmd_i40e_mb_event_rsp { + RTE_PMD_I40E_MB_EVENT_NOOP_ACK, /**< skip mbox request and ACK */ + RTE_PMD_I40E_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */ + RTE_PMD_I40E_MB_EVENT_PROCEED, /**< proceed with mbox request */ + RTE_PMD_I40E_MB_EVENT_MAX /**< max value of this enum */ +}; + +/** + * Data sent to the user application when the callback is executed. + */ +struct rte_pmd_i40e_mb_event_param { + uint16_t vfid; /**< Virtual Function number */ + uint16_t msg_type; /**< VF to PF message type, see i40e_virtchnl_ops */ + uint16_t retval; /**< return value */ + void *msg; /**< pointer to message */ + uint16_t msglen; /**< length of the message */ +}; + +/** + * Notify VF when PF link status changes. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* invalid. + */ +int rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf); + +/** + * Enable/Disable VF MAC anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set MAC anti spoofing. + * @param on + * 1 - Enable VFs MAC anti spoofing. + * 0 - Disable VFs MAC anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable VF VLAN anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set VLAN anti spoofing. + * @param on + * 1 - Enable VFs VLAN anti spoofing. + * 0 - Disable VFs VLAN anti spoofing. 
+ * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable TX loopback on all the PF and VFs. + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - Enable TX loopback. + * 0 - Disable TX loopback. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_tx_loopback(uint8_t port, + uint8_t on); + +/** + * Enable/Disable VF unicast promiscuous mode. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set. + * @param on + * 1 - Enable. + * 0 - Disable. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable VF multicast promiscuous mode. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set. + * @param on + * 1 - Enable. + * 0 - Disable. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Set the VF MAC address. + * + * PF should set MAC address before VF initialized, if PF sets the MAC + * address after VF initialized, new MAC address won't be effective until + * VF reinitialize. + * + * This will remove all existing MAC filters. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF id. + * @param mac_addr + * VF MAC address. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* or *mac_addr* is invalid. + */ +int rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id, + struct ether_addr *mac_addr); + +/** + * Enable/Disable vf vlan strip for all queues in a pool + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - Enable VF's vlan strip on RX queues. + * 0 - Disable VF's vlan strip on RX queues. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int +rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan insert + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param vlan_id + * 0 - Disable VF's vlan insert. + * n - Enable; n is inserted as the vlan id. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id, + uint16_t vlan_id); + +/** + * Enable/Disable vf broadcast mode + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param on + * 0 - Disable broadcast. + * 1 - Enable broadcast. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable vf vlan tag + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param on + * 0 - Disable VF's vlan tag. 
+ * n - Enable VF's vlan tag. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on); + +/** + * Enable/Disable VF VLAN filter + * + * @param port + * The port identifier of the Ethernet device. + * @param vlan_id + * ID specifying VLAN + * @param vf_mask + * Mask to filter VF's + * @param on + * 0 - Disable VF's VLAN filter. + * 1 - Enable VF's VLAN filter. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + * - (-ENOTSUP) not supported by firmware. + */ +int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, uint8_t on); + +/** + * Get VF's statistics + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to get. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ + +int rte_pmd_i40e_get_vf_stats(uint8_t port, + uint16_t vf_id, + struct rte_eth_stats *stats); + +/** + * Clear VF's statistics + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to get. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_reset_vf_stats(uint8_t port, + uint16_t vf_id); + +#endif /* _PMD_I40E_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c index db808801..724dcbbc 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c @@ -995,19 +995,19 @@ STATIC s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) * @vlan: VLAN id to write to VLAN filter * @vind: VMDq output index that maps queue to VLAN id in VFTA * @vlan_on: boolean flag to turn on/off VLAN in VFTA - * @bypass_vlvf: boolean flag - unused + * @vlvf_bypass: boolean flag - unused * * Turn on/off specified VLAN in the VLAN filter table. 
**/ s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on, bool bypass_vlvf) + bool vlan_on, bool vlvf_bypass) { u32 regindex; u32 bitindex; u32 bits; u32 vftabyte; - UNREFERENCED_1PARAMETER(bypass_vlvf); + UNREFERENCED_1PARAMETER(vlvf_bypass); DEBUGFUNC("ixgbe_set_vfta_82598"); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c index 5bc7c2b9..832242ee 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c @@ -1178,6 +1178,7 @@ mac_reset_top: if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, hw->mac.san_addr, 0, IXGBE_RAH_AV); @@ -1809,14 +1810,23 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, } IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIP6M, fdirip6m); - /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSIP4M and - * FDIRDIP4M in cloud mode to allow L3/L3 packets to - * tunnel. + /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSCTPM, + * FDIRSIP4M and FDIRDIP4M in cloud mode to allow + * L3/L3 packets to tunnel. */ IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, 0xFFFFFFFF); IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, 0xFFFFFFFF); IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, 0xFFFFFFFF); + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, 0xFFFFFFFF); + break; + default: + break; + } } /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c index 17868676..270a97dc 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c @@ -106,8 +106,10 @@ s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) status = ixgbe_init_ops_X550(hw); break; case ixgbe_mac_X550EM_x: + status = ixgbe_init_ops_X550EM_x(hw); + break; case ixgbe_mac_X550EM_a: - status = ixgbe_init_ops_X550EM(hw); + status = ixgbe_init_ops_X550EM_a(hw); break; case ixgbe_mac_82599_vf: case ixgbe_mac_X540_vf: @@ -203,6 +205,7 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_SFP: + case IXGBE_DEV_ID_X550EM_X_XFI: hw->mac.type = ixgbe_mac_X550EM_x; hw->mvals = ixgbe_mvals_X550EM_x; break; @@ -1090,7 +1093,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass) { return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind, - vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED); + vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED); } /** @@ -1100,7 +1103,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, * @vind: VMDq output index that maps queue to VLAN id in VLVFB * @vlan_on: boolean flag to turn on/off VLAN in VLVF * @vfta_delta: pointer to the difference between the current value of VFTA - * and the desired value + * and the desired value * @vfta: the desired value of the VFTA * @vlvf_bypass: boolean flag indicating updating the default pool is okay * @@ -1110,7 +1113,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, u32 *vfta_delta, u32 vfta, bool vlvf_bypass) { return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind, - vlan_on, vfta_delta, vfta, vlvf_bypass), + vlan_on, vfta_delta, vfta, vlvf_bypass), 
IXGBE_NOT_IMPLEMENTED); } @@ -1145,12 +1148,15 @@ s32 ixgbe_setup_fc(struct ixgbe_hw *hw) * @min: driver minor number to be sent to firmware * @build: driver build number to be sent to firmware * @ver: driver version number to be sent to firmware + * @len: length of driver_ver string + * @driver_ver: driver string **/ s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver) + u8 ver, u16 len, char *driver_ver) { return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min, - build, ver), IXGBE_NOT_IMPLEMENTED); + build, ver, len, driver_ver), + IXGBE_NOT_IMPLEMENTED); } @@ -1659,6 +1665,7 @@ void ixgbe_init_swfw_semaphore(struct ixgbe_hw *hw) hw->mac.ops.init_swfw_sync(hw); } + void ixgbe_disable_rx(struct ixgbe_hw *hw) { if (hw->mac.ops.disable_rx) diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h index 3aad1da7..af85d4ea 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h @@ -45,6 +45,8 @@ extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw); s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); @@ -131,7 +133,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, s32 ixgbe_fc_enable(struct ixgbe_hw *hw); s32 ixgbe_setup_fc(struct ixgbe_hw *hw); s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver); + u8 ver, u16 len, char *driver_ver); s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw); s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw); void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c index 811875a4..96456678 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c @@ -113,6 +113,7 @@ s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw) mac->ops.led_off = ixgbe_led_off_generic; mac->ops.blink_led_start = ixgbe_blink_led_start_generic; mac->ops.blink_led_stop = ixgbe_blink_led_stop_generic; + mac->ops.init_led_link_act = ixgbe_init_led_link_act_generic; /* RAR, Multicast, VLAN */ mac->ops.set_rar = ixgbe_set_rar_generic; @@ -168,13 +169,24 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) switch (hw->phy.media_type) { case ixgbe_media_type_fiber_qsfp: case ixgbe_media_type_fiber: - hw->mac.ops.check_link(hw, &speed, &link_up, false); - /* if link is down, assume supported */ - if (link_up) - supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? + /* flow control autoneg black list */ + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_SFP: + case IXGBE_DEV_ID_X550EM_A_SFP_N: + case IXGBE_DEV_ID_X550EM_A_QSFP: + case IXGBE_DEV_ID_X550EM_A_QSFP_N: + supported = false; + break; + default: + hw->mac.ops.check_link(hw, &speed, &link_up, false); + /* if link is down, assume supported */ + if (link_up) + supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? 
true : false; - else - supported = true; + else + supported = true; + } + break; case ixgbe_media_type_backplane: supported = true; @@ -188,6 +200,9 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550T: case IXGBE_DEV_ID_X550T1: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: supported = true; break; default: @@ -197,9 +212,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) break; } - ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, - "Device %x does not support flow control autoneg", - hw->device_id); + if (!supported) + ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, + "Device %x does not support flow control autoneg", + hw->device_id); return supported; } @@ -371,6 +387,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) { s32 ret_val; u32 ctrl_ext; + u16 device_caps; DEBUGFUNC("ixgbe_start_hw_generic"); @@ -393,14 +410,31 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) /* Setup flow control */ ret_val = ixgbe_setup_fc(hw); - if (ret_val != IXGBE_SUCCESS) - goto out; + if (ret_val != IXGBE_SUCCESS && ret_val != IXGBE_NOT_IMPLEMENTED) { + DEBUGOUT1("Flow control setup failed, returning %d\n", ret_val); + return ret_val; + } + + /* Cache bit indicating need for crosstalk fix */ + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + hw->mac.ops.get_device_caps(hw, &device_caps); + if (device_caps & IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR) + hw->need_crosstalk_fix = false; + else + hw->need_crosstalk_fix = true; + break; + default: + hw->need_crosstalk_fix = false; + break; + } /* Clear adapter stopped flag */ hw->adapter_stopped = false; -out: - return ret_val; + return IXGBE_SUCCESS; } /** @@ -466,6 +500,12 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) status = hw->mac.ops.start_hw(hw); } + /* Initialize the LED link active for LED blink support */ + hw->mac.ops.init_led_link_act(hw); + + if (status != IXGBE_SUCCESS) + DEBUGOUT1("Failed to initialize HW, STATUS = %d\n", status); + return status; } @@ -1046,7 +1086,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) { hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4); bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >> - IXGBE_EE_CTRL_4_INST_ID_SHIFT; + IXGBE_EE_CTRL_4_INST_ID_SHIFT; } } @@ -1104,6 +1144,47 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) return ixgbe_disable_pcie_master(hw); } +/** + * ixgbe_init_led_link_act_generic - Store the LED index link/activity. + * @hw: pointer to hardware structure + * + * Store the index for the link active LED. This will be used to support + * blinking the LED. + **/ +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 led_reg, led_mode; + u8 i; + + led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* Get LED link active from the LEDCTL register */ + for (i = 0; i < 4; i++) { + led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i); + + if ((led_mode & IXGBE_LED_MODE_MASK_BASE) == + IXGBE_LED_LINK_ACTIVE) { + mac->led_link_act = i; + return IXGBE_SUCCESS; + } + } + + /* + * If LEDCTL register does not have the LED link active set, then use + * known MAC defaults. 
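+ * (LED 1 on X550EM_x/X550EM_a MACs, LED 2 on all other MACs.)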
+ */ + switch (hw->mac.type) { + case ixgbe_mac_X550EM_a: + case ixgbe_mac_X550EM_x: + mac->led_link_act = 1; + break; + default: + mac->led_link_act = 2; + } + return IXGBE_SUCCESS; +} + /** * ixgbe_led_on_generic - Turns on the software controllable LEDs. * @hw: pointer to hardware structure @@ -1115,6 +1196,9 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_led_on_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* To turn on the LED, set mode to ON. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index); @@ -1135,6 +1219,9 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_led_off_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* To turn off the LED, set mode to OFF. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index); @@ -2851,7 +2938,7 @@ out: * advertised settings **/ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, - u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) { ERROR_REPORT3(IXGBE_ERROR_UNSUPPORTED, @@ -3323,7 +3410,7 @@ s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked) **/ s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw) { - int secrxreg; + u32 secrxreg; DEBUGFUNC("ixgbe_enable_sec_rx_path_generic"); @@ -3370,6 +3457,9 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_start_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* * Link must be up to auto-blink the LEDs; * Force it if link is down. @@ -3415,6 +3505,10 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_stop_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + + ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val != IXGBE_SUCCESS) goto out; @@ -3720,7 +3814,8 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) } /* was that the last pool using this rar? */ - if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + if (mpsar_lo == 0 && mpsar_hi == 0 && + rar != 0 && rar != hw->mac.san_mac_rar_index) hw->mac.ops.clear_rar(hw, rar); done: return IXGBE_SUCCESS; @@ -3887,7 +3982,8 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, vfta_delta = 1 << (vlan % 32); vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx)); - /* vfta_delta represents the difference between the current value + /* + * vfta_delta represents the difference between the current value * of vfta and the value we want in the register. 
Since the diff * is an XOR mask we can just update the vfta using an XOR */ @@ -3920,7 +4016,7 @@ vfta_update: * @vind: VMDq output index that maps queue to VLAN id in VLVFB * @vlan_on: boolean flag to turn on/off VLAN in VLVF * @vfta_delta: pointer to the difference between the current value of VFTA - * and the desired value + * and the desired value * @vfta: the desired value of the VFTA * @vlvf_bypass: boolean flag indicating updating default pool is okay * @@ -3947,6 +4043,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, */ if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE)) return IXGBE_SUCCESS; + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass); if (vlvf_index < 0) return vlvf_index; @@ -3967,7 +4064,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, * we run the risk of stray packets leaking into * the PF via the default pool */ - if (vfta_delta) + if (*vfta_delta) IXGBE_WRITE_REG(hw, IXGBE_VFTA(vlan / 32), vfta); /* disable VLVF and clear remaining bit from pool */ @@ -3976,6 +4073,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, return IXGBE_SUCCESS; } + /* If there are still bits set in the VLVFB registers * for the VLAN ID indicated we need to see if the * caller is requesting that we clear the VFTA entry bit. @@ -4024,6 +4122,32 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } +/** + * ixgbe_need_crosstalk_fix - Determine if we need to do cross talk fix + * @hw: pointer to hardware structure + * + * Contains the logic to identify if we need to verify link for the + * crosstalk fix + **/ +static bool ixgbe_need_crosstalk_fix(struct ixgbe_hw *hw) +{ + + /* Does FW say we need the fix */ + if (!hw->need_crosstalk_fix) + return false; + + /* Only consider SFP+ PHYs i.e. media type fiber */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + case ixgbe_media_type_fiber_qsfp: + break; + default: + return false; + } + + return true; +} + /** * ixgbe_check_mac_link_generic - Determine link and speed status * @hw: pointer to hardware structure @@ -4041,6 +4165,35 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, DEBUGFUNC("ixgbe_check_mac_link_generic"); + /* If Crosstalk fix enabled do the sanity check of making sure + * the SFP+ cage is full. 
+ */ + if (ixgbe_need_crosstalk_fix(hw)) { + u32 sfp_cage_full; + + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP2; + break; + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP0; + break; + default: + /* sanity check - No SFP+ devices here */ + sfp_cage_full = false; + break; + } + + if (!sfp_cage_full) { + *link_up = false; + *speed = IXGBE_LINK_SPEED_UNKNOWN; + return IXGBE_SUCCESS; + } + } + /* clear the old state */ links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); @@ -4082,11 +4235,18 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; - if (hw->mac.type >= ixgbe_mac_X550) { + if (hw->mac.type == ixgbe_mac_X550) { if (links_reg & IXGBE_LINKS_SPEED_NON_STD) *speed = IXGBE_LINK_SPEED_5GB_FULL; } break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T || + hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) { + *speed = IXGBE_LINK_SPEED_10_FULL; + } + break; default: *speed = IXGBE_LINK_SPEED_UNKNOWN; } @@ -4318,43 +4478,31 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) } /** - * ixgbe_host_interface_command - Issue command to manageability block + * ixgbe_hic_unlocked - Issue command to manageability block unlocked * @hw: pointer to the HW structure - * @buffer: contains the command to write and where the return status will - * be placed + * @buffer: command to write and where the return status will be placed * @length: length of buffer, must be multiple of 4 bytes * @timeout: time in ms to wait for command completion - * @return_data: read and return data from the buffer (true) or not (false) - * Needed because FW structures are big endian and decoding of - * these fields can be 8 bit or 16 bit based on command. Decoding - * is not easily understood without making a table of commands. - * So we will leave this up to the caller to read back the data - * in these cases. * * Communicates with the manageability block. On success return IXGBE_SUCCESS * else returns semaphore error when encountering an error acquiring * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * + * This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held + * by the caller. **/ -s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, - u32 length, u32 timeout, bool return_data) +s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, + u32 timeout) { - u32 hicr, i, bi, fwsts; - u32 hdr_size = sizeof(struct ixgbe_hic_hdr); - u16 buf_len; + u32 hicr, i, fwsts; u16 dword_len; - s32 status; - DEBUGFUNC("ixgbe_host_interface_command"); + DEBUGFUNC("ixgbe_hic_unlocked"); - if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { DEBUGOUT1("Buffer length failure buffersize=%d.\n", length); return IXGBE_ERR_HOST_INTERFACE_COMMAND; } - /* Take management host interface semaphore */ - status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); - - if (status) - return status; /* Set bit 9 of FWSTS clearing FW reset indication */ fwsts = IXGBE_READ_REG(hw, IXGBE_FWSTS); @@ -4362,17 +4510,15 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, /* Check that the host interface is enabled. 
*/ hicr = IXGBE_READ_REG(hw, IXGBE_HICR); - if ((hicr & IXGBE_HICR_EN) == 0) { + if (!(hicr & IXGBE_HICR_EN)) { DEBUGOUT("IXGBE_HOST_EN bit disabled.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto rel_out; + return IXGBE_ERR_HOST_INTERFACE_COMMAND; } /* Calculate length in DWORDs. We must be DWORD aligned */ - if ((length % (sizeof(u32))) != 0) { + if (length % sizeof(u32)) { DEBUGOUT("Buffer length failure, not aligned to dword"); - status = IXGBE_ERR_INVALID_ARGUMENT; - goto rel_out; + return IXGBE_ERR_INVALID_ARGUMENT; } dword_len = length >> 2; @@ -4395,14 +4541,59 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, } /* Check command completion */ - if ((timeout != 0 && i == timeout) || + if ((timeout && i == timeout) || !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) { ERROR_REPORT1(IXGBE_ERROR_CAUTION, "Command has failed with no status valid.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto rel_out; + return IXGBE_ERR_HOST_INTERFACE_COMMAND; } + return IXGBE_SUCCESS; +} + +/** + * ixgbe_host_interface_command - Issue command to manageability block + * @hw: pointer to the HW structure + * @buffer: contains the command to write and where the return status will + * be placed + * @length: length of buffer, must be multiple of 4 bytes + * @timeout: time in ms to wait for command completion + * @return_data: read and return data from the buffer (true) or not (false) + * Needed because FW structures are big endian and decoding of + * these fields can be 8 bit or 16 bit based on command. Decoding + * is not easily understood without making a table of commands. + * So we will leave this up to the caller to read back the data + * in these cases. + * + * Communicates with the manageability block. On success return IXGBE_SUCCESS + * else returns semaphore error when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data) +{ + u32 hdr_size = sizeof(struct ixgbe_hic_hdr); + u16 dword_len; + u16 buf_len; + s32 status; + u32 bi; + + DEBUGFUNC("ixgbe_host_interface_command"); + + if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + DEBUGOUT1("Buffer length failure buffersize=%d.\n", length); + return IXGBE_ERR_HOST_INTERFACE_COMMAND; + } + + /* Take management host interface semaphore */ + status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); + if (status) + return status; + + status = ixgbe_hic_unlocked(hw, buffer, length, timeout); + if (status) + goto rel_out; + if (!return_data) goto rel_out; @@ -4417,7 +4608,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, /* If there is any thing in data position pull it in */ buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len; - if (buf_len == 0) + if (!buf_len) goto rel_out; if (length < buf_len + hdr_size) { @@ -4455,13 +4646,15 @@ rel_out: * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. 
**/ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 sub) + u8 build, u8 sub, u16 len, + const char *driver_ver) { struct ixgbe_hic_drv_info fw_cmd; int i; s32 ret_val = IXGBE_SUCCESS; DEBUGFUNC("ixgbe_set_fw_drv_ver_generic"); + UNREFERENCED_2PARAMETER(len, driver_ver); fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN; @@ -4923,14 +5116,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, speedcnt++; highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL; - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != IXGBE_SUCCESS) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) - goto out; - /* Set the module link speed */ switch (hw->phy.media_type) { case ixgbe_media_type_fiber: @@ -4981,14 +5166,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN) highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL; - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != IXGBE_SUCCESS) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) - goto out; - /* Set the module link speed */ switch (hw->phy.media_type) { case ixgbe_media_type_fiber: diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h index 0545f85c..903f34d5 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h @@ -72,6 +72,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw); s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); @@ -133,7 +134,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, - u32 vind, bool vlan_on, bool vlvf_bypass); + u32 vind, bool vlan_on, bool vlvf_bypass); s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, u32 *vfta_delta, u32 vfta, bool vlvf_bypass); @@ -155,11 +156,14 @@ void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, int strategy); void ixgbe_enable_relaxed_ordering_gen2(struct ixgbe_hw *hw); s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 ver); + u8 build, u8 ver, u16 len, const char *str); u8 ixgbe_calculate_checksum(u8 *buffer, u32 length); s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, u32 length, u32 timeout, bool return_data); - +s32 ixgbe_hic_unlocked(struct ixgbe_hw *, u32 *buffer, u32 length, u32 timeout); +s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *); +s32 ixgbe_fw_phy_activity(struct ixgbe_hw *, u16 activity, + u32 (*data)[FW_PHY_ACT_DATA_COUNT]); void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); extern s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c new file mode 100644 index 00000000..47143a26 --- /dev/null +++ 
b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c @@ -0,0 +1,240 @@ +/******************************************************************************* + +Copyright (c) 2001-2015, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +#include "ixgbe_vf.h" +#include "ixgbe_hv_vf.h" + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr next, + bool clear) +{ + UNREFERENCED_5PARAMETER(hw, mc_addr_list, mc_addr_count, next, clear); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) +{ + UNREFERENCED_2PARAMETER(hw, xcast_mode); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool vlvf_bypass) +{ + UNREFERENCED_5PARAMETER(hw, vlan, vind, vlan_on, vlvf_bypass); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +static s32 ixgbevf_hv_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) +{ + UNREFERENCED_3PARAMETER(hw, index, addr); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw) +{ + UNREFERENCED_PARAMETER(hw); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vlan, u32 vind) +{ + UNREFERENCED_5PARAMETER(hw, index, addr, vlan, vind); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant; there is no mailbox communication. 
+ */ +static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up, + bool autoneg_wait_to_complete) +{ + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + u32 links_reg; + UNREFERENCED_1PARAMETER(autoneg_wait_to_complete); + + /* If we were hit with a reset drop the link */ + if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) + mac->get_link_status = true; + + if (!mac->get_link_status) + goto out; + + /* if link status is down no point in checking to see if pf is up */ + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; + + /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs + * before the link status is correct + */ + if (mac->type == ixgbe_mac_82599_vf) { + int i; + + for (i = 0; i < 5; i++) { + DELAY(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; + } + } + + switch (links_reg & IXGBE_LINKS_SPEED_82599) { + case IXGBE_LINKS_SPEED_10G_82599: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + if (hw->mac.type >= ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_1G_82599: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_LINKS_SPEED_100_82599: + *speed = IXGBE_LINK_SPEED_100_FULL; + if (hw->mac.type == ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* Reserved for pre-x550 devices */ + if (hw->mac.type >= ixgbe_mac_X550) + *speed = IXGBE_LINK_SPEED_10_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } + + /* if we passed all the tests above then the link is up and we no + * longer need to check for link + */ + mac->get_link_status = false; + +out: + *link_up = !mac->get_link_status; + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_set_rlpml_vf - Set the maximum receive packet length + * @hw: pointer to the HW structure + * @max_size: value to assign to max frame size + * Hyper-V variant. + **/ +static s32 ixgbevf_hv_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size) +{ + u32 reg; + + /* If we are on Hyper-V, we implement this functionality + * differently. + */ + reg = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(0)); + /* CRC == 4 */ + reg |= ((max_size + 4) | IXGBE_RXDCTL_RLPML_EN); + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(0), reg); + + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_negotiate_api_version_vf - Negotiate supported API version + * @hw: pointer to the HW structure + * @api: integer containing requested API version + * Hyper-V version - only ixgbe_mbox_api_10 supported. + **/ +static int ixgbevf_hv_negotiate_api_version_vf(struct ixgbe_hw *hw, int api) +{ + UNREFERENCED_1PARAMETER(hw); + + /* Hyper-V only supports api version ixgbe_mbox_api_10 */ + if (api != ixgbe_mbox_api_10) + return IXGBE_ERR_INVALID_ARGUMENT; + + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_init_ops_vf - Initialize the pointers for vf + * @hw: pointer to hardware structure + * + * This will assign function pointers, adapter-specific functions can + * override the assignment of generic function pointers by assigning + * their own adapter-specific function pointers. + * Does not touch the hardware. 
+ **/ +s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw) +{ + /* Set defaults for VF then override applicable Hyper-V + * specific functions + */ + ixgbe_init_ops_vf(hw); + + hw->mac.ops.reset_hw = ixgbevf_hv_reset_hw_vf; + hw->mac.ops.check_link = ixgbevf_hv_check_mac_link_vf; + hw->mac.ops.negotiate_api_version = ixgbevf_hv_negotiate_api_version_vf; + hw->mac.ops.set_rar = ixgbevf_hv_set_rar_vf; + hw->mac.ops.update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf; + hw->mac.ops.update_xcast_mode = ixgbevf_hv_update_xcast_mode; + hw->mac.ops.set_uc_addr = ixgbevf_hv_set_uc_addr_vf; + hw->mac.ops.set_vfta = ixgbevf_hv_set_vfta_vf; + hw->mac.ops.set_rlpml = ixgbevf_hv_set_rlpml_vf; + + return IXGBE_SUCCESS; +} diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h new file mode 100644 index 00000000..9119f29f --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h @@ -0,0 +1,41 @@ +/******************************************************************************* + +Copyright (c) 2001-2016, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +***************************************************************************/ + +#ifndef _IXGBE_HV_VF_H_ +#define _IXGBE_HV_VF_H_ + +#include "ixgbe_type.h" + +s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw); + +#endif /* _IXGBE_HV_VF_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h index d775142d..7556a818 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h @@ -90,6 +90,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */ ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */ + ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -109,9 +110,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_QUEUES 0x09 /* get queue configuration */ /* mailbox API, version 1.2 VF requests */ -#define IXGBE_VF_GET_RETA 0x0a /* VF request for RETA */ -#define IXGBE_VF_GET_RSS_KEY 0x0b /* get RSS key */ -#define IXGBE_VF_UPDATE_XCAST_MODE 0x0C +#define IXGBE_VF_GET_RETA 0x0a /* VF request for RETA */ +#define IXGBE_VF_GET_RSS_KEY 0x0b /* get RSS key */ +#define IXGBE_VF_UPDATE_XCAST_MODE 0x0c /* GET_QUEUES return data indices within the mailbox */ #define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h index 06d1ee1c..4aab278d 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "../ixgbe_logs.h" #include "../ixgbe_bypass_defines.h" @@ -81,6 +82,7 @@ #define UNREFERENCED_2PARAMETER(_p, _q) #define UNREFERENCED_3PARAMETER(_p, _q, _r) #define UNREFERENCED_4PARAMETER(_p, _q, _r, _s) +#define UNREFERENCED_5PARAMETER(_p, _q, _r, _s, _t) /* Shared code error reporting */ enum { @@ -95,11 +97,13 @@ enum { #define STATIC static #define IXGBE_NTOHL(_i) rte_be_to_cpu_32(_i) #define IXGBE_NTOHS(_i) rte_be_to_cpu_16(_i) +#define IXGBE_CPU_TO_LE16(_i) rte_cpu_to_le_16(_i) #define IXGBE_CPU_TO_LE32(_i) rte_cpu_to_le_32(_i) -#define IXGBE_LE32_TO_CPU(_i) rte_le_to_cpu_32(_i) +#define IXGBE_LE32_TO_CPU(_i) rte_le_to_cpu_32(_i) #define IXGBE_LE32_TO_CPUS(_i) rte_le_to_cpu_32(_i) #define IXGBE_CPU_TO_BE16(_i) rte_cpu_to_be_16(_i) #define IXGBE_CPU_TO_BE32(_i) rte_cpu_to_be_32(_i) +#define IXGBE_BE32_TO_CPU(_i) rte_be_to_cpu_32(_i) typedef uint8_t u8; typedef int8_t s8; @@ -120,16 +124,18 @@ typedef int bool; #define prefetch(x) rte_prefetch0(x) -#define IXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define IXGBE_PCI_REG(reg) rte_read32(reg) static inline uint32_t ixgbe_read_addr(volatile void* addr) { return rte_le_to_cpu_32(IXGBE_PCI_REG(addr)); } -#define IXGBE_PCI_REG_WRITE(reg, value) do { \ - IXGBE_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \ -} while(0) +#define IXGBE_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) + +#define IXGBE_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) #define IXGBE_PCI_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg))) diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c index ed1b14f3..c9538056 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c 
+++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c @@ -113,7 +113,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 *val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; - int max_retry = 10; + int max_retry = 3; int retry = 0; u8 csum_byte; u8 high_bits; @@ -121,8 +121,6 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg, u8 reg_high; u8 csum; - if (hw->mac.type >= ixgbe_mac_X550) - max_retry = 3; reg_high = ((reg >> 7) & 0xFE) | 1; /* Indicate read combined */ csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF); csum = ~csum; @@ -282,6 +280,42 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } +/** + * ixgbe_probe_phy - Probe a single address for a PHY + * @hw: pointer to hardware structure + * @phy_addr: PHY address to probe + * + * Returns true if PHY found + */ +static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) +{ + u16 ext_ability = 0; + + if (!ixgbe_validate_phy_addr(hw, phy_addr)) { + DEBUGOUT1("Unable to validate PHY address 0x%04X\n", + phy_addr); + return false; + } + + if (ixgbe_get_phy_id(hw)) + return false; + + hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & + (IXGBE_MDIO_PHY_10GBASET_ABILITY | + IXGBE_MDIO_PHY_1000BASET_ABILITY)) + hw->phy.type = ixgbe_phy_cu_unknown; + else + hw->phy.type = ixgbe_phy_generic; + } + + return true; +} + /** * ixgbe_identify_phy_generic - Get physical layer module * @hw: pointer to hardware structure @@ -291,8 +325,7 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { s32 status = IXGBE_ERR_PHY_ADDR_INVALID; - u32 phy_addr; - u16 ext_ability = 0; + u16 phy_addr; DEBUGFUNC("ixgbe_identify_phy_generic"); @@ -303,45 +336,33 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; } - if (hw->phy.type == ixgbe_phy_unknown) { - for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { - if (ixgbe_validate_phy_addr(hw, phy_addr)) { - hw->phy.addr = phy_addr; - ixgbe_get_phy_id(hw); - hw->phy.type = - ixgbe_get_phy_type_from_id(hw->phy.id); - - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.ops.read_reg(hw, - IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &ext_ability); - if (ext_ability & - (IXGBE_MDIO_PHY_10GBASET_ABILITY | - IXGBE_MDIO_PHY_1000BASET_ABILITY)) - hw->phy.type = - ixgbe_phy_cu_unknown; - else - hw->phy.type = - ixgbe_phy_generic; - } + if (hw->phy.type != ixgbe_phy_unknown) + return IXGBE_SUCCESS; - status = IXGBE_SUCCESS; - break; - } - } + if (hw->phy.nw_mng_if_sel) { + phy_addr = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + if (ixgbe_probe_phy(hw, phy_addr)) + return IXGBE_SUCCESS; + else + return IXGBE_ERR_PHY_ADDR_INVALID; + } - /* Certain media types do not have a phy so an address will not - * be found and the code will take this path. Caller has to - * decide if it is an error or not. - */ - if (status != IXGBE_SUCCESS) { - hw->phy.addr = 0; + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_probe_phy(hw, phy_addr)) { + status = IXGBE_SUCCESS; + break; } - } else { - status = IXGBE_SUCCESS; } + /* Certain media types do not have a phy so an address will not + * be found and the code will take this path. 
Caller has to + * decide if it is an error or not. + */ + if (status != IXGBE_SUCCESS) + hw->phy.addr = 0; + return status; } @@ -393,6 +414,8 @@ bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr) if (phy_id != 0xFFFF && phy_id != 0x0) valid = true; + DEBUGOUT1("PHY ID HIGH is 0x%04X\n", phy_id); + return valid; } @@ -421,6 +444,9 @@ s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK); hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK); } + DEBUGOUT2("PHY_ID_HIGH 0x%04X, PHY_ID_LOW 0x%04X\n", + phy_id_high, phy_id_low); + return status; } @@ -439,7 +465,6 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case TN1010_PHY_ID: phy_type = ixgbe_phy_tn; break; - case X550_PHY_ID1: case X550_PHY_ID2: case X550_PHY_ID3: case X540_PHY_ID: @@ -452,10 +477,12 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) phy_type = ixgbe_phy_nl; break; case X557_PHY_ID: + case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; case IXGBE_M88E1500_E_PHY_ID: - phy_type = ixgbe_phy_m88; + case IXGBE_M88E1543_E_PHY_ID: + phy_type = ixgbe_phy_ext_1g_t; break; default: phy_type = ixgbe_phy_unknown; @@ -506,11 +533,30 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) */ for (i = 0; i < 30; i++) { msec_delay(100); - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); - if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { - usec_delay(2); - break; + if (hw->phy.type == ixgbe_phy_x550em_ext_t) { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_TX_VENDOR_ALARMS_3, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &ctrl); + if (status != IXGBE_SUCCESS) + return status; + + if (ctrl & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) { + usec_delay(2); + break; + } + } else { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + &ctrl); + if (status != IXGBE_SUCCESS) + return status; + + if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { + usec_delay(2); + break; + } } } @@ -532,7 +578,7 @@ out: * @phy_data: Pointer to read data from PHY register **/ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 *phy_data) + u16 *phy_data) { u32 i, data, command; @@ -554,12 +600,13 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, command = IXGBE_READ_REG(hw, IXGBE_MSCA); if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; + break; } if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY address command did not complete.\n"); + DEBUGOUT("PHY address command did not complete, returning IXGBE_ERR_PHY\n"); return IXGBE_ERR_PHY; } @@ -589,6 +636,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY read command didn't complete\n"); + DEBUGOUT("PHY read command didn't complete, returning IXGBE_ERR_PHY\n"); return IXGBE_ERR_PHY; } @@ -719,7 +767,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, DEBUGFUNC("ixgbe_write_phy_reg_generic"); if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == IXGBE_SUCCESS) { - status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, + status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { @@ -746,91 +794,63 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set 
or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) && + (speed & IXGBE_LINK_SPEED_10GB_FULL)) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; - hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); - if (hw->mac.type == ixgbe_mac_X550) { - if (speed & IXGBE_LINK_SPEED_5GB_FULL) { - /* Set or unset auto-negotiation 5G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_5GB_FULL) - autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - if (speed & IXGBE_LINK_SPEED_2_5GB_FULL) { - /* Set or unset auto-negotiation 2.5G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_2_5GB_FULL) - autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + if (hw->mac.type == ixgbe_mac_X550) { + /* Set or unset auto-negotiation 5G advertisement */ + autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_5GB_FULL)) + autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; + + /* Set or unset auto-negotiation 2.5G advertisement */ + autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_2_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_2_5GB_FULL)) + autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; } - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + /* Set or unset auto-negotiation 1G advertisement */ + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) && + (speed & IXGBE_LINK_SPEED_1GB_FULL)) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; - autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + /* Set or 
unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | + IXGBE_MII_100BASE_T_ADVERTISE_HALF); + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) && + (speed & IXGBE_LINK_SPEED_100_FULL)) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | - IXGBE_MII_100BASE_T_ADVERTISE_HALF); - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); /* Blocked by MNG FW so don't reset PHY */ if (ixgbe_check_reset_blocked(hw)) @@ -882,6 +902,9 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_100_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + if (speed & IXGBE_LINK_SPEED_10_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10_FULL; + /* Setup link based on the new speed settings */ ixgbe_setup_phy_link(hw); @@ -919,6 +942,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; break; case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL; break; default: diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h index 281f9faf..820d4712 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h @@ -92,8 +92,9 @@ POSSIBILITY OF SUCH DAMAGE. #define IXGBE_CS4227_GLOBAL_ID_MSB 1 #define IXGBE_CS4227_SCRATCH 2 #define IXGBE_CS4227_GLOBAL_ID_VALUE 0x03E5 -#define IXGBE_CS4223_PHY_ID 0x7003/* Quad port */ -#define IXGBE_CS4227_PHY_ID 0x3003/* Dual port */ +#define IXGBE_CS4227_EFUSE_PDF_SKU 0x19F +#define IXGBE_CS4223_SKU_ID 0x0010 /* Quad port */ +#define IXGBE_CS4227_SKU_ID 0x0014 /* Dual port */ #define IXGBE_CS4227_RESET_PENDING 0x1357 #define IXGBE_CS4227_RESET_COMPLETE 0x5AA5 #define IXGBE_CS4227_RETRIES 15 diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h index 83818a96..bb1f85b0 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h @@ -105,11 +105,11 @@ POSSIBILITY OF SUCH DAMAGE. #define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 #define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 #define IXGBE_SUBDEV_ID_82599_SP_560FLR 0x211B -#define IXGBE_SUBDEV_ID_82599_LOM_SFP 0x8976 #define IXGBE_SUBDEV_ID_82599_LOM_SNAP6 0x2159 #define IXGBE_SUBDEV_ID_82599_SFP_1OCP 0x000D #define IXGBE_SUBDEV_ID_82599_SFP_2OCP 0x0008 -#define IXGBE_SUBDEV_ID_82599_SFP_LOM 0x06EE +#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1 0x8976 +#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2 0x06EE #define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A #define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 #define IXGBE_DEV_ID_82599_SFP_EM 0x1507 @@ -146,6 +146,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC #define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD #define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE +#define IXGBE_DEV_ID_X550EM_X_XFI 0x15B0 #define IXGBE_DEV_ID_X550_VF_HV 0x1564 #define IXGBE_DEV_ID_X550_VF 0x1565 #define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 @@ -565,6 +566,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_PROXYFC 0x05F64 /* Proxying Filter Control Register */ #define IXGBE_VXLANCTRL 0x0000507C /* Rx filter VXLAN UDPPORT Register */ +/* masks for accessing VXLAN and GENEVE UDP ports */ +#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK 0x0000ffff /* VXLAN port */ +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK 0xffff0000 /* GENEVE port */ +#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK 0xffffffff /* GENEVE/VXLAN */ + +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT 16 + #define IXGBE_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Flex host filter table */ /* Ext Flexible Host Filter Table */ #define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100)) @@ -1038,7 +1046,7 @@ struct ixgbe_dmac_config { #define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */ #define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */ #define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_LSWFW 0x15014 +#define IXGBE_LSWFW 0x15F14 #define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */ #define IXGBE_BMCIPVAL 0x05060 #define IXGBE_BMCIP_IPADDR_TYPE 0x00000001 @@ -1640,17 +1648,17 @@ struct ixgbe_dmac_config { #define TN1010_PHY_ID 0x00A19410 #define TNX_FW_REV 0xB #define X540_PHY_ID 0x01540200 -#define X550_PHY_ID1 0x01540220 #define X550_PHY_ID2 0x01540223 #define X550_PHY_ID3 0x01540221 #define X557_PHY_ID 0x01540240 +#define X557_PHY_ID2 0x01540250 #define AQ_FW_REV 0x20 #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 /* PHY Types */ -#define IXGBE_M88E1500_E_PHY_ID 0x01410DD0 -#define IXGBE_M88E1543_E_PHY_ID 0x01410EA0 +#define IXGBE_M88E1500_E_PHY_ID 0x01410DD0 +#define IXGBE_M88E1543_E_PHY_ID 0x01410EA0 /* Special PHY Init Routine */ #define IXGBE_PHY_INIT_OFFSET_NL 0x002B @@ -1765,6 +1773,8 @@ enum { #define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT) /* VMOLR bitmasks */ +#define IXGBE_VMOLR_UPE 0x00400000 /* unicast promiscuous */ +#define IXGBE_VMOLR_VPE 0x00800000 /* VLAN promiscuous */ #define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */ #define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */ #define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */ @@ -2203,6 +2213,7 @@ enum { #define IXGBE_LINKS_SPEED_10G_82599 0x30000000 #define IXGBE_LINKS_SPEED_1G_82599 0x20000000 #define IXGBE_LINKS_SPEED_100_82599 0x10000000 +#define IXGBE_LINKS_SPEED_10_X550EM_A 0x00000000 #define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */ #define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */ @@ -2335,7 +2346,9 @@ enum { #define IXGBE_SAN_MAC_ADDR_PTR 0x28 #define IXGBE_DEVICE_CAPS 0x2C -#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_82599_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_X550_SERIAL_NUMBER_MAC_ADDR 0x04 + #define IXGBE_PCIE_MSIX_82599_CAPS 0x72 #define IXGBE_MAX_MSIX_VECTORS_82599 0x40 #define IXGBE_PCIE_MSIX_82598_CAPS 0x62 @@ -2780,6 +2793,7 @@ enum { #define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ #define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ #define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ +#define IXGBE_RXDADV_PKTTYPE_GENEVE 0x00000800 /* GENEVE hdr present */ #define IXGBE_RXDADV_PKTTYPE_VXLAN 0x00000800 /* VXLAN hdr present */ 
#define IXGBE_RXDADV_PKTTYPE_TUNNEL 0x00010000 /* Tunnel type */ #define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ @@ -3023,6 +3037,7 @@ enum ixgbe_fdir_pballoc_type { #define FW_CEM_UNUSED_VER 0x0 #define FW_CEM_MAX_RETRIES 3 #define FW_CEM_RESP_STATUS_SUCCESS 0x1 +#define FW_CEM_DRIVER_VERSION_SIZE 39 /* +9 would send 48 bytes to fw */ #define FW_READ_SHADOW_RAM_CMD 0x31 #define FW_READ_SHADOW_RAM_LEN 0x6 #define FW_WRITE_SHADOW_RAM_CMD 0x33 @@ -3048,13 +3063,66 @@ enum ixgbe_fdir_pballoc_type { #define FW_INT_PHY_REQ_LEN 10 #define FW_INT_PHY_REQ_READ 0 #define FW_INT_PHY_REQ_WRITE 1 +#define FW_PHY_ACT_REQ_CMD 5 +#define FW_PHY_ACT_DATA_COUNT 4 +#define FW_PHY_ACT_REQ_LEN (4 + 4 * FW_PHY_ACT_DATA_COUNT) +#define FW_PHY_ACT_INIT_PHY 1 +#define FW_PHY_ACT_SETUP_LINK 2 +#define FW_PHY_ACT_LINK_SPEED_10 (1u << 0) +#define FW_PHY_ACT_LINK_SPEED_100 (1u << 1) +#define FW_PHY_ACT_LINK_SPEED_1G (1u << 2) +#define FW_PHY_ACT_LINK_SPEED_2_5G (1u << 3) +#define FW_PHY_ACT_LINK_SPEED_5G (1u << 4) +#define FW_PHY_ACT_LINK_SPEED_10G (1u << 5) +#define FW_PHY_ACT_LINK_SPEED_20G (1u << 6) +#define FW_PHY_ACT_LINK_SPEED_25G (1u << 7) +#define FW_PHY_ACT_LINK_SPEED_40G (1u << 8) +#define FW_PHY_ACT_LINK_SPEED_50G (1u << 9) +#define FW_PHY_ACT_LINK_SPEED_100G (1u << 10) +#define FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT 16 +#define FW_PHY_ACT_SETUP_LINK_PAUSE_MASK (3u << \ + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT) +#define FW_PHY_ACT_SETUP_LINK_PAUSE_NONE 0u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_TX 1u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_RX 2u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX 3u +#define FW_PHY_ACT_SETUP_LINK_LP (1u << 18) +#define FW_PHY_ACT_SETUP_LINK_HP (1u << 19) +#define FW_PHY_ACT_SETUP_LINK_EEE (1u << 20) +#define FW_PHY_ACT_SETUP_LINK_AN (1u << 22) +#define FW_PHY_ACT_SETUP_LINK_RSP_DOWN (1u << 0) +#define FW_PHY_ACT_GET_LINK_INFO 3 +#define FW_PHY_ACT_GET_LINK_INFO_EEE (1u << 19) +#define FW_PHY_ACT_GET_LINK_INFO_FC_TX (1u << 20) +#define FW_PHY_ACT_GET_LINK_INFO_FC_RX (1u << 21) +#define FW_PHY_ACT_GET_LINK_INFO_POWER (1u << 22) +#define FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE (1u << 24) +#define FW_PHY_ACT_GET_LINK_INFO_TEMP (1u << 25) +#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX (1u << 28) +#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX (1u << 29) +#define FW_PHY_ACT_FORCE_LINK_DOWN 4 +#define FW_PHY_ACT_FORCE_LINK_DOWN_OFF (1u << 0) +#define FW_PHY_ACT_PHY_SW_RESET 5 +#define FW_PHY_ACT_PHY_HW_RESET 6 +#define FW_PHY_ACT_GET_PHY_INFO 7 +#define FW_PHY_ACT_UD_2 0x1002 +#define FW_PHY_ACT_UD_2_10G_KR_EEE (1u << 6) +#define FW_PHY_ACT_UD_2_10G_KX4_EEE (1u << 5) +#define FW_PHY_ACT_UD_2_1G_KX_EEE (1u << 4) +#define FW_PHY_ACT_UD_2_10G_T_EEE (1u << 3) +#define FW_PHY_ACT_UD_2_1G_T_EEE (1u << 2) +#define FW_PHY_ACT_UD_2_100M_TX_EEE (1u << 1) +#define FW_PHY_ACT_RETRIES 50 +#define FW_PHY_INFO_SPEED_MASK 0xFFFu +#define FW_PHY_INFO_ID_HI_MASK 0xFFFF0000u +#define FW_PHY_INFO_ID_LO_MASK 0x0000FFFFu /* Host Interface Command Structures */ #ifdef C99 #pragma pack(push, 1) #else -#pragma pack(1) +#pragma pack (1) #endif /* C99 */ struct ixgbe_hic_hdr { @@ -3097,6 +3165,16 @@ struct ixgbe_hic_drv_info { u16 pad2; /* end spacing to ensure length is mult. 
of dword2 */ }; +struct ixgbe_hic_drv_info2 { + struct ixgbe_hic_hdr hdr; + u8 port_num; + u8 ver_sub; + u8 ver_build; + u8 ver_min; + u8 ver_maj; + char driver_string[FW_CEM_DRIVER_VERSION_SIZE]; +}; + /* These need to be dword aligned */ struct ixgbe_hic_read_shadow_ram { union ixgbe_hic_hdr2 hdr; @@ -3136,13 +3214,26 @@ struct ixgbe_hic_internal_phy_req { u8 command_type; __be16 address; u16 rsv1; - __le32 write_data; + __be32 write_data; u16 pad; }; struct ixgbe_hic_internal_phy_resp { struct ixgbe_hic_hdr hdr; - __le32 read_data; + __be32 read_data; +}; + +struct ixgbe_hic_phy_activity_req { + struct ixgbe_hic_hdr hdr; + u8 port_number; + u8 pad; + __le16 activity_id; + __be32 data[FW_PHY_ACT_DATA_COUNT]; +}; + +struct ixgbe_hic_phy_activity_resp { + struct ixgbe_hic_hdr hdr; + __be32 data[FW_PHY_ACT_DATA_COUNT]; }; #ifdef C99 @@ -3305,7 +3396,7 @@ typedef u32 ixgbe_autoneg_advertised; /* Link speed */ typedef u32 ixgbe_link_speed; #define IXGBE_LINK_SPEED_UNKNOWN 0 -#define IXGBE_LINK_SPEED_10_FULL 0x0004 +#define IXGBE_LINK_SPEED_10_FULL 0x0002 #define IXGBE_LINK_SPEED_100_FULL 0x0008 #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 #define IXGBE_LINK_SPEED_2_5GB_FULL 0x0400 @@ -3335,6 +3426,7 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 #define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000 +#define IXGBE_PHYSICAL_LAYER_10BASE_T 0x8000 /* Flow Control Data Sheet defined values * Calculation and defines taken from 802.1bb Annex O @@ -3553,7 +3645,9 @@ enum ixgbe_phy_type { ixgbe_phy_aq, ixgbe_phy_x550em_kr, ixgbe_phy_x550em_kx4, + ixgbe_phy_x550em_xfi, ixgbe_phy_x550em_ext_t, + ixgbe_phy_ext_1g_t, ixgbe_phy_cu_unknown, ixgbe_phy_qt, ixgbe_phy_xaui, @@ -3572,7 +3666,7 @@ enum ixgbe_phy_type { ixgbe_phy_qsfp_unknown, ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ ixgbe_phy_sgmii, - ixgbe_phy_m88, + ixgbe_phy_fw, ixgbe_phy_generic }; @@ -3629,14 +3723,6 @@ enum ixgbe_fc_mode { ixgbe_fc_default }; -/* Master/slave control */ -enum ixgbe_ms_type { - ixgbe_ms_hw_default = 0, - ixgbe_ms_force_master, - ixgbe_ms_force_slave, - ixgbe_ms_auto -}; - /* Smart Speed Settings */ #define IXGBE_SMARTSPEED_MAX_RETRIES 3 enum ixgbe_smart_speed { @@ -3840,6 +3926,7 @@ struct ixgbe_mac_operations { void (*init_swfw_sync)(struct ixgbe_hw *); s32 (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *); s32 (*prot_autoc_write)(struct ixgbe_hw *, u32, bool); + s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); /* Link */ void (*disable_tx_laser)(struct ixgbe_hw *); @@ -3860,6 +3947,7 @@ struct ixgbe_mac_operations { s32 (*led_off)(struct ixgbe_hw *, u32); s32 (*blink_led_start)(struct ixgbe_hw *, u32); s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + s32 (*init_led_link_act)(struct ixgbe_hw *); /* RAR, Multicast, VLAN */ s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); @@ -3883,6 +3971,8 @@ struct ixgbe_mac_operations { s32 (*init_uta_tables)(struct ixgbe_hw *); void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); + s32 (*update_xcast_mode)(struct ixgbe_hw *, int); + s32 (*set_rlpml)(struct ixgbe_hw *, u16); /* Flow Control */ s32 (*fc_enable)(struct ixgbe_hw *); @@ -3890,7 +3980,8 @@ struct ixgbe_mac_operations { void (*fc_autoneg)(struct ixgbe_hw *); /* Manageability interface */ - s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); + s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16, + const char *); s32 
(*get_thermal_sensor_data)(struct ixgbe_hw *); s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); void (*get_rtrup2tc)(struct ixgbe_hw *hw, u8 *map); @@ -4000,6 +4091,7 @@ struct ixgbe_mac_info { struct ixgbe_dmac_config dmac_config; bool set_lben; u32 max_link_up_time; + u8 led_link_act; }; struct ixgbe_phy_info { @@ -4015,8 +4107,8 @@ struct ixgbe_phy_info { bool reset_disable; ixgbe_autoneg_advertised autoneg_advertised; ixgbe_link_speed speeds_supported; - enum ixgbe_ms_type ms_type; - enum ixgbe_ms_type original_ms_type; + ixgbe_link_speed eee_speeds_supported; + ixgbe_link_speed eee_speeds_advertised; enum ixgbe_smart_speed smart_speed; bool smart_speed_active; bool multispeed_fiber; @@ -4078,6 +4170,7 @@ struct ixgbe_hw { bool force_full_reset; bool allow_unsupported_sfp; bool wol_enabled; + bool need_crosstalk_fix; }; #define ixgbe_call_func(hw, func, params, error) \ @@ -4136,16 +4229,35 @@ struct ixgbe_hw { #define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) #define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C) +#define IXGBE_KRM_AN_CNTL_4(P) ((P) ? 0x8238 : 0x4238) #define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248) +#define IXGBE_KRM_PCS_KX_AN(P) ((P) ? 0x9918 : 0x5918) +#define IXGBE_KRM_PCS_KX_AN_LP(P) ((P) ? 0x991C : 0x591C) #define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0) #define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C) #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634) #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638) #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P) ((P) ? 0x8B00 : 0x4B00) #define IXGBE_KRM_PMD_DFX_BURNIN(P) ((P) ? 0x8E00 : 0x4E00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054) #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 
0x9A00 : 0x5A00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR (1u << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR (0x2 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN (1u << 25) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN (1u << 26) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN (1u << 27) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M ~(0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M (1u << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G (0x2 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G (0x3 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN (0x4 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART (1u << 31) + #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B (1 << 9) #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS (1 << 11) @@ -4166,9 +4278,14 @@ struct ixgbe_hw { #define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE (1 << 28) #define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE (1 << 29) - +#define IXGBE_KRM_PCS_KX_AN_SYM_PAUSE (1 << 1) +#define IXGBE_KRM_PCS_KX_AN_ASM_PAUSE (1 << 2) +#define IXGBE_KRM_PCS_KX_AN_LP_SYM_PAUSE (1 << 2) +#define IXGBE_KRM_PCS_KX_AN_LP_ASM_PAUSE (1 << 3) +#define IXGBE_KRM_AN_CNTL_4_ECSR_AN37_OVER_73 (1 << 29) #define IXGBE_KRM_AN_CNTL_8_LINEAR (1 << 0) #define IXGBE_KRM_AN_CNTL_8_LIMITING (1 << 1) + #define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE (1 << 10) #define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE (1 << 11) @@ -4207,11 +4324,18 @@ struct ixgbe_hw { #define IXGBE_SB_IOSF_TARGET_KR_PHY 0 #define IXGBE_NW_MNG_IF_SEL 0x00011178 -#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT (1 << 1) -#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M (1 << 23) -#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24) +#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT (1u << 1) +#define IXGBE_NW_MNG_IF_SEL_MDIO_IF_MODE (1u << 2) +#define IXGBE_NW_MNG_IF_SEL_EN_SHARED_MDIO (1u << 13) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10M (1u << 17) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_100M (1u << 18) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G (1u << 19) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G (1u << 20) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G (1u << 21) +#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE (1u << 25) +#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24) /* X552 reg field only */ #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \ - (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) + (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) #endif /* _IXGBE_TYPE_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c index a75074a5..8775ee51 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c @@ -64,6 +64,7 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw) hw->mac.ops.get_mac_addr = ixgbe_get_mac_addr_vf; hw->mac.ops.stop_adapter = ixgbe_stop_adapter_vf; hw->mac.ops.get_bus_info = NULL; + hw->mac.ops.negotiate_api_version = ixgbevf_negotiate_api_version; /* Link */ hw->mac.ops.setup_link = ixgbe_setup_mac_link_vf; @@ -75,10 +76,12 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw) hw->mac.ops.set_uc_addr = ixgbevf_set_uc_addr_vf; hw->mac.ops.init_rx_addrs = NULL; hw->mac.ops.update_mc_addr_list = ixgbe_update_mc_addr_list_vf; + hw->mac.ops.update_xcast_mode = ixgbevf_update_xcast_mode; hw->mac.ops.enable_mc = NULL; hw->mac.ops.disable_mc = NULL; hw->mac.ops.clear_vfta = NULL; hw->mac.ops.set_vfta = 
ixgbe_set_vfta_vf; + hw->mac.ops.set_rlpml = ixgbevf_rlpml_set_vf; hw->mac.max_tx_queues = 1; hw->mac.max_rx_queues = 1; @@ -322,15 +325,16 @@ STATIC s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr) return vector; } -STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, - u32 *msg, u16 size) +STATIC s32 ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, u32 *msg, + u32 *retmsg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 retmsg[IXGBE_VFMAILBOX_SIZE]; s32 retval = mbx->ops.write_posted(hw, msg, size, 0); - if (!retval) - mbx->ops.read_posted(hw, retmsg, size, 0); + if (retval) + return retval; + + return mbx->ops.read_posted(hw, retmsg, size, 0); } /** @@ -344,7 +348,6 @@ STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, u32 enable_addr) { - struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[3]; u8 *msg_addr = (u8 *)(&msgbuf[1]); s32 ret_val; @@ -353,10 +356,7 @@ s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, memset(msgbuf, 0, 12); msgbuf[0] = IXGBE_VF_SET_MAC_ADDR; memcpy(msg_addr, addr, 6); - ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0); - - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -418,6 +418,39 @@ s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, return mbx->ops.write_posted(hw, msgbuf, IXGBE_VFMAILBOX_SIZE, 0); } +/** + * ixgbevf_update_xcast_mode - Update Multicast mode + * @hw: pointer to the HW structure + * @xcast_mode: new multicast mode + * + * Updates the Multicast Mode of VF. + **/ +s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) +{ + u32 msgbuf[2]; + s32 err; + + switch (hw->api_version) { + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + break; + default: + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; + } + + msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; + msgbuf[1] = xcast_mode; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + if (err) + return err; + + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; + if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; + return IXGBE_SUCCESS; +} + /** * ixgbe_set_vfta_vf - Set/Unset vlan filter table address * @hw: pointer to the HW structure @@ -425,11 +458,12 @@ s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, * @vind: unused by VF drivers * @vlan_on: if true then set bit, else clear bit * @vlvf_bypass: boolean flag indicating updating default pool is okay + * + * Turn on/off specified VLAN in the VLAN filter table. 
**/ s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass) { - struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[2]; s32 ret_val; UNREFERENCED_2PARAMETER(vind, vlvf_bypass); @@ -439,10 +473,7 @@ s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - ret_val = mbx->ops.write_posted(hw, msgbuf, 2, 0); - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 1, 0); - + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); if (!ret_val && (msgbuf[0] & IXGBE_VT_MSGTYPE_ACK)) return IXGBE_SUCCESS; @@ -489,8 +520,7 @@ s32 ixgbe_get_mac_addr_vf(struct ixgbe_hw *hw, u8 *mac_addr) s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) { - struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 msgbuf[3]; + u32 msgbuf[3], msgbuf_chk; u8 *msg_addr = (u8 *)(&msgbuf[1]); s32 ret_val; @@ -503,18 +533,17 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) */ msgbuf[0] |= index << IXGBE_VT_MSGINFO_SHIFT; msgbuf[0] |= IXGBE_VF_SET_MACVLAN; + msgbuf_chk = msgbuf[0]; if (addr) memcpy(msg_addr, addr, 6); - ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0); - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); + if (!ret_val) { + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - - if (!ret_val) - if (msgbuf[0] == (IXGBE_VF_SET_MACVLAN | IXGBE_VT_MSGTYPE_NACK)) - ret_val = IXGBE_ERR_OUT_OF_MEM; + if (msgbuf[0] == (msgbuf_chk | IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_OUT_OF_MEM; + } return ret_val; } @@ -584,13 +613,29 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed, switch (links_reg & IXGBE_LINKS_SPEED_82599) { case IXGBE_LINKS_SPEED_10G_82599: *speed = IXGBE_LINK_SPEED_10GB_FULL; + if (hw->mac.type >= ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + } break; case IXGBE_LINKS_SPEED_1G_82599: *speed = IXGBE_LINK_SPEED_1GB_FULL; break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; + if (hw->mac.type == ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* Since Reserved in older MAC's */ + if (hw->mac.type >= ixgbe_mac_X550) + *speed = IXGBE_LINK_SPEED_10_FULL; break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; } /* if the read failed it could just be a mailbox collision, best wait @@ -627,13 +672,22 @@ out: * @hw: pointer to the HW structure * @max_size: value to assign to max frame size **/ -void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) +s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) { u32 msgbuf[2]; + s32 retval; msgbuf[0] = IXGBE_VF_SET_LPE; msgbuf[1] = max_size; - ixgbevf_write_msg_read_ack(hw, msgbuf, 2); + + retval = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + if (retval) + return retval; + if ((msgbuf[0] & IXGBE_VF_SET_LPE) && + (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_MBX; + + return 0; } /** @@ -650,11 +704,8 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) msg[0] = IXGBE_VF_API_NEGOTIATE; msg[1] = api; msg[2] = 0; - err = hw->mbx.ops.write_posted(hw, msg, 3, 0); - - if (!err) - err = hw->mbx.ops.read_posted(hw, msg, 3, 0); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3); if (!err) { 
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -680,6 +731,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, switch (hw->api_version) { case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: break; default: return 0; @@ -688,11 +740,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, /* Fetch queue configuration from the PF */ msg[0] = IXGBE_VF_GET_QUEUES; msg[1] = msg[2] = msg[3] = msg[4] = 0; - err = hw->mbx.ops.write_posted(hw, msg, 5, 0); - - if (!err) - err = hw->mbx.ops.read_posted(hw, msg, 5, 0); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h index 8851cb82..3efffe82 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h @@ -34,6 +34,8 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef _IXGBE_VF_H_ #define _IXGBE_VF_H_ +#include "ixgbe_type.h" + #define IXGBE_VF_IRQ_CLEAR_MASK 7 #define IXGBE_VF_MAX_TX_QUEUES 8 #define IXGBE_VF_MAX_RX_QUEUES 8 @@ -114,6 +116,7 @@ struct ixgbevf_hw_stats { u64 saved_reset_vfmprc; }; +s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw); s32 ixgbe_init_hw_vf(struct ixgbe_hw *hw); s32 ixgbe_start_hw_vf(struct ixgbe_hw *hw); s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw); @@ -131,9 +134,10 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr); s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr, bool clear); +s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode); s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass); -void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); +s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, unsigned int *default_tc); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c index 31dead0d..499b1fac 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c @@ -271,6 +271,7 @@ mac_reset_top: if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, hw->mac.san_addr, 0, IXGBE_RAH_AV); @@ -490,7 +491,6 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) u16 length = 0; u16 pointer = 0; u16 word = 0; - u16 checksum_last_word = IXGBE_EEPROM_CHECKSUM; u16 ptr_start = IXGBE_PCIE_ANALOG_PTR; /* Do not use hw->eeprom.ops.read because we do not want to take @@ -500,14 +500,15 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_calc_eeprom_checksum_X540"); - /* Include 0x0-0x3F in the checksum */ - for (i = 0; i <= checksum_last_word; i++) { + /* Include 0x0 up to IXGBE_EEPROM_CHECKSUM; do not include the + * checksum itself + */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { if (ixgbe_read_eerd_generic(hw, i, &word)) { DEBUGOUT("EEPROM read failed\n"); return IXGBE_ERR_EEPROM; } - if (i != IXGBE_EEPROM_CHECKSUM) - checksum += word; + checksum += word; } /* Include all data from pointers 0x3, 0x6-0xE. 
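The checksum-loop change above turns the in-loop skip of the checksum word into a tighter loop bound. A hedged sketch of the same summation over an in-memory word array (the accessor and the 0x3F word index are illustrative; the driver reads each word through ixgbe_read_eerd_generic):

    #include <stdint.h>
    #include <stddef.h>

    #define SKETCH_EEPROM_CHECKSUM_WORD  0x3F   /* index of the word holding the checksum */

    /* Sum every word strictly below the checksum word; the checksum itself is
     * excluded by the loop bound rather than by a test inside the loop. */
    static uint16_t sketch_eeprom_partial_checksum(const uint16_t *words)
    {
        uint16_t checksum = 0;
        for (size_t i = 0; i < SKETCH_EEPROM_CHECKSUM_WORD; i++)
            checksum += words[i];
        return checksum;
    }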
This excludes the @@ -774,8 +775,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) /* SW NVM semaphore bit is used for access to all * SW_FW_SYNC bits (not just NVM) */ - if (ixgbe_get_swfw_sync_semaphore(hw)) + if (ixgbe_get_swfw_sync_semaphore(hw)) { + DEBUGOUT("Failed to get NVM access and register semaphore, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; + } swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw)); if (!(swfw_sync & (fwmask | swmask | hwmask))) { @@ -783,7 +786,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw), swfw_sync); ixgbe_release_swfw_sync_semaphore(hw); - msec_delay(5); return IXGBE_SUCCESS; } /* Firmware currently using resource (fwmask), hardware @@ -798,6 +800,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) if (swmask == IXGBE_GSSR_SW_MNG_SM) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "Failed to get SW only semaphore"); + DEBUGOUT("Failed to get SW only semaphore, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } @@ -806,8 +809,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * of the requested resource(s) while ignoring the corresponding FW/HW * bits in the SW_FW_SYNC register. */ - if (ixgbe_get_swfw_sync_semaphore(hw)) + if (ixgbe_get_swfw_sync_semaphore(hw)) { + DEBUGOUT("Failed to get NVM sempahore and register semaphore while forcefully ignoring FW sempahore bit(s) and setting SW semaphore bit(s), returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; + } swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw)); if (swfw_sync & (fwmask | hwmask)) { swfw_sync |= swmask; @@ -829,9 +834,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) rmask |= IXGBE_GSSR_I2C_MASK; ixgbe_release_swfw_sync_X540(hw, rmask); ixgbe_release_swfw_sync_semaphore(hw); + DEBUGOUT("Resource not released by other SW, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } ixgbe_release_swfw_sync_semaphore(hw); + DEBUGOUT("Returning error IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } @@ -860,7 +867,7 @@ void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw), swfw_sync); ixgbe_release_swfw_sync_semaphore(hw); - msec_delay(5); + msec_delay(2); } /** @@ -982,6 +989,9 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_start_X540"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* * Link should be up in order for the blink bit in the LED control * register to work. Force link and speed in the MAC if link is down. @@ -1016,6 +1026,9 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) u32 macc_reg; u32 ledctl_reg; + if (index > 3) + return IXGBE_ERR_PARAM; + DEBUGFUNC("ixgbe_blink_led_stop_X540"); /* Restore the LED to its default value. */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c index aa6e859f..6f9c034b 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c @@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. 
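For the SW/FW synchronization paths instrumented above: acquiring a resource means taking the register semaphore, checking that no SW, FW, or HW ownership bit is already set, and only then claiming the software bit. A minimal sketch of that try-acquire test, with placeholder bit names and a plain variable standing in for the SWFW_SYNC register:

    #include <stdint.h>
    #include <stdbool.h>

    /* Stand-in masks: software, firmware and hardware ownership bits for one resource. */
    #define SKETCH_SW_BIT  (1u << 0)
    #define SKETCH_FW_BIT  (1u << 1)
    #define SKETCH_HW_BIT  (1u << 2)

    static uint32_t sketch_swfw_sync;            /* stands in for the SWFW_SYNC register */

    /* Claim the resource for software only if nobody owns it, mirroring the
     * "not (fwmask | swmask | hwmask)" test in the hunk above. */
    static bool sketch_try_acquire(void)
    {
        if (sketch_swfw_sync & (SKETCH_SW_BIT | SKETCH_FW_BIT | SKETCH_HW_BIT))
            return false;                        /* held elsewhere; caller backs off and retries */
        sketch_swfw_sync |= SKETCH_SW_BIT;       /* claim it for software */
        return true;
    }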
STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed); STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *, u32 mask); STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *, u32 mask); +STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw); /** * ixgbe_init_ops_X550 - Inits func ptrs and MAC type @@ -61,7 +62,7 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw) mac->ops.dmac_config = ixgbe_dmac_config_X550; mac->ops.dmac_config_tcs = ixgbe_dmac_config_tcs_X550; mac->ops.dmac_update_tcs = ixgbe_dmac_update_tcs_X550; - mac->ops.setup_eee = ixgbe_setup_eee_X550; + mac->ops.setup_eee = NULL; mac->ops.set_source_address_pruning = ixgbe_set_source_address_pruning_X550; mac->ops.set_ethertype_anti_spoofing = @@ -82,6 +83,8 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw) mac->ops.mdd_event = ixgbe_mdd_event_X550; mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550; mac->ops.disable_rx = ixgbe_disable_rx_x550; + /* Manageability interface */ + mac->ops.set_fw_drv_ver = ixgbe_set_fw_drv_ver_x550; switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_A_10G_T: @@ -342,11 +345,10 @@ STATIC s32 ixgbe_read_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, UNREFERENCED_1PARAMETER(dev_type); /* Setup and write the read command */ - command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ | - IXGBE_MSCA_MDI_COMMAND; + command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ_AUTOINC | + IXGBE_MSCA_MDI_COMMAND; IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); @@ -393,11 +395,10 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data); /* Setup and write the write command */ - command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE | - IXGBE_MSCA_MDI_COMMAND; + command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE | + IXGBE_MSCA_MDI_COMMAND; IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); @@ -422,43 +423,6 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_SUCCESS; } -/** - * ixgbe_identify_phy_1g - Get 1g PHY type based on device id - * @hw: pointer to hardware structure - * - * Returns error code - */ -STATIC s32 ixgbe_identify_phy_1g(struct ixgbe_hw *hw) -{ - u32 swfw_mask = hw->phy.phy_semaphore_mask; - u16 phy_id_high; - u16 phy_id_low; - s32 rc; - - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); - if (rc) - return rc; - - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_HIGH, 0, - &phy_id_high); - if (rc) - goto rel_out; - - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_LOW, 0, - &phy_id_low); - if (rc) - goto rel_out; - - hw->phy.id = (u32)phy_id_high << 16; - hw->phy.id |= phy_id_low & IXGBE_PHY_REVISION_MASK; - hw->phy.revision = (u32)phy_id_low & ~IXGBE_PHY_REVISION_MASK; - -rel_out: - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - - return rc; -} - /** * ixgbe_identify_phy_x550em - Get PHY type based on device id * @hw: pointer to hardware structure @@ -467,18 +431,15 @@ rel_out: */ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) { + hw->mac.ops.set_lan_id(hw); + + 
ixgbe_read_mng_if_sel_x550em(hw); + switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_A_SFP: - hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; - hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; - if (hw->bus.lan_id) - hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; - else - hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; return ixgbe_identify_module_generic(hw); case IXGBE_DEV_ID_X550EM_X_SFP: /* set up for CS4227 usage */ - hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM; ixgbe_setup_mux_ctl(hw); ixgbe_check_cs4227(hw); /* Fallthrough */ @@ -489,30 +450,161 @@ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_KX4: hw->phy.type = ixgbe_phy_x550em_kx4; break; + case IXGBE_DEV_ID_X550EM_X_XFI: + hw->phy.type = ixgbe_phy_x550em_xfi; + break; case IXGBE_DEV_ID_X550EM_X_KR: case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: hw->phy.type = ixgbe_phy_x550em_kr; break; + case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: - case IXGBE_DEV_ID_X550EM_A_10G_T: return ixgbe_identify_phy_generic(hw); case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: - hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; - hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + hw->phy.type = ixgbe_phy_fw; + hw->phy.ops.read_reg = NULL; + hw->phy.ops.write_reg = NULL; if (hw->bus.lan_id) hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; else hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; - return ixgbe_identify_phy_1g(hw); + break; default: break; } return IXGBE_SUCCESS; } +/** + * ixgbe_fw_phy_activity - Perform an activity on a PHY + * @hw: pointer to hardware structure + * @activity: activity to perform + * @data: Pointer to 4 32-bit words of data + */ +s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity, + u32 (*data)[FW_PHY_ACT_DATA_COUNT]) +{ + union { + struct ixgbe_hic_phy_activity_req cmd; + struct ixgbe_hic_phy_activity_resp rsp; + } hic; + u16 retries = FW_PHY_ACT_RETRIES; + s32 rc; + u16 i; + + do { + memset(&hic, 0, sizeof(hic)); + hic.cmd.hdr.cmd = FW_PHY_ACT_REQ_CMD; + hic.cmd.hdr.buf_len = FW_PHY_ACT_REQ_LEN; + hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM; + hic.cmd.port_number = hw->bus.lan_id; + hic.cmd.activity_id = IXGBE_CPU_TO_LE16(activity); + for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i) + hic.cmd.data[i] = IXGBE_CPU_TO_BE32((*data)[i]); + + rc = ixgbe_host_interface_command(hw, (u32 *)&hic.cmd, + sizeof(hic.cmd), + IXGBE_HI_COMMAND_TIMEOUT, + true); + if (rc != IXGBE_SUCCESS) + return rc; + if (hic.rsp.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) { + for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i) + (*data)[i] = IXGBE_BE32_TO_CPU(hic.rsp.data[i]); + return IXGBE_SUCCESS; + } + usec_delay(20); + --retries; + } while (retries > 0); + + return IXGBE_ERR_HOST_INTERFACE_COMMAND; +} + +static const struct { + u16 fw_speed; + ixgbe_link_speed phy_speed; +} ixgbe_fw_map[] = { + { FW_PHY_ACT_LINK_SPEED_10, IXGBE_LINK_SPEED_10_FULL }, + { FW_PHY_ACT_LINK_SPEED_100, IXGBE_LINK_SPEED_100_FULL }, + { FW_PHY_ACT_LINK_SPEED_1G, IXGBE_LINK_SPEED_1GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_2_5G, IXGBE_LINK_SPEED_2_5GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_5G, IXGBE_LINK_SPEED_5GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_10G, IXGBE_LINK_SPEED_10GB_FULL }, +}; + +/** + * ixgbe_get_phy_id_fw - Get the phy ID via firmware command + * @hw: pointer to hardware structure + * + * Returns error code + */ +static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw) +{ + u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; 
+ u16 phy_speeds; + u16 phy_id_lo; + s32 rc; + u16 i; + + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_PHY_INFO, &info); + if (rc) + return rc; + + hw->phy.speeds_supported = 0; + phy_speeds = info[0] & FW_PHY_INFO_SPEED_MASK; + for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) { + if (phy_speeds & ixgbe_fw_map[i].fw_speed) + hw->phy.speeds_supported |= ixgbe_fw_map[i].phy_speed; + } + if (!hw->phy.autoneg_advertised) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + hw->phy.id = info[0] & FW_PHY_INFO_ID_HI_MASK; + phy_id_lo = info[1] & FW_PHY_INFO_ID_LO_MASK; + hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK; + hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK; + if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK) + return IXGBE_ERR_PHY_ADDR_INVALID; + return IXGBE_SUCCESS; +} + +/** + * ixgbe_identify_phy_fw - Get PHY type based on firmware command + * @hw: pointer to hardware structure + * + * Returns error code + */ +static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw) +{ + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; + + hw->phy.type = ixgbe_phy_fw; + hw->phy.ops.read_reg = NULL; + hw->phy.ops.write_reg = NULL; + return ixgbe_get_phy_id_fw(hw); +} + +/** + * ixgbe_shutdown_fw_phy - Shutdown a firmware-controlled PHY + * @hw: pointer to hardware structure + * + * Returns error code + */ +s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw) +{ + u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 }; + + setup[0] = FW_PHY_ACT_FORCE_LINK_DOWN_OFF; + return ixgbe_fw_phy_activity(hw, FW_PHY_ACT_FORCE_LINK_DOWN, &setup); +} + STATIC s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) { @@ -601,7 +693,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) struct ixgbe_mac_info *mac = &hw->mac; struct ixgbe_eeprom_info *eeprom = &hw->eeprom; struct ixgbe_phy_info *phy = &hw->phy; - struct ixgbe_link_info *link = &hw->link; s32 ret_val; DEBUGFUNC("ixgbe_init_ops_X550EM"); @@ -637,25 +728,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) hw->bus.type = ixgbe_bus_type_internal; mac->ops.get_bus_info = ixgbe_get_bus_info_X550em; - if (hw->mac.type == ixgbe_mac_X550EM_x) { - mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; - mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; - mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; - mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; - link->ops.read_link = ixgbe_read_i2c_combined_generic; - link->ops.read_link_unlocked = - ixgbe_read_i2c_combined_generic_unlocked; - link->ops.write_link = ixgbe_write_i2c_combined_generic; - link->ops.write_link_unlocked = - ixgbe_write_i2c_combined_generic_unlocked; - link->addr = IXGBE_CS4227; - } - if (hw->mac.type == ixgbe_mac_X550EM_a) { - mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; - mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; - mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a; - mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a; - } mac->ops.get_media_type = ixgbe_get_media_type_X550em; mac->ops.setup_sfp = ixgbe_setup_sfp_modules_X550em; @@ -666,25 +738,23 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) mac->ops.setup_fc = ixgbe_setup_fc_generic; - else if (hw->mac.type == ixgbe_mac_X550EM_a) { - mac->ops.setup_fc = ixgbe_setup_fc_x550a; - mac->ops.fc_autoneg = ixgbe_fc_autoneg_x550a; - } else mac->ops.setup_fc = 
ixgbe_setup_fc_X550em; + /* PHY */ + phy->ops.init = ixgbe_init_phy_ops_X550em; switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + mac->ops.setup_fc = NULL; + phy->ops.identify = ixgbe_identify_phy_fw; + phy->ops.set_phy_power = NULL; + phy->ops.get_firmware_version = NULL; break; default: - mac->ops.setup_eee = NULL; + phy->ops.identify = ixgbe_identify_phy_x550em; } - /* PHY */ - phy->ops.init = ixgbe_init_phy_ops_X550em; - phy->ops.identify = ixgbe_identify_phy_x550em; if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper) phy->ops.set_phy_power = NULL; @@ -702,6 +772,183 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) return ret_val; } +/** + * ixgbe_setup_fw_link - Setup firmware-controlled PHYs + * @hw: pointer to hardware structure + */ +static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) +{ + u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 }; + s32 rc; + u16 i; + + if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw)) + return 0; + + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, + "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + return IXGBE_ERR_INVALID_LINK_SETTINGS; + } + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + case ixgbe_fc_rx_pause: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + case ixgbe_fc_tx_pause: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_TX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + default: + break; + } + + for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) { + if (hw->phy.autoneg_advertised & ixgbe_fw_map[i].phy_speed) + setup[0] |= ixgbe_fw_map[i].fw_speed; + } + setup[0] |= FW_PHY_ACT_SETUP_LINK_HP | FW_PHY_ACT_SETUP_LINK_AN; + + if (hw->phy.eee_speeds_advertised) + setup[0] |= FW_PHY_ACT_SETUP_LINK_EEE; + + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup); + if (rc) + return rc; + if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN) + return IXGBE_ERR_OVERTEMP; + return IXGBE_SUCCESS; +} + +/** + * ixgbe_fc_autoneg_fw _ Set up flow control for FW-controlled PHYs + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + */ +static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw) +{ + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + return ixgbe_setup_fw_link(hw); +} + +/** + * ixgbe_setup_eee_fw - Enable/disable EEE support + * @hw: pointer to the HW structure + * @enable_eee: boolean flag to enable EEE + * + * Enable/disable EEE based on enable_eee flag. + * This function controls EEE for firmware-based PHY implementations. + */ +static s32 ixgbe_setup_eee_fw(struct ixgbe_hw *hw, bool enable_eee) +{ + if (!!hw->phy.eee_speeds_advertised == enable_eee) + return IXGBE_SUCCESS; + if (enable_eee) + hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported; + else + hw->phy.eee_speeds_advertised = 0; + return hw->phy.ops.setup_link(hw); +} + +/** +* ixgbe_init_ops_X550EM_a - Inits func ptrs and MAC type +* @hw: pointer to hardware structure +* +* Initialize the function pointers and for MAC type X550EM_a. +* Does not touch the hardware. 
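The firmware-PHY EEE toggle added just above reduces to a state check on the advertised EEE speed set: if the requested state already matches, nothing is written; otherwise the advertisement is set to everything supported (or cleared) and link setup is re-run. A stand-alone sketch of that decision, with an illustrative structure and a stub setup-link call:

    #include <stdbool.h>
    #include <stdint.h>

    struct sketch_phy {
        uint32_t eee_speeds_supported;
        uint32_t eee_speeds_advertised;
    };

    static int sketch_setup_link(struct sketch_phy *phy) { (void)phy; return 0; }

    /* Only touch the link when the EEE state actually changes. */
    static int sketch_setup_eee(struct sketch_phy *phy, bool enable_eee)
    {
        if (!!phy->eee_speeds_advertised == enable_eee)
            return 0;                               /* already in the requested state */
        phy->eee_speeds_advertised = enable_eee ? phy->eee_speeds_supported : 0;
        return sketch_setup_link(phy);              /* renegotiate with the new advertisement */
    }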
+**/ +s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val; + + DEBUGFUNC("ixgbe_init_ops_X550EM_a"); + + /* Start with generic X550EM init */ + ret_val = ixgbe_init_ops_X550EM(hw); + + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII || + hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII_L) { + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; + } else { + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a; + } + mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a; + mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a; + + switch (mac->ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + mac->ops.setup_fc = NULL; + mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a; + break; + case ixgbe_media_type_backplane: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a; + mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a; + break; + default: + break; + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_sgmii_x550em_a; + mac->ops.setup_fc = ixgbe_fc_autoneg_fw; + mac->ops.setup_eee = ixgbe_setup_eee_fw; + hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported; + break; + default: + break; + } + + return ret_val; +} + +/** +* ixgbe_init_ops_X550EM_x - Inits func ptrs and MAC type +* @hw: pointer to hardware structure +* +* Initialize the function pointers and for MAC type X550EM_x. +* Does not touch the hardware. +**/ +s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_link_info *link = &hw->link; + s32 ret_val; + + DEBUGFUNC("ixgbe_init_ops_X550EM_x"); + + /* Start with generic X550EM init */ + ret_val = ixgbe_init_ops_X550EM(hw); + + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; + mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; + mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; + link->ops.read_link = ixgbe_read_i2c_combined_generic; + link->ops.read_link_unlocked = ixgbe_read_i2c_combined_generic_unlocked; + link->ops.write_link = ixgbe_write_i2c_combined_generic; + link->ops.write_link_unlocked = + ixgbe_write_i2c_combined_generic_unlocked; + link->addr = IXGBE_CS4227; + + + return ret_val; +} + /** * ixgbe_dmac_config_X550 * @hw: pointer to hardware structure @@ -765,6 +1012,7 @@ s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw) /* Configure DMA coalescing enabled */ switch (hw->mac.dmac_config.link_speed) { + case IXGBE_LINK_SPEED_10_FULL: case IXGBE_LINK_SPEED_100_FULL: pb_headroom = IXGBE_DMACRXT_100M; break; @@ -864,158 +1112,6 @@ s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } -/** - * ixgbe_enable_eee_x550 - Enable EEE support - * @hw: pointer to hardware structure - */ -STATIC s32 ixgbe_enable_eee_x550(struct ixgbe_hw *hw) -{ - u16 autoneg_eee_reg; - u32 link_reg; - s32 status; - - if (hw->mac.type == ixgbe_mac_X550) { - /* Advertise EEE capability */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_eee_reg); - - autoneg_eee_reg |= (IXGBE_AUTO_NEG_10GBASE_EEE_ADVT | - IXGBE_AUTO_NEG_1000BASE_EEE_ADVT | - IXGBE_AUTO_NEG_100BASE_EEE_ADVT); - - 
hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_eee_reg); - return IXGBE_SUCCESS; - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg); - if (status != IXGBE_SUCCESS) - return status; - - link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR | - IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX; - - /* Don't advertise FEC capability when EEE enabled. */ - link_reg &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC; - - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg); - if (status != IXGBE_SUCCESS) - return status; - break; - default: - break; - } - - return IXGBE_SUCCESS; -} - -/** - * ixgbe_disable_eee_x550 - Disable EEE support - * @hw: pointer to hardware structure - */ -STATIC s32 ixgbe_disable_eee_x550(struct ixgbe_hw *hw) -{ - u16 autoneg_eee_reg; - u32 link_reg; - s32 status; - - if (hw->mac.type == ixgbe_mac_X550) { - /* Disable advertised EEE capability */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_eee_reg); - - autoneg_eee_reg &= ~(IXGBE_AUTO_NEG_10GBASE_EEE_ADVT | - IXGBE_AUTO_NEG_1000BASE_EEE_ADVT | - IXGBE_AUTO_NEG_100BASE_EEE_ADVT); - - hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_eee_reg); - return IXGBE_SUCCESS; - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg); - if (status != IXGBE_SUCCESS) - return status; - - link_reg &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR | - IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX); - - /* Advertise FEC capability when EEE is disabled. */ - link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC; - - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg); - if (status != IXGBE_SUCCESS) - return status; - break; - default: - break; - } - - return IXGBE_SUCCESS; -} - -/** - * ixgbe_setup_eee_X550 - Enable/disable EEE support - * @hw: pointer to the HW structure - * @enable_eee: boolean flag to enable EEE - * - * Enable/disable EEE based on enable_eee flag. - * Auto-negotiation must be started after BASE-T EEE bits in PHY register 7.3C - * are modified. - * - **/ -s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee) -{ - s32 status; - u32 eeer; - - DEBUGFUNC("ixgbe_setup_eee_X550"); - - eeer = IXGBE_READ_REG(hw, IXGBE_EEER); - /* Enable or disable EEE per flag */ - if (enable_eee) { - eeer |= (IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN); - - /* Not supported on first revision of X550EM_x. 
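The X550 EEE code being removed in this hunk is a plain read-modify-write of the EEER register that sets or clears the TX and RX LPI enable bits together. A small sketch of that register-access pattern, using a local array in place of MMIO and assumed bit positions:

    #include <stdbool.h>
    #include <stdint.h>

    #define SKETCH_EEER            0u            /* index of the EEER word in the stub register file */
    #define SKETCH_EEER_TX_LPI_EN  (1u << 16)    /* illustrative bit positions */
    #define SKETCH_EEER_RX_LPI_EN  (1u << 17)

    static uint32_t sketch_regs[4];              /* stands in for MMIO space */

    static void sketch_set_eee(bool enable)
    {
        uint32_t eeer = sketch_regs[SKETCH_EEER];        /* read */
        if (enable)
            eeer |= SKETCH_EEER_TX_LPI_EN | SKETCH_EEER_RX_LPI_EN;
        else
            eeer &= ~(SKETCH_EEER_TX_LPI_EN | SKETCH_EEER_RX_LPI_EN);
        sketch_regs[SKETCH_EEER] = eeer;                 /* write back */
    }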
*/ - if ((hw->mac.type == ixgbe_mac_X550EM_x) && - !(IXGBE_FUSES0_REV_MASK & - IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)))) - return IXGBE_SUCCESS; - status = ixgbe_enable_eee_x550(hw); - if (status) - return status; - } else { - eeer &= ~(IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN); - - status = ixgbe_disable_eee_x550(hw); - if (status) - return status; - } - IXGBE_WRITE_REG(hw, IXGBE_EEER, eeer); - - return IXGBE_SUCCESS; -} - /** * ixgbe_set_source_address_pruning_X550 - Enable/Disbale source address pruning * @hw: pointer to hardware structure @@ -1102,8 +1198,8 @@ STATIC s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) } /** - * ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF - * device + * ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register + * of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type @@ -1149,12 +1245,11 @@ out: } /** - * ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF - * device + * ixgbe_read_iosf_sb_reg_x550 - Reads specified register of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type - * @phy_data: Pointer to read data from the register + * @data: Pointer to read data from the register **/ s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data) @@ -1216,13 +1311,20 @@ s32 ixgbe_get_phy_token(struct ixgbe_hw *hw) sizeof(token_cmd), IXGBE_HI_COMMAND_TIMEOUT, true); - if (status) + if (status) { + DEBUGOUT1("Issuing host interface command failed with Status = %d\n", + status); return status; + } if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return IXGBE_SUCCESS; - if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) + if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) { + DEBUGOUT1("Host interface command returned 0x%08x , returning IXGBE_ERR_FW_RESP_INVALID\n", + token_cmd.hdr.cmd_or_resp.ret_status); return IXGBE_ERR_FW_RESP_INVALID; + } + DEBUGOUT("Returning IXGBE_ERR_TOKEN_RETRY\n"); return IXGBE_ERR_TOKEN_RETRY; } @@ -1278,7 +1380,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, write_cmd.port_number = hw->bus.lan_id; write_cmd.command_type = FW_INT_PHY_REQ_WRITE; write_cmd.address = IXGBE_CPU_TO_BE16(reg_addr); - write_cmd.write_data = IXGBE_CPU_TO_LE32(data); + write_cmd.write_data = IXGBE_CPU_TO_BE32(data); status = ixgbe_host_interface_command(hw, (u32 *)&write_cmd, sizeof(write_cmd), @@ -1288,8 +1390,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, } /** - * ixgbe_read_iosf_sb_reg_x550a - Writes a value to specified register - * of the IOSF device. + * ixgbe_read_iosf_sb_reg_x550a - Reads specified register of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type @@ -1318,7 +1419,7 @@ s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, IXGBE_HI_COMMAND_TIMEOUT, true); /* Extract the register value from the response. 
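The CPU_TO_BE32/BE32_TO_CPU corrections in this hunk pair up: the host-interface command carries the IOSF register value big-endian on the wire, so the write payload and the read response must use inverse conversions. A portable, hedged illustration of that pairing (local helpers, not the driver's macros):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Portable stand-ins for the driver's IXGBE_CPU_TO_BE32 / IXGBE_BE32_TO_CPU macros. */
    static uint32_t sketch_cpu_to_be32(uint32_t v)
    {
        uint8_t b[4] = { (uint8_t)(v >> 24), (uint8_t)(v >> 16), (uint8_t)(v >> 8), (uint8_t)v };
        uint32_t out;
        memcpy(&out, b, 4);
        return out;
    }

    static uint32_t sketch_be32_to_cpu(uint32_t v)
    {
        uint8_t b[4];
        memcpy(b, &v, 4);
        return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) | ((uint32_t)b[2] << 8) | b[3];
    }

    int main(void)
    {
        uint32_t reg_val = 0x12345678u;
        uint32_t wire = sketch_cpu_to_be32(reg_val);     /* value as placed in the write payload */
        printf("round trip ok: %d\n", sketch_be32_to_cpu(wire) == reg_val);
        return 0;
    }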
*/ - *data = IXGBE_LE32_TO_CPU(hic.rsp.read_data); + *data = IXGBE_BE32_TO_CPU(hic.rsp.read_data); return status; } @@ -1482,6 +1583,7 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_KR: case IXGBE_DEV_ID_X550EM_X_KX4: + case IXGBE_DEV_ID_X550EM_X_XFI: case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: media_type = ixgbe_media_type_backplane; @@ -1506,7 +1608,6 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: media_type = ixgbe_media_type_copper; - hw->phy.type = ixgbe_phy_m88; break; default: media_type = ixgbe_media_type_unknown; @@ -1598,17 +1699,63 @@ s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } +/** +* ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the +* internal PHY +* @hw: pointer to hardware structure +**/ +STATIC s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw) +{ + s32 status; + u32 link_ctrl; + + /* Restart auto-negotiation. */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); + + if (status) { + DEBUGOUT("Auto-negotiation did not complete\n"); + return status; + } + + link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + + if (hw->mac.type == ixgbe_mac_X550EM_a) { + u32 flx_mask_st20; + + /* Indicate to FW that AN restart has been asserted */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20); + + if (status) { + DEBUGOUT("Auto-negotiation did not complete\n"); + return status; + } + + flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20); + } + + return status; +} + /** * ixgbe_setup_sgmii - Set up link for sgmii * @hw: pointer to hardware structure */ STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg_wait_to_complete) + bool autoneg_wait) { struct ixgbe_mac_info *mac = &hw->mac; - u32 lval, sval; + u32 lval, sval, flx_val; s32 rc; - UNREFERENCED_2PARAMETER(speed, autoneg_wait_to_complete); rc = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), @@ -1641,12 +1788,100 @@ STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed, if (rc) return rc; - lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + + rc = ixgbe_restart_an_internal_phy_x550em(hw); + if (rc) + return rc; + + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); +} + +/** + * ixgbe_setup_sgmii_fw - Set up link for sgmii with firmware-controlled PHYs + * @hw: pointer to hardware structure + */ +STATIC s32 
ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 lval, sval, flx_val; + s32 rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lval); + if (rc) + return rc; + + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; + lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN; + lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN; + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; rc = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + if (rc) + return rc; - return rc; + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &sval); + if (rc) + return rc; + + sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D; + sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D; + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, sval); + if (rc) + return rc; + + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + if (rc) + return rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + + rc = ixgbe_restart_an_internal_phy_x550em(hw); + + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); } /** @@ -1670,17 +1905,30 @@ void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw) mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber; mac->ops.set_rate_select_speed = ixgbe_set_soft_rate_select_speed; + if ((hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) || (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP)) mac->ops.setup_mac_link = - ixgbe_setup_mac_link_sfp_x550a; + ixgbe_setup_mac_link_sfp_x550a; else mac->ops.setup_mac_link = - ixgbe_setup_mac_link_sfp_x550em; + ixgbe_setup_mac_link_sfp_x550em; break; case ixgbe_media_type_copper: - mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em; - mac->ops.check_link = ixgbe_check_link_t_X550em; + if (hw->mac.type == ixgbe_mac_X550EM_a) { + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T || + hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) { + mac->ops.setup_link = ixgbe_setup_sgmii_fw; + mac->ops.check_link = + ixgbe_check_mac_link_generic; + } else { + mac->ops.setup_link = + ixgbe_setup_mac_link_t_X550em; + } + } else { + mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em; + mac->ops.check_link = ixgbe_check_link_t_X550em; + } break; case ixgbe_media_type_backplane: if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII || @@ -1704,6 +1952,13 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, { DEBUGFUNC("ixgbe_get_link_capabilities_X550em"); + + if (hw->phy.type == ixgbe_phy_fw) { + *autoneg = true; + *speed = hw->phy.speeds_supported; + return 0; + } + /* SFP */ if (hw->phy.media_type == ixgbe_media_type_fiber) { @@ -1727,13 +1982,24 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *speed = IXGBE_LINK_SPEED_10GB_FULL; } else { 
switch (hw->phy.type) { - case ixgbe_phy_m88: - *speed = IXGBE_LINK_SPEED_100_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - break; + case ixgbe_phy_ext_1g_t: case ixgbe_phy_sgmii: *speed = IXGBE_LINK_SPEED_1GB_FULL; break; + case ixgbe_phy_x550em_kr: + if (hw->mac.type == ixgbe_mac_X550EM_a) { + /* check different backplane modes */ + if (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G) { + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + break; + } else if (hw->device_id == + IXGBE_DEV_ID_X550EM_A_KR_L) { + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + } + } + /* fall through */ default: *speed = IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL; @@ -1854,19 +2120,32 @@ STATIC s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); /* Enable link status change alarm */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); - if (status != IXGBE_SUCCESS) - return status; + /* Enable the LASI interrupts on X552 devices to receive notifications + * of the link configurations of the external PHY and correspondingly + * support the configuration of the internal iXFI link, since iXFI does + * not support auto-negotiation. This is not required for X553 devices + * having KR support, which performs auto-negotiations and which is used + * as the internal link to the external PHY. Hence adding a check here + * to avoid enabling LASI interrupts for X553 devices. + */ + if (hw->mac.type != ixgbe_mac_X550EM_a) { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); - reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; + if (status != IXGBE_SUCCESS) + return status; - status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); + reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; - if (status != IXGBE_SUCCESS) - return status; + status = hw->phy.ops.write_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); + + if (status != IXGBE_SUCCESS) + return status; + } /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, @@ -1935,8 +2214,8 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, u32 reg_val; status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status) return status; @@ -1952,182 +2231,75 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_1GB_FULL) reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX; - /* Restart auto-negotiation. 
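The KR speed setup above boils down to translating the advertised speed mask into the clause-73 KR/KX capability bits before the register is written back and auto-negotiation is restarted. A hedged sketch of just that translation step, with placeholder flag and bit values:

    #include <stdint.h>

    /* Illustrative speed flags and KRM capability bits (values are not the driver's). */
    #define SKETCH_SPEED_1GB_FULL   (1u << 0)
    #define SKETCH_SPEED_10GB_FULL  (1u << 1)
    #define SKETCH_AN_CAP_KX        (1u << 16)
    #define SKETCH_AN_CAP_KR        (1u << 18)

    /* Clear both capability bits, then re-set them from the advertised speeds. */
    static uint32_t sketch_kr_an_caps(uint32_t link_ctrl, uint32_t advertised)
    {
        link_ctrl &= ~(SKETCH_AN_CAP_KR | SKETCH_AN_CAP_KX);
        if (advertised & SKETCH_SPEED_10GB_FULL)
            link_ctrl |= SKETCH_AN_CAP_KR;
        if (advertised & SKETCH_SPEED_1GB_FULL)
            link_ctrl |= SKETCH_AN_CAP_KX;
        return link_ctrl;
    }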
*/ - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - return status; -} + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); -/** - * ixgbe_set_master_slave_mode - Set up PHY for master/slave mode - * @hw: pointer to hardware structure - * - * Must be called while holding the PHY semaphore and token - */ -STATIC s32 ixgbe_set_master_slave_mode(struct ixgbe_hw *hw) -{ - u16 phy_data; - s32 rc; + if (hw->mac.type == ixgbe_mac_X550EM_a) { + /* Set lane mode to KR auto negotiation */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - /* Resolve master/slave mode */ - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0, - &phy_data); - if (rc) - return rc; + if (status) + return status; - /* load defaults for future use */ - if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_ENABLE) { - if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_VALUE) - hw->phy.original_ms_type = ixgbe_ms_force_master; - else - hw->phy.original_ms_type = ixgbe_ms_force_slave; - } else { - hw->phy.original_ms_type = ixgbe_ms_auto; - } + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; - switch (hw->phy.ms_type) { - case ixgbe_ms_force_master: - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_VALUE; - break; - case ixgbe_ms_force_slave: - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_VALUE; - break; - case ixgbe_ms_auto: - phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - break; - default: - break; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); } - return ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0, - phy_data); -} - -/** - * ixgbe_reset_phy_m88_nolock - Reset m88 PHY without locking - * @hw: pointer to hardware structure - * - * Must be called while holding the PHY semaphore and token - */ -STATIC s32 ixgbe_reset_phy_m88_nolock(struct ixgbe_hw *hw) -{ - s32 rc; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 1); - if (rc) - return rc; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_FIBER_CTRL, 0, - IXGBE_M88E1500_FIBER_CTRL_RESET | - IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL | - IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 18); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_GEN_CTRL, 0, - IXGBE_M88E1500_GEN_CTRL_RESET | - IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_COPPER_CTRL, 0, - IXGBE_M88E1500_COPPER_CTRL_RESET | - IXGBE_M88E1500_COPPER_CTRL_AN_EN | - IXGBE_M88E1500_COPPER_CTRL_RESTART_AN | - IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX | - IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB); - -res_out: - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - return rc; + return ixgbe_restart_an_internal_phy_x550em(hw); } /** - * ixgbe_reset_phy_m88 - Reset m88 PHY + * 
ixgbe_reset_phy_fw - Reset firmware-controlled PHYs * @hw: pointer to hardware structure */ -STATIC s32 ixgbe_reset_phy_m88(struct ixgbe_hw *hw) +static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw) { - u32 swfw_mask = hw->phy.phy_semaphore_mask; + u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw)) return IXGBE_SUCCESS; - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_PHY_SW_RESET, &store); if (rc) return rc; + memset(store, 0, sizeof(store)); - rc = ixgbe_reset_phy_m88_nolock(hw); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_INIT_PHY, &store); + if (rc) + return rc; - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; + return ixgbe_setup_fw_link(hw); } /** - * ixgbe_setup_m88 - setup m88 PHY + * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp * @hw: pointer to hardware structure */ -STATIC s32 ixgbe_setup_m88(struct ixgbe_hw *hw) +static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) { - u32 swfw_mask = hw->phy.phy_semaphore_mask; - struct ixgbe_phy_info *phy = &hw->phy; - u16 phy_data; + u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; - if (phy->reset_disable || ixgbe_check_reset_blocked(hw)) - return IXGBE_SUCCESS; - - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store); if (rc) return rc; - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_PHY_SPEC_CTRL, 0, - &phy_data); - if (rc) - goto rel_out; - - /* Enable downshift and setting it to X6 */ - phy_data &= ~IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE; - phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_6X; - phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE; - rc = ixgbe_write_phy_reg_mdi_22(hw, - IXGBE_M88E1500_PHY_SPEC_CTRL, 0, - phy_data); - if (rc) - goto rel_out; - - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - - /* Commit the changes */ - rc = ixgbe_reset_phy_m88_nolock(hw); - if (rc) { - DEBUGOUT("Error committing the PHY changes\n"); - goto rel_out; + if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) { + ixgbe_shutdown_fw_phy(hw); + return IXGBE_ERR_OVERTEMP; } - - rc = ixgbe_set_master_slave_mode(hw); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; - -rel_out: - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; + return IXGBE_SUCCESS; } /** @@ -2151,7 +2323,7 @@ STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { hw->phy.addr = (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } return IXGBE_SUCCESS; @@ -2173,7 +2345,6 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_init_phy_ops_X550em"); hw->mac.ops.set_lan_id(hw); - ixgbe_read_mng_if_sel_x550em(hw); if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) { @@ -2182,9 +2353,45 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) phy->ops.identify_sfp = ixgbe_identify_sfp_module_X550em; } + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; + phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; + hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; + hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + phy->ops.check_overtemp = ixgbe_check_overtemp_fw; + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask |= 
IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; + + break; + case IXGBE_DEV_ID_X550EM_A_10G_T: + case IXGBE_DEV_ID_X550EM_A_SFP: + hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; + hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; + break; + case IXGBE_DEV_ID_X550EM_X_SFP: + /* set up for CS4227 usage */ + hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM; + break; + case IXGBE_DEV_ID_X550EM_X_1G_T: + phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; + phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; + break; + default: + break; + } + /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || + ret_val == IXGBE_ERR_PHY_ADDR_INVALID) return ret_val; /* Setup function pointers based on detected hardware */ @@ -2204,6 +2411,16 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) phy->ops.read_reg = ixgbe_read_phy_reg_x550em; phy->ops.write_reg = ixgbe_write_phy_reg_x550em; break; + case ixgbe_phy_ext_1g_t: + /* link is managed by FW */ + phy->ops.setup_link = NULL; + break; + case ixgbe_phy_x550em_xfi: + /* link is managed by HW */ + phy->ops.setup_link = NULL; + phy->ops.read_reg = ixgbe_read_phy_reg_x550em; + phy->ops.write_reg = ixgbe_write_phy_reg_x550em; + break; case ixgbe_phy_x550em_ext_t: /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. @@ -2223,11 +2440,9 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) case ixgbe_phy_sgmii: phy->ops.setup_link = NULL; break; - case ixgbe_phy_m88: - phy->ops.setup_link = ixgbe_setup_m88; - phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; - phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; - phy->ops.reset = ixgbe_reset_phy_m88; + case ixgbe_phy_fw: + phy->ops.setup_link = ixgbe_setup_fw_link; + phy->ops.reset = ixgbe_reset_phy_fw; break; default: break; @@ -2247,8 +2462,6 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_A_SGMII: case IXGBE_DEV_ID_X550EM_A_SGMII_L: - case IXGBE_DEV_ID_X550EM_A_1G_T: - case IXGBE_DEV_ID_X550EM_A_1G_T_L: case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_SFP: case IXGBE_DEV_ID_X550EM_A_QSFP: @@ -2257,6 +2470,13 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) hlreg0 &= ~IXGBE_HLREG0_MDCSPD; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); break; + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + /* Select fast MDIO clock speed for these devices */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg0 |= IXGBE_HLREG0_MDCSPD; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); + break; default: break; } @@ -2282,9 +2502,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* Call adapter stop to disable Tx/Rx and clear interrupts */ status = hw->mac.ops.stop_adapter(hw); - if (status != IXGBE_SUCCESS) + if (status != IXGBE_SUCCESS) { + DEBUGOUT1("Failed to stop adapter, STATUS = %d\n", status); return status; - + } /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); @@ -2293,14 +2514,23 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status) + DEBUGOUT1("Failed to initialize PHY ops, STATUS = %d\n", + status); + + if (status == 
IXGBE_ERR_SFP_NOT_SUPPORTED) { + DEBUGOUT("Returning from reset HW since PHY ops init returned IXGBE_ERR_SFP_NOT_SUPPORTED\n"); return status; + } /* start the external PHY */ if (hw->phy.type == ixgbe_phy_x550em_ext_t) { status = ixgbe_init_ext_t_x550em(hw); - if (status) + if (status) { + DEBUGOUT1("Failed to start the external PHY, STATUS = %d\n", + status); return status; + } } /* Setup SFP module if there is one present. */ @@ -2313,8 +2543,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) return status; /* Reset PHY */ - if (!hw->phy.reset_disable && hw->phy.ops.reset) - hw->phy.ops.reset(hw); + if (!hw->phy.reset_disable && hw->phy.ops.reset) { + if (hw->phy.ops.reset(hw) == IXGBE_ERR_OVERTEMP) + return IXGBE_ERR_OVERTEMP; + } mac_reset_top: /* Issue global reset to the MAC. Needs to be SW reset if link is up. @@ -2372,6 +2604,9 @@ mac_reset_top: if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) ixgbe_setup_mux_ctl(hw); + if (status != IXGBE_SUCCESS) + DEBUGOUT1("Reset HW failed, STATUS = %d\n", status); + return status; } @@ -2421,12 +2656,11 @@ s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) /** * ixgbe_setup_kr_x550em - Configure the KR PHY. * @hw: pointer to hardware structure - * - * Configures the integrated KR PHY for X550EM_x. **/ s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw) { - if (hw->mac.type != ixgbe_mac_X550EM_x) + /* leave link alone for 2.5G */ + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL) return IXGBE_SUCCESS; return ixgbe_setup_kr_speed_x550em(hw, hw->phy.autoneg_advertised); @@ -2510,6 +2744,55 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, return ret_val; } +/** + * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated PHY for native SFI mode. Used to connect the + * internal PHY directly to an SFP cage, without autonegotiation. + **/ +STATIC s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 status; + u32 reg_val; + + /* Disable all AN and force speed to 10G Serial. */ + status = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + if (status != IXGBE_SUCCESS) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal PHY. */ + return IXGBE_ERR_LINK_SETUP; + } + + status = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); + + return status; +} + /** * ixgbe_setup_mac_link_sfp_x550a - Setup internal PHY for SFP * @hw: pointer to hardware structure @@ -2517,8 +2800,8 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, * Configure the the integrated PHY for SFP support. 
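ixgbe_setup_sfi_x550a above forces the internal PHY to a fixed SFI speed by clearing every auto-negotiation enable and the 3-bit speed field in the ST20 flex mask, then writing in the forced value. A compact sketch of that bit manipulation; the field values are copied from the IXGBE_KRM_PMD_FLX_MASK_ST20_* definitions earlier in this patch, while the wrapper names are local to the sketch:

    #include <stdbool.h>
    #include <stdint.h>

    #define ST20_SPEED_MASK  (0x7u << 28)
    #define ST20_SPEED_1G    (0x2u << 28)
    #define ST20_SPEED_10G   (0x3u << 28)
    #define ST20_AN_EN       (1u << 27)
    #define ST20_AN37_EN     (1u << 26)
    #define ST20_SGMII_EN    (1u << 25)

    /* Kill every AN mode, then replace the 3-bit speed field with the forced rate. */
    static void sketch_sfi_force_speed(uint32_t *st20, bool is_10g)
    {
        *st20 &= ~(ST20_AN_EN | ST20_AN37_EN | ST20_SGMII_EN | ST20_SPEED_MASK);
        *st20 |= is_10g ? ST20_SPEED_10G : ST20_SPEED_1G;
    }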
**/ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg_wait_to_complete) + ixgbe_link_speed speed, + bool autoneg_wait_to_complete) { s32 ret_val; u16 reg_phy_ext; @@ -2540,31 +2823,27 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, return ret_val; if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) { - /* Configure internal PHY for native SFI */ + /* Configure internal PHY for native SFI based on module type */ ret_val = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); if (ret_val != IXGBE_SUCCESS) return ret_val; - if (setup_linear) { - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR; - } else { - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR; - } + reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA; + if (!setup_linear) + reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR; ret_val = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); if (ret_val != IXGBE_SUCCESS) return ret_val; - /* Setup XFI/SFI internal link. */ - ret_val = ixgbe_setup_ixfi_x550em(hw, &speed); + /* Setup SFI internal link. */ + ret_val = ixgbe_setup_sfi_x550a(hw, &speed); } else { /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); @@ -2575,9 +2854,9 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, return IXGBE_ERR_PHY_ADDR_INVALID; } - /* Get external PHY device id */ - ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, - IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); + /* Get external PHY SKU id */ + ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); if (ret_val != IXGBE_SUCCESS) return ret_val; @@ -2585,7 +2864,7 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, /* When configuring quad port CS4223, the MAC instance is part * of the slice offset. */ - if (reg_phy_ext == IXGBE_CS4223_PHY_ID) + if (reg_phy_ext == IXGBE_CS4223_SKU_ID) slice_offset = (hw->bus.lan_id + (hw->bus.instance_id << 1)) << 12; else @@ -2593,12 +2872,26 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, /* Configure CS4227/CS4223 LINE side to proper mode. */ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + slice_offset; + + ret_val = hw->phy.ops.read_reg(hw, reg_slice, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); + + if (ret_val != IXGBE_SUCCESS) + return ret_val; + + reg_phy_ext &= ~((IXGBE_CS4227_EDC_MODE_CX1 << 1) | + (IXGBE_CS4227_EDC_MODE_SR << 1)); + if (setup_linear) reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1; else reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1; ret_val = hw->phy.ops.write_reg(hw, reg_slice, - IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); + IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); + + /* Flush previous write with a read */ + ret_val = hw->phy.ops.read_reg(hw, reg_slice, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); } return ret_val; } @@ -2611,24 +2904,25 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, **/ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) { + struct ixgbe_mac_info *mac = &hw->mac; s32 status; u32 reg_val; /* Disable training protocol FSM. 
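The CS4223 handling above packs the MAC instance into the register slice: for the quad-port CS4223 the slice index is lan_id + 2 * instance_id, for the dual-port CS4227 it is just lan_id, and each slice is 0x1000 registers apart, so lan_id 1 / instance_id 1 lands on slice 3 at offset 0x3000. A small sketch of that arithmetic, using only constants that appear in the hunk; the helper name is illustrative.

    /* Illustrative helper: LINE-side slice register for this port on a
     * CS4227 (dual port) or CS4223 (quad port) external PHY. */
    static u32 cs4227_line_slice_reg(struct ixgbe_hw *hw, u16 sku_id)
    {
    	u32 slice_offset;

    	if (sku_id == IXGBE_CS4223_SKU_ID)
    		slice_offset = (hw->bus.lan_id + (hw->bus.instance_id << 1)) << 12;
    	else
    		slice_offset = hw->bus.lan_id << 12;

    	return IXGBE_CS4227_LINE_SPARE24_LSB + slice_offset;
    }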
*/ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Disable Flex from training TXFFE. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2636,12 +2930,12 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2649,14 +2943,14 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Enable override for coefficients. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2665,7 +2959,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN; reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN; reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); return status; @@ -2681,11 +2975,16 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) **/ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) { + struct ixgbe_mac_info *mac = &hw->mac; s32 status; u32 reg_val; + /* iXFI is only supported with X552 */ + if (mac->type != ixgbe_mac_X550EM_x) + return IXGBE_ERR_LINK_SETUP; + /* Disable AN and force speed to 10G Serial. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2707,7 +3006,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) return IXGBE_ERR_LINK_SETUP; } - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) @@ -2721,15 +3020,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) } /* Toggle port SW reset by AN reset. 
*/ - status = ixgbe_read_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - if (status != IXGBE_SUCCESS) - return status; - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; - status = ixgbe_write_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -2788,7 +3079,8 @@ s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { /* If link is down, there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status != IXGBE_SUCCESS) @@ -2847,56 +3139,56 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw) /* Disable AN and force speed to 10G Serial. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Set near-end loopback clocks. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B; reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Set loopback enable. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Training bypass. 
*/ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); return status; } @@ -2910,13 +3202,13 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw) * * Reads a 16 bit word from the EEPROM using the hostif. **/ -s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data) +s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data) { - s32 status; + const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM; struct ixgbe_hic_read_shadow_ram buffer; + s32 status; - DEBUGFUNC("ixgbe_read_ee_hostif_data_X550"); + DEBUGFUNC("ixgbe_read_ee_hostif_X550"); buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD; buffer.hdr.req.buf_lenh = 0; buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN; @@ -2927,42 +3219,18 @@ s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, /* one word */ buffer.length = IXGBE_CPU_TO_BE16(sizeof(u16)); - status = ixgbe_host_interface_command(hw, (u32 *)&buffer, - sizeof(buffer), - IXGBE_HI_COMMAND_TIMEOUT, false); - + status = hw->mac.ops.acquire_swfw_sync(hw, mask); if (status) return status; - *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, - FW_NVM_DATA_OFFSET); - - return 0; -} - -/** - * ixgbe_read_ee_hostif_X550 - Read EEPROM word using a host interface command - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM using the hostif. - **/ -s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data) -{ - s32 status = IXGBE_SUCCESS; - - DEBUGFUNC("ixgbe_read_ee_hostif_X550"); - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - IXGBE_SUCCESS) { - status = ixgbe_read_ee_hostif_data_X550(hw, offset, data); - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - } else { - status = IXGBE_ERR_SWFW_SYNC; + status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), + IXGBE_HI_COMMAND_TIMEOUT); + if (!status) { + *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, + FW_NVM_DATA_OFFSET); } + hw->mac.ops.release_swfw_sync(hw, mask); return status; } @@ -2978,6 +3246,7 @@ s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data) { + const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM; struct ixgbe_hic_read_shadow_ram buffer; u32 current_word = 0; u16 words_to_read; @@ -2987,11 +3256,12 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, DEBUGFUNC("ixgbe_read_ee_hostif_buffer_X550"); /* Take semaphore for the entire operation. 
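The reworked ixgbe_read_ee_hostif_X550() above changes the locking model: instead of a locked host-interface command under only the EEPROM semaphore, the caller now takes IXGBE_GSSR_SW_MNG_SM and IXGBE_GSSR_EEP_SM together and issues the command through ixgbe_hic_unlocked(), which expects the semaphore to already be held. A condensed sketch of that pattern, reusing the base-code names from the hunk; the wrapper itself is hypothetical.

    /* Illustrative wrapper: hold the combined SW_MNG + EEP semaphore around an
     * unlocked host interface command, the locking scheme now used by the
     * single-word and buffered shadow-RAM reads. */
    static s32 x550_hostif_cmd_locked(struct ixgbe_hw *hw, u32 *buf, u32 len)
    {
    	const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
    	s32 status;

    	status = hw->mac.ops.acquire_swfw_sync(hw, mask);
    	if (status)
    		return status;

    	status = ixgbe_hic_unlocked(hw, buf, len, IXGBE_HI_COMMAND_TIMEOUT);

    	hw->mac.ops.release_swfw_sync(hw, mask);
    	return status;
    }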
*/ - status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + status = hw->mac.ops.acquire_swfw_sync(hw, mask); if (status) { DEBUGOUT("EEPROM read buffer - semaphore failed\n"); return status; } + while (words) { if (words > FW_MAX_READ_BUFFER_SIZE / 2) words_to_read = FW_MAX_READ_BUFFER_SIZE / 2; @@ -3007,10 +3277,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, buffer.address = IXGBE_CPU_TO_BE32((offset + current_word) * 2); buffer.length = IXGBE_CPU_TO_BE16(words_to_read * 2); - status = ixgbe_host_interface_command(hw, (u32 *)&buffer, - sizeof(buffer), - IXGBE_HI_COMMAND_TIMEOUT, - false); + status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), + IXGBE_HI_COMMAND_TIMEOUT); if (status) { DEBUGOUT("Host interface command failed\n"); @@ -3035,7 +3303,7 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, } out: - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + hw->mac.ops.release_swfw_sync(hw, mask); return status; } @@ -3439,6 +3707,7 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw) switch (hw->phy.type) { case ixgbe_phy_x550em_kr: + case ixgbe_phy_x550em_xfi: physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR | IXGBE_PHYSICAL_LAYER_1000BASE_KX; break; @@ -3455,6 +3724,20 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw) if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; break; + case ixgbe_phy_fw: + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_1GB_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_100_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_10_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_10BASE_T; + break; + case ixgbe_phy_sgmii: + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX; + break; + case ixgbe_phy_ext_1g_t: + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + break; default: break; } @@ -3737,19 +4020,19 @@ s32 ixgbe_setup_fc_X550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: ret_val = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (ret_val != IXGBE_SUCCESS) goto out; reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | - IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); if (pause) reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; if (asm_dir) reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; ret_val = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); /* This device does not fully support AN. */ hw->fc.disable_fc_autoneg = true; @@ -3763,12 +4046,12 @@ out: } /** - * ixgbe_fc_autoneg_x550a - Enable flow control IEEE clause 37 + * ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37 * @hw: pointer to hardware structure * * Enable flow control according to IEEE clause 37. 
**/ -void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) +void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) { u32 link_s1, lp_an_page_low, an_cntl_1; s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; @@ -3782,7 +4065,7 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) */ if (hw->fc.disable_fc_autoneg) { ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, - "Flow control autoneg is disabled"); + "Flow control autoneg is disabled"); goto out; } @@ -3794,12 +4077,13 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) /* Check at auto-negotiation has completed */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_S1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); + IXGBE_KRM_LINK_S1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); if (status != IXGBE_SUCCESS || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { DEBUGOUT("Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; goto out; } @@ -3807,8 +4091,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) * local flow control settings accordingly */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3816,8 +4100,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) } status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); + IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3840,22 +4124,88 @@ out: } /** - * ixgbe_setup_fc_x550em - Set up flow control + * ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings + * @hw: pointer to hardware structure + * + **/ +void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw) +{ + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; +} + +/** + * ixgbe_fc_autoneg_sgmii_x550em_a - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; + u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; + ixgbe_link_speed speed; + bool link_up; + + /* AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. 
+ */ + if (hw->fc.disable_fc_autoneg) { + ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, + "Flow control autoneg is disabled"); + goto out; + } + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) { + ERROR_REPORT1(IXGBE_ERROR_SOFTWARE, "The link is down"); + goto out; + } + + /* Check if auto-negotiation has completed */ + status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info); + if (status != IXGBE_SUCCESS || + !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) { + DEBUGOUT("Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + + /* Negotiate the flow control */ + status = ixgbe_negotiate_fc(hw, info[0], info[0], + FW_PHY_ACT_GET_LINK_INFO_FC_RX, + FW_PHY_ACT_GET_LINK_INFO_FC_TX, + FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX, + FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX); + +out: + if (status == IXGBE_SUCCESS) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_setup_fc_backplane_x550em_a - Set up flow control * @hw: pointer to hardware structure * * Called at init time to set up flow control. **/ -s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) +s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) { s32 status = IXGBE_SUCCESS; - u32 an_cntl, link_ctrl = 0; + u32 an_cntl = 0; - DEBUGFUNC("ixgbe_setup_fc_x550em"); + DEBUGFUNC("ixgbe_setup_fc_backplane_x550em_a"); /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, - "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); return IXGBE_ERR_INVALID_LINK_SETTINGS; } @@ -3867,8 +4217,8 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) * we link at 10G, the 1G advertisement is harmless and vice versa. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3909,7 +4259,7 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) case ixgbe_fc_full: /* Flow control (both Rx and Tx) is enabled by SW override. */ an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | - IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; break; default: ERROR_REPORT1(IXGBE_ERROR_ARGUMENT, @@ -3918,23 +4268,11 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) } status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); /* Restart auto-negotiation. 
*/ - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); - - if (status != IXGBE_SUCCESS) { - DEBUGOUT("Auto-Negotiation did not complete\n"); - return status; - } - - link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -4018,22 +4356,34 @@ STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask) status = IXGBE_SUCCESS; if (hmask) status = ixgbe_acquire_swfw_sync_X540(hw, hmask); - if (status) + if (status) { + DEBUGOUT1("Could not acquire SWFW semaphore, Status = %d\n", + status); return status; + } if (!(mask & IXGBE_GSSR_TOKEN_SM)) return IXGBE_SUCCESS; status = ixgbe_get_phy_token(hw); + if (status == IXGBE_ERR_TOKEN_RETRY) + DEBUGOUT1("Could not acquire PHY token, Status = %d\n", + status); + if (status == IXGBE_SUCCESS) return IXGBE_SUCCESS; if (hmask) ixgbe_release_swfw_sync_X540(hw, hmask); - if (status != IXGBE_ERR_TOKEN_RETRY) + + if (status != IXGBE_ERR_TOKEN_RETRY) { + DEBUGOUT1("Unable to retry acquiring the PHY token, Status = %d\n", + status); return status; - msec_delay(FW_PHY_TOKEN_DELAY); + } } + DEBUGOUT1("Semaphore acquisition retries failed!: PHY ID = 0x%08X\n", + hw->phy.id); return status; } @@ -4068,7 +4418,7 @@ STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask) * instances. **/ s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data) + u32 device_type, u16 *phy_data) { s32 status; u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM; @@ -4096,7 +4446,7 @@ s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, * The PHY Token is needed since the MDIO is shared between to MAC instances. **/ s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data) + u32 device_type, u16 phy_data) { s32 status; u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM; @@ -4104,7 +4454,7 @@ s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, DEBUGFUNC("ixgbe_write_phy_reg_x550a"); if (hw->mac.ops.acquire_swfw_sync(hw, mask) == IXGBE_SUCCESS) { - status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, + status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, mask); } else { @@ -4169,8 +4519,10 @@ s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw, else force_speed = IXGBE_LINK_SPEED_1GB_FULL; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* If X552 and internal link mode is XFI, then setup XFI internal link. + */ + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { status = ixgbe_setup_ixfi_x550em(hw, &force_speed); if (status != IXGBE_SUCCESS) @@ -4193,7 +4545,7 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed, bool *link_up, bool link_up_wait_to_complete) { u32 status; - u16 autoneg_status; + u16 i, autoneg_status = 0; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; @@ -4206,21 +4558,18 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed, return status; /* MAC link is up, so check external PHY link. - * Read this twice back to back to indicate current status. 
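ixgbe_write_phy_reg_x550a() above captures the X550EM_a MDIO access rule: because the MDIO bus is shared between the two MAC instances, an access must hold both the port's GSSR PHY semaphore and the firmware PHY token (IXGBE_GSSR_TOKEN_SM), and it now dispatches through phy.ops.write_reg_mdi rather than a fixed MDI function. A minimal restatement of that locking pattern; the helper name is illustrative.

    /* Illustrative helper: a write to a shared-MDIO PHY on X550EM_a takes the
     * per-port GSSR PHY semaphore plus the PHY token before touching the bus. */
    static s32 x550a_shared_mdio_write(struct ixgbe_hw *hw, u32 reg_addr,
    				   u32 device_type, u16 phy_data)
    {
    	u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
    	s32 status;

    	if (hw->mac.ops.acquire_swfw_sync(hw, mask) != IXGBE_SUCCESS)
    		return IXGBE_ERR_SWFW_SYNC;

    	status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type, phy_data);
    	hw->mac.ops.release_swfw_sync(hw, mask);

    	return status;
    }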
+ * X557 PHY. Link status is latching low, and can only be used to detect + * link drop, and not the current status of the link without performing + * back-to-back reads. */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); - - if (status != IXGBE_SUCCESS) - return status; - - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); + for (i = 0; i < 2; i++) { + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_status); - if (status != IXGBE_SUCCESS) - return status; + if (status != IXGBE_SUCCESS) + return status; + } /* If external PHY link is not up, then indicate link not up */ if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS)) @@ -4294,3 +4643,63 @@ s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx) return IXGBE_SUCCESS; } +/** + * ixgbe_set_fw_drv_ver_x550 - Sends driver version to firmware + * @hw: pointer to the HW structure + * @maj: driver version major number + * @min: driver version minor number + * @build: driver version build number + * @sub: driver version sub build number + * @len: length of driver_ver string + * @driver_ver: driver string + * + * Sends driver version number to firmware through the manageability + * block. On success return IXGBE_SUCCESS + * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub, u16 len, const char *driver_ver) +{ + struct ixgbe_hic_drv_info2 fw_cmd; + s32 ret_val = IXGBE_SUCCESS; + int i; + + DEBUGFUNC("ixgbe_set_fw_drv_ver_x550"); + + if ((len == 0) || (driver_ver == NULL) || + (len > sizeof(fw_cmd.driver_string))) + return IXGBE_ERR_INVALID_ARGUMENT; + + fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; + fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; + fw_cmd.port_num = (u8)hw->bus.func; + fw_cmd.ver_maj = maj; + fw_cmd.ver_min = min; + fw_cmd.ver_build = build; + fw_cmd.ver_sub = sub; + fw_cmd.hdr.checksum = 0; + memcpy(fw_cmd.driver_string, driver_ver, len); + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); + + for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { + ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd, + sizeof(fw_cmd), + IXGBE_HI_COMMAND_TIMEOUT, + true); + if (ret_val != IXGBE_SUCCESS) + continue; + + if (fw_cmd.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) + ret_val = IXGBE_SUCCESS; + else + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + + break; + } + + return ret_val; +} diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h index 27d5d02f..30ca5df1 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h @@ -36,49 +36,6 @@ POSSIBILITY OF SUCH DAMAGE. 
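The new ixgbe_set_fw_drv_ver_x550() above sends a FW_CEM_CMD_DRIVER_INFO message carrying a free-form version string, retrying the host interface command up to FW_CEM_MAX_RETRIES times and checking the returned CEM status. A hedged usage sketch follows; the caller, the version numbers, and the string are examples only and are not part of this patch.

    /* Example caller (hypothetical): report a PMD version string to firmware.
     * The length must fit fw_cmd.driver_string, so short strings only. */
    static void example_report_drv_ver(struct ixgbe_hw *hw)
    {
    	const char ver[] = "17.02.0";
    	s32 ret;

    	ret = ixgbe_set_fw_drv_ver_x550(hw, 17, 2, 0, 0,
    					(u16)sizeof(ver), ver);
    	if (ret != IXGBE_SUCCESS)
    		DEBUGOUT1("FW_CEM_CMD_DRIVER_INFO failed, status = %d\n", ret);
    }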
#include "ixgbe_type.h" -/* More phy definitions */ -#define IXGBE_M88E1500_COPPER_CTRL 0x0/* Page 0 reg */ -#define IXGBE_M88E1500_COPPER_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_COPPER_CTRL_AN_EN 0x1000 -#define IXGBE_M88E1500_COPPER_CTRL_RESTART_AN 0x0200 -#define IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX 0x0100 -#define IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB 0x0040 -#define IXGBE_M88E1500_1000T_CTRL 0x09 /* 1000Base-T Ctrl Reg */ -/* 1=Configure PHY as Master 0=Configure PHY as Slave */ -#define IXGBE_M88E1500_1000T_CTRL_MS_VALUE 0x0800 -/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ -#define IXGBE_M88E1500_1000T_CTRL_MS_ENABLE 0x1000 -#define IXGBE_M88E1500_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -#define IXGBE_M88E1500_AUTO_COPPER_SGMII 0x2 -#define IXGBE_M88E1500_AUTO_COPPER_BASEX 0x3 -#define IXGBE_M88E1500_STATUS_LINK 0x0004 /* Interface Link Bit */ -#define IXGBE_M88E1500_MAC_CTRL_1 0x10 -#define IXGBE_M88E1500_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ -#define IXGBE_M88E1500_CFG_REG_1 0x0010 -#define IXGBE_M88E1500_CFG_REG_2 0x0011 -#define IXGBE_M88E1500_CFG_REG_3 0x0007 -#define IXGBE_M88E1500_MODE 0x0014 -#define IXGBE_M88E1500_PAGE_ADDR 0x16/* Page Offset reg */ -#define IXGBE_M88E1500_FIBER_CTRL 0x0/* Page 1 reg */ -#define IXGBE_M88E1500_FIBER_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_FIBER_CTRL_SPEED_LSB 0x2000 -#define IXGBE_M88E1500_FIBER_CTRL_POWER_DOWN 0x0800 -#define IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL 0x0100 -#define IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB 0x0040 -#define IXGBE_M88E1500_EEE_CTRL_1 0x0/* Page 18 reg */ -#define IXGBE_M88E1500_EEE_CTRL_1_MS 0x0001/* EEE Master/Slave */ -#define IXGBE_M88E1500_GEN_CTRL 0x14/* Page 18 reg */ -#define IXGBE_M88E1500_GEN_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER 0x0001/* Mode bits 0-2 */ - -/* M88E1500 Specific Registers */ -#define IXGBE_M88E1500_PHY_SPEC_CTRL 0x10 /* PHY Specific Ctrl Reg */ -#define IXGBE_M88E1500_PHY_SPEC_STATUS 0x11 /* PHY Specific Stat Reg */ - -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE 0x0800 -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_MASK 0x7000 -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_6X 0x5000 - s32 ixgbe_dmac_config_X550(struct ixgbe_hw *hw); s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw); s32 ixgbe_dmac_update_tcs_X550(struct ixgbe_hw *hw); @@ -98,12 +55,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data); -s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data); s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_set_eee_X550(struct ixgbe_hw *hw, bool enable_eee); -s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee); void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, bool enable, unsigned int pool); void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw, @@ -112,6 +65,8 @@ s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 data); s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data); +s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 ver, u16 len, const char *str); s32 ixgbe_get_phy_token(struct ixgbe_hw *); s32 ixgbe_put_phy_token(struct ixgbe_hw *); s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, @@ -144,14 +99,18 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, ixgbe_link_speed 
speed, bool autoneg_wait_to_complete); s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg_wait_to_complete); + ixgbe_link_speed speed, + bool autoneg_wait_to_complete); s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data); + u32 device_type, u16 *phy_data); s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data); -s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw); -void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw); + u32 device_type, u16 phy_data); +s32 ixgbe_setup_fc_fiber_x550em_a(struct ixgbe_hw *hw); +s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw); +s32 ixgbe_setup_fc_sgmii_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw); s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw); s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed speed, diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c index 72963a89..5b625a3d 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -72,6 +73,8 @@ #include "base/ixgbe_phy.h" #include "ixgbe_regs.h" +#include "rte_pmd_ixgbe.h" + /* * High threshold controlling when to start sending XOFF frames. Must be at * least 8 bytes less than receive packet buffer size. 
This value is in units @@ -163,6 +166,11 @@ enum ixgbevf_xcast_modes { static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev); static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev); +static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev); +static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev); static int ixgbe_dev_configure(struct rte_eth_dev *dev); static int ixgbe_dev_start(struct rte_eth_dev *dev); static void ixgbe_dev_stop(struct rte_eth_dev *dev); @@ -191,6 +199,8 @@ static int ixgbe_dev_queue_stats_mapping_set(struct rte_eth_dev *eth_dev, uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx); +static int ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + size_t fw_size); static void ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static const uint32_t *ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev); @@ -229,9 +239,11 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev, uint16_t reta_size); static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev); static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev); +static int ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev); static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev); static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev); -static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev); +static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle, void *param); static void ixgbe_dev_interrupt_delayed_handler(void *param); @@ -241,6 +253,7 @@ static void ixgbe_remove_rar(struct rte_eth_dev *dev, uint32_t index); static void ixgbe_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr); static void ixgbe_dcb_init(struct ixgbe_hw *hw, struct ixgbe_dcb_config *dcb_config); +static int is_ixgbe_pmd(const char *driver_name); /* For Virtual Function support */ static int eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev); @@ -274,12 +287,6 @@ static void ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev); static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct ether_addr * mac_addr, uint8_t on); static int ixgbe_uc_all_hash_table_set(struct rte_eth_dev *dev, uint8_t on); -static int ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, - uint16_t rx_mask, uint8_t on); -static int ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on); -static int ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on); -static int ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, - uint64_t pool_mask, uint8_t vlan_on); static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on); @@ -295,8 +302,6 @@ static void ixgbe_configure_msix(struct rte_eth_dev *dev); static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t tx_rate); -static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk); static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr, @@ -304,9 +309,6 @@ static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, static void 
ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index); static void ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr); -static int ixgbe_syn_filter_set(struct rte_eth_dev *dev, - struct rte_eth_syn_filter *filter, - bool add); static int ixgbe_syn_filter_get(struct rte_eth_dev *dev, struct rte_eth_syn_filter *filter); static int ixgbe_syn_filter_handle(struct rte_eth_dev *dev, @@ -316,17 +318,11 @@ static int ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter); static void ixgbe_remove_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter); -static int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, - struct rte_eth_ntuple_filter *filter, - bool add); static int ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); static int ixgbe_get_ntuple_filter(struct rte_eth_dev *dev, struct rte_eth_ntuple_filter *filter); -static int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, - struct rte_eth_ethertype_filter *filter, - bool add); static int ixgbe_ethertype_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); @@ -387,6 +383,8 @@ static int ixgbe_dev_udp_tunnel_port_add(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); +static int ixgbe_filter_restore(struct rte_eth_dev *dev); +static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev); /* * Define VF Stats MACRO for Non "cleared on read" register @@ -429,23 +427,80 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_ixgbe_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{ .vendor_id = 0, /* sentinel */ }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_BX) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_RNDC) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_560FLR) 
}, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_ECNA_DP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_EM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_LS) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP_N) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR) }, +#ifdef RTE_NIC_BYPASS + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS) }, +#endif + { .vendor_id = 0, /* sentinel */ }, }; - /* * The set of PCI devices this driver supports (for 82599 VF) */ static const struct rte_pci_id pci_id_ixgbevf_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" -{ .vendor_id = 0, /* sentinel */ }, - + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF_HV) }, + { .vendor_id = 0, /* sentinel */ }, }; static const struct rte_eth_desc_lim rx_desc_lim = { @@ -458,6 +513,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = 
IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -478,6 +535,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = { .xstats_reset = ixgbe_dev_xstats_reset, .xstats_get_names = ixgbe_dev_xstats_get_names, .queue_stats_mapping_set = ixgbe_dev_queue_stats_mapping_set, + .fw_version_get = ixgbe_fw_version_get, .dev_infos_get = ixgbe_dev_info_get, .dev_supported_ptypes_get = ixgbe_dev_supported_ptypes_get, .mtu_set = ixgbe_dev_mtu_set, @@ -509,12 +567,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = { .uc_all_hash_table_set = ixgbe_uc_all_hash_table_set, .mirror_rule_set = ixgbe_mirror_rule_set, .mirror_rule_reset = ixgbe_mirror_rule_reset, - .set_vf_rx_mode = ixgbe_set_pool_rx_mode, - .set_vf_rx = ixgbe_set_pool_rx, - .set_vf_tx = ixgbe_set_pool_tx, - .set_vf_vlan_filter = ixgbe_set_pool_vlan_filter, .set_queue_rate_limit = ixgbe_set_queue_rate_limit, - .set_vf_rate_limit = ixgbe_set_vf_rate_limit, .reta_update = ixgbe_dev_rss_reta_update, .reta_query = ixgbe_dev_rss_reta_query, #ifdef RTE_NIC_BYPASS @@ -685,6 +738,51 @@ static const struct rte_ixgbe_xstats_name_off rte_ixgbe_stats_strings[] = { #define IXGBE_NB_HW_STATS (sizeof(rte_ixgbe_stats_strings) / \ sizeof(rte_ixgbe_stats_strings[0])) +/* MACsec statistics */ +static const struct rte_ixgbe_xstats_name_off rte_ixgbe_macsec_strings[] = { + {"out_pkts_untagged", offsetof(struct ixgbe_macsec_stats, + out_pkts_untagged)}, + {"out_pkts_encrypted", offsetof(struct ixgbe_macsec_stats, + out_pkts_encrypted)}, + {"out_pkts_protected", offsetof(struct ixgbe_macsec_stats, + out_pkts_protected)}, + {"out_octets_encrypted", offsetof(struct ixgbe_macsec_stats, + out_octets_encrypted)}, + {"out_octets_protected", offsetof(struct ixgbe_macsec_stats, + out_octets_protected)}, + {"in_pkts_untagged", offsetof(struct ixgbe_macsec_stats, + in_pkts_untagged)}, + {"in_pkts_badtag", offsetof(struct ixgbe_macsec_stats, + in_pkts_badtag)}, + {"in_pkts_nosci", offsetof(struct ixgbe_macsec_stats, + in_pkts_nosci)}, + {"in_pkts_unknownsci", offsetof(struct ixgbe_macsec_stats, + in_pkts_unknownsci)}, + {"in_octets_decrypted", offsetof(struct ixgbe_macsec_stats, + in_octets_decrypted)}, + {"in_octets_validated", offsetof(struct ixgbe_macsec_stats, + in_octets_validated)}, + {"in_pkts_unchecked", offsetof(struct ixgbe_macsec_stats, + in_pkts_unchecked)}, + {"in_pkts_delayed", offsetof(struct ixgbe_macsec_stats, + in_pkts_delayed)}, + {"in_pkts_late", offsetof(struct ixgbe_macsec_stats, + in_pkts_late)}, + {"in_pkts_ok", offsetof(struct ixgbe_macsec_stats, + in_pkts_ok)}, + {"in_pkts_invalid", offsetof(struct ixgbe_macsec_stats, + in_pkts_invalid)}, + {"in_pkts_notvalid", offsetof(struct ixgbe_macsec_stats, + in_pkts_notvalid)}, + {"in_pkts_unusedsa", offsetof(struct ixgbe_macsec_stats, + in_pkts_unusedsa)}, + {"in_pkts_notusingsa", offsetof(struct ixgbe_macsec_stats, + in_pkts_notusingsa)}, +}; + +#define IXGBE_NB_MACSEC_STATS (sizeof(rte_ixgbe_macsec_strings) / \ + sizeof(rte_ixgbe_macsec_strings[0])) + /* Per-queue statistics */ static const struct rte_ixgbe_xstats_name_off rte_ixgbe_rxq_strings[] = { {"mbuf_allocation_errors", offsetof(struct ixgbe_hw_stats, rnbc)}, @@ -800,6 +898,8 @@ ixgbe_pf_reset_hw(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); IXGBE_WRITE_FLUSH(hw); + if (status == IXGBE_ERR_SFP_NOT_PRESENT) + status = IXGBE_SUCCESS; return status; } @@ -1024,7 
+1124,8 @@ ixgbe_swfw_lock_reset(struct ixgbe_hw *hw) static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct ixgbe_vfta *shadow_vfta = @@ -1044,6 +1145,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary @@ -1068,9 +1170,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; /* Vendor and Device ID need to be set before init of shared code */ hw->device_id = pci_dev->id.device_id; @@ -1137,6 +1239,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) diag = ixgbe_init_hw(hw); } + if (diag == IXGBE_ERR_SFP_NOT_PRESENT) + diag = IXGBE_SUCCESS; + if (diag == IXGBE_ERR_EEPROM_VERSION) { PMD_INIT_LOG(ERR, "This device is a pre-production adapter/" "LOM. Please be aware there may be issues associated " @@ -1213,20 +1318,34 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - rte_intr_callback_register(&pci_dev->intr_handle, - ixgbe_dev_interrupt_handler, - (void *)eth_dev); + rte_intr_callback_register(intr_handle, + ixgbe_dev_interrupt_handler, eth_dev); /* enable uio/vfio intr/eventfd mapping */ - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_enable(intr_handle); /* enable support intr */ ixgbe_enable_intr(eth_dev); + /* initialize filter info */ + memset(filter_info, 0, + sizeof(struct ixgbe_filter_info)); + /* initialize 5tuple filter list */ TAILQ_INIT(&filter_info->fivetuple_list); - memset(filter_info->fivetuple_mask, 0, - sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); + + /* initialize flow director filter list & hash */ + ixgbe_fdir_filter_init(eth_dev); + + /* initialize l2 tunnel filter list & hash */ + ixgbe_l2_tn_filter_init(eth_dev); + + TAILQ_INIT(&filter_ntuple_list); + TAILQ_INIT(&filter_ethertype_list); + TAILQ_INIT(&filter_syn_list); + TAILQ_INIT(&filter_fdir_list); + TAILQ_INIT(&filter_l2_tunnel_list); + TAILQ_INIT(&ixgbe_flow_list); return 0; } @@ -1234,7 +1353,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw; PMD_INIT_FUNC_TRACE(); @@ -1243,7 +1363,6 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) return -EPERM; hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; if (hw->adapter_stopped == 0) ixgbe_dev_close(eth_dev); @@ -1256,9 +1375,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) ixgbe_swfw_lock_reset(hw); /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - ixgbe_dev_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + ixgbe_dev_interrupt_handler, eth_dev); /* uninitialize PF if max_vfs not zero */ ixgbe_pf_host_uninit(eth_dev); @@ -1269,9 
+1388,154 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) rte_free(eth_dev->data->hash_mac_addrs); eth_dev->data->hash_mac_addrs = NULL; + /* remove all the fdir filters & hash */ + ixgbe_fdir_filter_uninit(eth_dev); + + /* remove all the L2 tunnel filters & hash */ + ixgbe_l2_tn_filter_uninit(eth_dev); + + /* Remove all ntuple filters of the device */ + ixgbe_ntuple_filter_uninit(eth_dev); + + /* clear all the filters list */ + ixgbe_filterlist_flush(); + + return 0; +} + +static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private); + struct ixgbe_5tuple_filter *p_5tuple; + + while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list))) { + TAILQ_REMOVE(&filter_info->fivetuple_list, + p_5tuple, + entries); + rte_free(p_5tuple); + } + memset(filter_info->fivetuple_mask, 0, + sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); + + return 0; +} + +static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private); + struct ixgbe_fdir_filter *fdir_filter; + + if (fdir_info->hash_map) + rte_free(fdir_info->hash_map); + if (fdir_info->hash_handle) + rte_hash_free(fdir_info->hash_handle); + + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, + fdir_filter, + entries); + rte_free(fdir_filter); + } + + return 0; +} + +static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private); + struct ixgbe_l2_tn_filter *l2_tn_filter; + + if (l2_tn_info->hash_map) + rte_free(l2_tn_info->hash_map); + if (l2_tn_info->hash_handle) + rte_hash_free(l2_tn_info->hash_handle); + + while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) { + TAILQ_REMOVE(&l2_tn_info->l2_tn_list, + l2_tn_filter, + entries); + rte_free(l2_tn_filter); + } + + return 0; +} + +static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private); + char fdir_hash_name[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters fdir_hash_params = { + .name = fdir_hash_name, + .entries = IXGBE_MAX_FDIR_FILTER_NUM, + .key_len = sizeof(union ixgbe_atr_input), + .hash_func = rte_hash_crc, + .hash_func_init_val = 0, + .socket_id = rte_socket_id(), + }; + + TAILQ_INIT(&fdir_info->fdir_list); + snprintf(fdir_hash_name, RTE_HASH_NAMESIZE, + "fdir_%s", eth_dev->data->name); + fdir_info->hash_handle = rte_hash_create(&fdir_hash_params); + if (!fdir_info->hash_handle) { + PMD_INIT_LOG(ERR, "Failed to create fdir hash table!"); + return -EINVAL; + } + fdir_info->hash_map = rte_zmalloc("ixgbe", + sizeof(struct ixgbe_fdir_filter *) * + IXGBE_MAX_FDIR_FILTER_NUM, + 0); + if (!fdir_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for fdir hash map!"); + return -ENOMEM; + } + fdir_info->mask_added = FALSE; + return 0; } +static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private); + char l2_tn_hash_name[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters l2_tn_hash_params = { + .name = l2_tn_hash_name, + .entries = IXGBE_MAX_L2_TN_FILTER_NUM, + .key_len = sizeof(struct ixgbe_l2_tn_key), + .hash_func = rte_hash_crc, + .hash_func_init_val = 0, + .socket_id = rte_socket_id(), + }; 
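ixgbe_fdir_filter_init() above only creates the bookkeeping: an rte_hash keyed on union ixgbe_atr_input plus a flat hash_map[] array of filter pointers sized to IXGBE_MAX_FDIR_FILTER_NUM. The sketch below shows how the two are intended to work together, with rte_hash_add_key() returning the slot index that indexes hash_map[]. This is an assumed usage pattern for illustration, not code from the patch; the real insert/lookup helpers live elsewhere in this file.

    #include <rte_hash.h>

    /* Illustrative insert path: key -> rte_hash slot -> hash_map[slot] -> filter.
     * 'key' is the union ixgbe_atr_input describing the flow; 'filter' is the
     * driver object to remember for that flow. */
    static struct ixgbe_fdir_filter *
    example_fdir_filter_insert(struct ixgbe_hw_fdir_info *fdir_info,
    			   const union ixgbe_atr_input *key,
    			   struct ixgbe_fdir_filter *filter)
    {
    	int slot;

    	slot = rte_hash_lookup(fdir_info->hash_handle, key);
    	if (slot >= 0)
    		return fdir_info->hash_map[slot];	/* already present */

    	slot = rte_hash_add_key(fdir_info->hash_handle, key);
    	if (slot < 0)
    		return NULL;				/* table full or error */

    	fdir_info->hash_map[slot] = filter;
    	TAILQ_INSERT_TAIL(&fdir_info->fdir_list, filter, entries);
    	return filter;
    }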
+ + TAILQ_INIT(&l2_tn_info->l2_tn_list); + snprintf(l2_tn_hash_name, RTE_HASH_NAMESIZE, + "l2_tn_%s", eth_dev->data->name); + l2_tn_info->hash_handle = rte_hash_create(&l2_tn_hash_params); + if (!l2_tn_info->hash_handle) { + PMD_INIT_LOG(ERR, "Failed to create L2 TN hash table!"); + return -EINVAL; + } + l2_tn_info->hash_map = rte_zmalloc("ixgbe", + sizeof(struct ixgbe_l2_tn_filter *) * + IXGBE_MAX_L2_TN_FILTER_NUM, + 0); + if (!l2_tn_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for L2 TN hash map!"); + return -ENOMEM; + } + l2_tn_info->e_tag_en = FALSE; + l2_tn_info->e_tag_fwd_en = FALSE; + l2_tn_info->e_tag_ether_type = DEFAULT_ETAG_ETYPE; + + return 0; +} /* * Negotiate mailbox API version with the PF. * After reset API version is always set to the basic one (ixgbe_mbox_api_10). @@ -1322,7 +1586,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) { int diag; uint32_t tc, tcs; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct ixgbe_vfta *shadow_vfta = @@ -1360,9 +1625,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; - rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->device_id = pci_dev->id.device_id; hw->vendor_id = pci_dev->id.vendor_id; @@ -1454,10 +1718,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) return -EIO; } - rte_intr_callback_register(&pci_dev->intr_handle, - ixgbevf_dev_interrupt_handler, - (void *)eth_dev); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_callback_register(intr_handle, + ixgbevf_dev_interrupt_handler, eth_dev); + rte_intr_enable(intr_handle); ixgbevf_intr_enable(hw); PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s", @@ -1472,8 +1735,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) static int eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw; - struct rte_pci_device *pci_dev = eth_dev->pci_dev; PMD_INIT_FUNC_TRACE(); @@ -1495,20 +1759,19 @@ eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev) rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; - rte_intr_disable(&pci_dev->intr_handle); - rte_intr_callback_unregister(&pci_dev->intr_handle, - ixgbevf_dev_interrupt_handler, - (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + ixgbevf_dev_interrupt_handler, eth_dev); return 0; } static struct eth_driver rte_ixgbe_pmd = { .pci_drv = { - .name = "rte_ixgbe_pmd", .id_table = pci_id_ixgbe_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_ixgbe_dev_init, .eth_dev_uninit = eth_ixgbe_dev_uninit, @@ -1520,43 +1783,16 @@ static struct eth_driver rte_ixgbe_pmd = { */ static struct eth_driver rte_ixgbevf_pmd = { .pci_drv = { - .name = "rte_ixgbevf_pmd", .id_table = pci_id_ixgbevf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_ixgbevf_dev_init, .eth_dev_uninit = 
eth_ixgbevf_dev_uninit, .dev_private_size = sizeof(struct ixgbe_adapter), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI IXGBE devices. - */ -static int -rte_ixgbe_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_ixgbe_pmd); - return 0; -} - -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI niantic devices. - */ -static int -rte_ixgbevf_pmd_init(const char *name __rte_unused, const char *param __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_ixgbevf_pmd); - return 0; -} - static int ixgbe_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) { @@ -1768,6 +2004,7 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t ctrl; uint16_t i; + struct ixgbe_rx_queue *rxq; PMD_INIT_FUNC_TRACE(); @@ -1778,9 +2015,10 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev) } else { /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */ for (i = 0; i < dev->data->nb_rx_queues; i++) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + rxq = dev->data->rx_queues[i]; + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); ctrl &= ~IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl); /* record those setting for HW strip per queue */ ixgbe_vlan_hw_strip_bitmap_set(dev, i, 0); @@ -1795,6 +2033,7 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t ctrl; uint16_t i; + struct ixgbe_rx_queue *rxq; PMD_INIT_FUNC_TRACE(); @@ -1805,9 +2044,10 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev) } else { /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */ for (i = 0; i < dev->data->nb_rx_queues; i++) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + rxq = dev->data->rx_queues[i]; + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); ctrl |= IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl); /* record those setting for HW strip per queue */ ixgbe_vlan_hw_strip_bitmap_set(dev, i, 1); @@ -1910,6 +2150,8 @@ ixgbe_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) static int ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + switch (nb_rx_q) { case 1: case 2: @@ -1923,7 +2165,7 @@ ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q) } RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q; - RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = dev->pci_dev->max_vfs * nb_rx_q; + RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = pci_dev->max_vfs * nb_rx_q; return 0; } @@ -1940,6 +2182,8 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev) /* check multi-queue mode */ switch (dev_conf->rxmode.mq_mode) { case ETH_MQ_RX_VMDQ_DCB: + PMD_INIT_LOG(INFO, "ETH_MQ_RX_VMDQ_DCB mode supported in SRIOV"); + break; case ETH_MQ_RX_VMDQ_DCB_RSS: /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ PMD_INIT_LOG(ERR, "SRIOV active," @@ -1975,11 +2219,9 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev) switch (dev_conf->txmode.mq_mode) { case ETH_MQ_TX_VMDQ_DCB: - /* DCB VMDQ in SRIOV mode, not implement yet */ - PMD_INIT_LOG(ERR, "SRIOV is active," - " unsupported VMDQ mq_mode tx %d.", - 
dev_conf->txmode.mq_mode); - return -EINVAL; + PMD_INIT_LOG(INFO, "ETH_MQ_TX_VMDQ_DCB mode supported in SRIOV"); + dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB; + break; default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */ dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY; break; @@ -2154,7 +2396,8 @@ ixgbe_dev_start(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; int err, link_up = 0, negotiate = 0; uint32_t speed = 0; @@ -2216,7 +2459,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -2234,6 +2477,37 @@ ixgbe_dev_start(struct rte_eth_dev *dev) goto error; } + mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK; + ixgbe_vlan_offload_set(dev, mask); + + if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) { + /* Enable vlan filtering for VMDq */ + ixgbe_vmdq_vlan_hw_filter_enable(dev); + } + + /* Configure DCB hw */ + ixgbe_configure_dcb(dev); + + if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) { + err = ixgbe_fdir_configure(dev); + if (err) + goto error; + } + + /* Restore vf rate limit */ + if (vfinfo != NULL) { + for (vf = 0; vf < pci_dev->max_vfs; vf++) + for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++) + if (vfinfo[vf].tx_rate[idx] != 0) + rte_pmd_ixgbe_set_vf_rate_limit( + dev->data->port_id, vf, + vfinfo[vf].tx_rate[idx], + 1 << idx); + } + + ixgbe_restore_statistics_mapping(dev); + err = ixgbe_dev_rxtx_start(dev); if (err < 0) { PMD_INIT_LOG(ERR, "Unable to start rxtx queues"); @@ -2299,13 +2573,13 @@ skip_link_setup: /* check if lsc interrupt is enabled */ if (dev->data->dev_conf.intr_conf.lsc != 0) ixgbe_dev_lsc_interrupt_setup(dev); + ixgbe_dev_macsec_interrupt_setup(dev); } else { rte_intr_callback_unregister(intr_handle, - ixgbe_dev_interrupt_handler, - (void *)dev); + ixgbe_dev_interrupt_handler, dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplex"); } /* check if rxq interrupt is enabled */ @@ -2318,36 +2592,8 @@ skip_link_setup: /* resume enabled intr since hw reset */ ixgbe_enable_intr(dev); - - mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | - ETH_VLAN_EXTEND_MASK; - ixgbe_vlan_offload_set(dev, mask); - - if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) { - /* Enable vlan filtering for VMDq */ - ixgbe_vmdq_vlan_hw_filter_enable(dev); - } - - /* Configure DCB hw */ - ixgbe_configure_dcb(dev); - - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) { - err = ixgbe_fdir_configure(dev); - if (err) - goto error; - } - - /* Restore vf rate limit */ - if (vfinfo != NULL) { - for (vf = 0; vf < dev->pci_dev->max_vfs; vf++) - for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++) - if (vfinfo[vf].tx_rate[idx] != 0) - ixgbe_set_vf_rate_limit(dev, vf, - vfinfo[vf].tx_rate[idx], - 1 << idx); - } - - ixgbe_restore_statistics_mapping(dev); + ixgbe_l2_tunnel_conf(dev); + ixgbe_filter_restore(dev); return 0; @@ -2368,10 +2614,8 @@ 
ixgbe_dev_stop(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - struct ixgbe_filter_info *filter_info = - IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); - struct ixgbe_5tuple_filter *p_5tuple, *p_5tuple_next; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int vf; PMD_INIT_FUNC_TRACE(); @@ -2386,8 +2630,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) /* stop adapter */ ixgbe_stop_adapter(hw); - for (vf = 0; vfinfo != NULL && - vf < dev->pci_dev->max_vfs; vf++) + for (vf = 0; vfinfo != NULL && vf < pci_dev->max_vfs; vf++) vfinfo[vf].clear_to_send = false; if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) { @@ -2408,17 +2651,6 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) memset(&link, 0, sizeof(link)); rte_ixgbe_dev_atomic_write_link_status(dev, &link); - /* Remove all ntuple filters of the device */ - for (p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list); - p_5tuple != NULL; p_5tuple = p_5tuple_next) { - p_5tuple_next = TAILQ_NEXT(p_5tuple, entries); - TAILQ_REMOVE(&filter_info->fivetuple_list, - p_5tuple, entries); - rte_free(p_5tuple); - } - memset(filter_info->fivetuple_mask, 0, - sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); - if (!rte_intr_allow_others(intr_handle)) /* resume to the default handler */ rte_intr_callback_register(intr_handle, @@ -2520,6 +2752,7 @@ ixgbe_dev_close(struct rte_eth_dev *dev) static void ixgbe_read_stats_registers(struct ixgbe_hw *hw, struct ixgbe_hw_stats *hw_stats, + struct ixgbe_macsec_stats *macsec_stats, uint64_t *total_missed_rx, uint64_t *total_qbrc, uint64_t *total_qprc, uint64_t *total_qprdc) { @@ -2527,9 +2760,9 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw, uint32_t delta_gprc = 0; unsigned i; /* Workaround for RX byte count not including CRC bytes when CRC -+ * strip is enabled. CRC bytes are removed from counters when crc_strip + * strip is enabled. CRC bytes are removed from counters when crc_strip * is disabled. 
-+ */ + */ int crc_strip = (IXGBE_READ_REG(hw, IXGBE_HLREG0) & IXGBE_HLREG0_RXCRCSTRP); @@ -2689,6 +2922,40 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw, /* Flow Director Stats registers */ hw_stats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); hw_stats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); + + /* MACsec Stats registers */ + macsec_stats->out_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECTXUT); + macsec_stats->out_pkts_encrypted += + IXGBE_READ_REG(hw, IXGBE_LSECTXPKTE); + macsec_stats->out_pkts_protected += + IXGBE_READ_REG(hw, IXGBE_LSECTXPKTP); + macsec_stats->out_octets_encrypted += + IXGBE_READ_REG(hw, IXGBE_LSECTXOCTE); + macsec_stats->out_octets_protected += + IXGBE_READ_REG(hw, IXGBE_LSECTXOCTP); + macsec_stats->in_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECRXUT); + macsec_stats->in_pkts_badtag += IXGBE_READ_REG(hw, IXGBE_LSECRXBAD); + macsec_stats->in_pkts_nosci += IXGBE_READ_REG(hw, IXGBE_LSECRXNOSCI); + macsec_stats->in_pkts_unknownsci += + IXGBE_READ_REG(hw, IXGBE_LSECRXUNSCI); + macsec_stats->in_octets_decrypted += + IXGBE_READ_REG(hw, IXGBE_LSECRXOCTD); + macsec_stats->in_octets_validated += + IXGBE_READ_REG(hw, IXGBE_LSECRXOCTV); + macsec_stats->in_pkts_unchecked += IXGBE_READ_REG(hw, IXGBE_LSECRXUNCH); + macsec_stats->in_pkts_delayed += IXGBE_READ_REG(hw, IXGBE_LSECRXDELAY); + macsec_stats->in_pkts_late += IXGBE_READ_REG(hw, IXGBE_LSECRXLATE); + for (i = 0; i < 2; i++) { + macsec_stats->in_pkts_ok += + IXGBE_READ_REG(hw, IXGBE_LSECRXOK(i)); + macsec_stats->in_pkts_invalid += + IXGBE_READ_REG(hw, IXGBE_LSECRXINV(i)); + macsec_stats->in_pkts_notvalid += + IXGBE_READ_REG(hw, IXGBE_LSECRXNV(i)); + } + macsec_stats->in_pkts_unusedsa += IXGBE_READ_REG(hw, IXGBE_LSECRXUNSA); + macsec_stats->in_pkts_notusingsa += + IXGBE_READ_REG(hw, IXGBE_LSECRXNUSA); } /* @@ -2701,6 +2968,9 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_stats *hw_stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc; unsigned i; @@ -2709,8 +2979,8 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) total_qprc = 0; total_qprdc = 0; - ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc, - &total_qprc, &total_qprdc); + ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx, + &total_qbrc, &total_qprc, &total_qprdc); if (stats == NULL) return; @@ -2762,7 +3032,7 @@ ixgbe_dev_stats_reset(struct rte_eth_dev *dev) /* This function calculates the number of xstats based on the current config */ static unsigned ixgbe_xstats_calc_num(void) { - return IXGBE_NB_HW_STATS + + return IXGBE_NB_HW_STATS + IXGBE_NB_MACSEC_STATS + (IXGBE_NB_RXQ_PRIO_STATS * IXGBE_NB_RXQ_PRIO_VALUES) + (IXGBE_NB_TXQ_PRIO_STATS * IXGBE_NB_TXQ_PRIO_VALUES); } @@ -2789,6 +3059,15 @@ static int ixgbe_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, count++; } + /* MACsec Stats */ + for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "%s", + rte_ixgbe_macsec_strings[i].name); + count++; + } + /* RX Priority Stats */ for (stat = 0; stat < IXGBE_NB_RXQ_PRIO_STATS; stat++) { for (i = 0; i < IXGBE_NB_RXQ_PRIO_VALUES; i++) { @@ -2838,6 +3117,9 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_stats *hw_stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc; unsigned i, stat, count = 0; @@ -2851,8 +3133,8 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, total_qprc = 0; total_qprdc = 0; - ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc, - &total_qprc, &total_qprdc); + ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx, + &total_qbrc, &total_qprc, &total_qprdc); /* If this is a reset xstats is NULL, and we have cleared the * registers by reading them. @@ -2865,6 +3147,15 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < IXGBE_NB_HW_STATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_stats_strings[i].offset); + xstats[count].id = count; + count++; + } + + /* MACsec Stats */ + for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) { + xstats[count].value = *(uint64_t *)(((char *)macsec_stats) + + rte_ixgbe_macsec_strings[i].offset); + xstats[count].id = count; count++; } @@ -2874,6 +3165,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_rxq_strings[stat].offset + (sizeof(uint64_t) * i)); + xstats[count].id = count; count++; } } @@ -2884,6 +3176,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_txq_strings[stat].offset + (sizeof(uint64_t) * i)); + xstats[count].id = count; count++; } } @@ -2895,6 +3188,9 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev) { struct ixgbe_hw_stats *stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); unsigned count = ixgbe_xstats_calc_num(); @@ -2903,6 +3199,7 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev) /* Reset software totals */ memset(stats, 0, sizeof(*stats)); + memset(macsec_stats, 0, sizeof(*macsec_stats)); } static void @@ -2991,12 +3288,35 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev) hw_stats->vfgotc = 0; } +static int +ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + u16 eeprom_verh, eeprom_verl; + u32 etrack_id; + int ret; + + ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh); + ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl); + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + ret = snprintf(fw_version, fw_size, "0x%08x", etrack_id); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues; if (RTE_ETH_DEV_SRIOV(dev).active == 0) { @@ -3012,7 +3332,7 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS 
register */ dev_info->max_mac_addrs = hw->mac.num_rar_entries; dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; if (hw->mac.type == ixgbe_mac_82598EB) dev_info->max_vmdq_pools = ETH_16_POOLS; else @@ -3033,6 +3353,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) !RTE_ETH_DEV_SRIOV(dev).active) dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO; + if (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_MACSEC_STRIP; + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) @@ -3046,6 +3370,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_SCTP_CKSUM | DEV_TX_OFFLOAD_TCP_TSO; + if (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT; + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) @@ -3126,15 +3454,17 @@ static void ixgbevf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues; dev_info->min_rx_bufsize = 1024; /* cf BSIZEPACKET in SRRCTL reg */ - dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS reg */ + dev_info->max_rx_pktlen = 9728; /* includes CRC, cf MAXFRS reg */ dev_info->max_mac_addrs = hw->mac.num_rar_entries; dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; if (hw->mac.type == ixgbe_mac_82598EB) dev_info->max_vmdq_pools = ETH_16_POOLS; else @@ -3341,6 +3671,28 @@ ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) return 0; } +/** + * It clears the interrupt causes and enables the interrupt. + * It will be called once only during nic initialized. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +static int +ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev) +{ + struct ixgbe_interrupt *intr = + IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + + intr->mask |= IXGBE_EICR_LINKSEC; + + return 0; +} + /* * It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update. 
* @@ -3375,6 +3727,9 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev) if (eicr & IXGBE_EICR_MAILBOX) intr->flags |= IXGBE_FLAG_MAILBOX; + if (eicr & IXGBE_EICR_LINKSEC) + intr->flags |= IXGBE_FLAG_MACSEC; + if (hw->mac.type == ixgbe_mac_X550EM_x && hw->phy.type == ixgbe_phy_x550em_ext_t && (eicr & IXGBE_EICR_GPI_SDP0_X550EM_x)) @@ -3396,6 +3751,7 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev) static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct rte_eth_link link; memset(&link, 0, sizeof(link)); @@ -3410,11 +3766,11 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev) PMD_INIT_LOG(INFO, " Port %d: Link Down", (int)(dev->data->port_id)); } - PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, - dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, - dev->pci_dev->addr.function); + PMD_INIT_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT, + pci_dev->addr.domain, + pci_dev->addr.bus, + pci_dev->addr.devid, + pci_dev->addr.function); } /* @@ -3428,13 +3784,13 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev) * - On failure, a negative value. */ static int -ixgbe_dev_interrupt_action(struct rte_eth_dev *dev) +ixgbe_dev_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { struct ixgbe_interrupt *intr = IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); int64_t timeout; struct rte_eth_link link; - int intr_enable_delay = false; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -3467,20 +3823,19 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev) timeout = IXGBE_LINK_DOWN_CHECK_TIMEOUT; ixgbe_dev_link_status_print(dev); - - intr_enable_delay = true; - } - - if (intr_enable_delay) { + intr->mask_original = intr->mask; + /* only disable lsc interrupt */ + intr->mask &= ~IXGBE_EIMS_LSC; if (rte_eal_alarm_set(timeout * 1000, ixgbe_dev_interrupt_delayed_handler, (void *)dev) < 0) PMD_DRV_LOG(ERR, "Error setting alarm"); - } else { - PMD_DRV_LOG(DEBUG, "enable intr immediately"); - ixgbe_enable_intr(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + else + intr->mask = intr->mask_original; } + PMD_DRV_LOG(DEBUG, "enable intr immediately"); + ixgbe_enable_intr(dev); + rte_intr_enable(intr_handle); return 0; } @@ -3503,12 +3858,16 @@ static void ixgbe_dev_interrupt_delayed_handler(void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_interrupt *intr = IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t eicr; + ixgbe_disable_intr(hw); + eicr = IXGBE_READ_REG(hw, IXGBE_EICR); if (eicr & IXGBE_EICR_MAILBOX) ixgbe_pf_mbx_process(dev); @@ -3522,12 +3881,22 @@ ixgbe_dev_interrupt_delayed_handler(void *param) ixgbe_dev_link_update(dev, 0); intr->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; ixgbe_dev_link_status_print(dev); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + } + + if (intr->flags & IXGBE_FLAG_MACSEC) { + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_MACSEC, + NULL); + intr->flags &= ~IXGBE_FLAG_MACSEC; } + /* restore original mask */ + intr->mask = intr->mask_original; + intr->mask_original = 0; + PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - 
rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); } /** @@ -3543,13 +3912,13 @@ ixgbe_dev_interrupt_delayed_handler(void *param) * void */ static void -ixgbe_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; ixgbe_dev_interrupt_get_status(dev); - ixgbe_dev_interrupt_action(dev); + ixgbe_dev_interrupt_action(dev, handle); } static int @@ -3913,7 +4282,7 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != sp_reta_size) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, sp_reta_size); + "(%d)", reta_size, sp_reta_size); return -EINVAL; } @@ -3960,7 +4329,7 @@ ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != sp_reta_size) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, sp_reta_size); + "(%d)", reta_size, sp_reta_size); return -EINVAL; } @@ -4012,25 +4381,72 @@ ixgbe_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr) } static int -ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +is_ixgbe_pmd(const char *driver_name) +{ + if (!strstr(driver_name, "ixgbe")) + return -ENOTSUP; + + if (strstr(driver_name, "ixgbe_vf")) + return -ENOTSUP; + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf, + struct ether_addr *mac_addr) { - uint32_t hlreg0; - uint32_t maxfrs; struct ixgbe_hw *hw; + struct ixgbe_vf_info *vfinfo; + int rar_entry; + uint8_t *new_mac = (uint8_t *)(mac_addr); + struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; - uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - ixgbe_dev_info_get(dev, &dev_info); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); - /* check that mtu is within the allowed range */ - if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen)) + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) return -EINVAL; - /* refuse mtu that requires the support of scattered packets when this - * feature has not been enabled before. - */ - if (!dev->data->scattered_rx && - (frame_size + 2 * IXGBE_VLAN_TAG_SIZE > + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); + rar_entry = hw->mac.num_rar_entries - (vf + 1); + + if (is_valid_assigned_ether_addr((struct ether_addr *)new_mac)) { + rte_memcpy(vfinfo[vf].vf_mac_addresses, new_mac, + ETHER_ADDR_LEN); + return hw->mac.ops.set_rar(hw, rar_entry, new_mac, vf, + IXGBE_RAH_AV); + } + return -EINVAL; +} + +static int +ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + uint32_t hlreg0; + uint32_t maxfrs; + struct ixgbe_hw *hw; + struct rte_eth_dev_info dev_info; + uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + + ixgbe_dev_info_get(dev, &dev_info); + + /* check that mtu is within the allowed range */ + if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen)) + return -EINVAL; + + /* refuse mtu that requires the support of scattered packets when this + * feature has not been enabled before. 
+ */ + if (!dev->data->scattered_rx && + (frame_size + 2 * IXGBE_VLAN_TAG_SIZE > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM)) return -EINVAL; @@ -4127,7 +4543,8 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t intr_vector = 0; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int err, mask = 0; @@ -4172,7 +4589,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -4190,7 +4607,8 @@ static void ixgbevf_dev_stop(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -4332,14 +4750,14 @@ ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask) } static int -ixgbe_vmdq_mode_check(struct ixgbe_hw *hw) +ixgbe_vt_check(struct ixgbe_hw *hw) { uint32_t reg_val; - /* we only need to do this if VMDq is enabled */ + /* if Virtualization Technology is enabled */ reg_val = IXGBE_READ_REG(hw, IXGBE_VT_CTL); if (!(reg_val & IXGBE_VT_CTL_VT_ENABLE)) { - PMD_INIT_LOG(ERR, "VMDq must be enabled for this setting"); + PMD_INIT_LOG(ERR, "VT must be enabled for this setting"); return -1; } @@ -4477,22 +4895,274 @@ ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val) return new_val; } -static int -ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, - uint16_t rx_mask, uint8_t on) + +int +rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + struct ixgbe_mac_info *mac; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + mac = &hw->mac; + + mac->ops.set_vlan_anti_spoofing(hw, on, vf); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + struct ixgbe_mac_info *mac; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + mac = &hw->mac; + mac->ops.set_mac_anti_spoofing(hw, on, vf); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, uint16_t vlan_id) +{ + struct ixgbe_hw *hw; + uint32_t ctrl; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + 
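Each rte_pmd_ixgbe_* entry point introduced here repeats the same guard sequence seen in this function: validate the port id, confirm through the driver name that the port is bound to the ixgbe PF PMD, then range-check the vf/on arguments before touching any register. A hedged caller-side sketch (port 0 and VF 1 are placeholder values, and the rte_pmd_ixgbe.h header is assumed from elsewhere in this patch) might be:

#include <stdint.h>
#include "rte_pmd_ixgbe.h"	/* assumed: public header added by this patch */

/* Sketch: turn on MAC and VLAN anti-spoofing for one VF of an ixgbe PF port. */
static int
example_enable_vf_anti_spoof(void)
{
	uint8_t port = 0;	/* placeholder port id */
	uint16_t vf = 1;	/* placeholder VF index */
	int ret;

	ret = rte_pmd_ixgbe_set_vf_mac_anti_spoof(port, vf, 1);
	if (ret != 0)
		return ret;	/* -ENODEV, -ENOTSUP or -EINVAL from the guards above */

	return rte_pmd_ixgbe_set_vf_vlan_anti_spoof(port, vf, 1);
}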
if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (vlan_id > ETHER_MAX_VLAN_ID) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + ctrl = IXGBE_READ_REG(hw, IXGBE_VMVIR(vf)); + if (vlan_id) { + ctrl = vlan_id; + ctrl |= IXGBE_VMVIR_VLANA_DEFAULT; + } else { + ctrl = 0; + } + + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t ctrl; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + ctrl = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC); + /* enable or disable VMDQ loopback */ + if (on) + ctrl |= IXGBE_PFDTXGSWC_VT_LBEN; + else + ctrl &= ~IXGBE_PFDTXGSWC_VT_LBEN; + + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t reg_value; + int i; + int num_queues = (int)(IXGBE_QDE_IDX_MASK >> IXGBE_QDE_IDX_SHIFT); + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + for (i = 0; i <= num_queues; i++) { + reg_value = IXGBE_QDE_WRITE | + (i << IXGBE_QDE_IDX_SHIFT) | + (on & IXGBE_QDE_ENABLE); + IXGBE_WRITE_REG(hw, IXGBE_QDE, reg_value); + } + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t reg_value; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + /* only support VF's 0 to 63 */ + if ((vf >= dev_info.max_vfs) || (vf > 63)) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + reg_value = IXGBE_READ_REG(hw, IXGBE_SRRCTL(vf)); + if (on) + reg_value |= IXGBE_SRRCTL_DROP_EN; + else + reg_value &= ~IXGBE_SRRCTL_DROP_EN; + + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(vf), reg_value); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint16_t queues_per_pool; + uint32_t q; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + + /* The PF has 128 queue pairs and in SRIOV configuration + * those queues will be assigned to VF's, so RXDCTL + * registers will be dealing with queues which will be + * assigned to VF's. + * Let's say we have SRIOV configured with 31 VF's then the + * first 124 queues 0-123 will be allocated to VF's and only + * the last 4 queues 123-127 will be assigned to the PF. 
+ */ + + queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; + + for (q = 0; q < queues_per_pool; q++) + (*dev->dev_ops->vlan_strip_queue_set)(dev, + q + vf * queues_per_pool, on); + return 0; +} + +int +rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on) { int val = 0; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ixgbe_hw *hw; + uint32_t vmolr; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool)); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); if (hw->mac.type == ixgbe_mac_82598EB) { PMD_INIT_LOG(ERR, "setting VF receive mode set should be done" " on 82599 hardware and newer"); return -ENOTSUP; } - if (ixgbe_vmdq_mode_check(hw) < 0) + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; val = ixgbe_convert_vm_rx_mask_to_val(rx_mask, val); @@ -4502,34 +5172,47 @@ ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, else vmolr &= ~val; - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); return 0; } -static int -ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) +int +rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; uint32_t reg, addr; uint32_t val; const uint8_t bit1 = 0x1; + struct ixgbe_hw *hw; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); - if (ixgbe_vmdq_mode_check(hw) < 0) + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) return -ENOTSUP; - if (pool >= ETH_64_POOLS) + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) return -EINVAL; - /* for pool >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */ - if (pool >= 32) { + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (ixgbe_vt_check(hw) < 0) + return -ENOTSUP; + + /* for vf >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */ + if (vf >= 32) { addr = IXGBE_VFRE(1); - val = bit1 << (pool - 32); + val = bit1 << (vf - 32); } else { addr = IXGBE_VFRE(0); - val = bit1 << pool; + val = bit1 << vf; } reg = IXGBE_READ_REG(hw, addr); @@ -4544,29 +5227,42 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) return 0; } -static int -ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) +int +rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; uint32_t reg, addr; uint32_t val; const uint8_t bit1 = 0x1; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_hw *hw; - if (ixgbe_vmdq_mode_check(hw) < 0) + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) return -ENOTSUP; - if (pool >= ETH_64_POOLS) + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) return -EINVAL; - /* for pool >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */ - if (pool >= 32) { + hw = 
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (ixgbe_vt_check(hw) < 0) + return -ENOTSUP; + + /* for vf >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */ + if (vf >= 32) { addr = IXGBE_VFTE(1); - val = bit1 << (pool - 32); + val = bit1 << (vf - 32); } else { addr = IXGBE_VFTE(0); - val = bit1 << pool; + val = bit1 << vf; } reg = IXGBE_READ_REG(hw, addr); @@ -4581,20 +5277,34 @@ ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) return 0; } -static int -ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, - uint64_t pool_mask, uint8_t vlan_on) +int +rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, + uint64_t vf_mask, uint8_t vlan_on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; int ret = 0; - uint16_t pool_idx; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint16_t vf_idx; + struct ixgbe_hw *hw; - if (ixgbe_vmdq_mode_check(hw) < 0) + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if ((vlan > ETHER_MAX_VLAN_ID) || (vf_mask == 0)) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; - for (pool_idx = 0; pool_idx < ETH_64_POOLS; pool_idx++) { - if (pool_mask & ((uint64_t)(1ULL << pool_idx))) { - ret = hw->mac.ops.set_vfta(hw, vlan, pool_idx, + + for (vf_idx = 0; vf_idx < 64; vf_idx++) { + if (vf_mask & ((uint64_t)(1ULL << vf_idx))) { + ret = hw->mac.ops.set_vfta(hw, vlan, vf_idx, vlan_on, false); if (ret < 0) return ret; @@ -4604,59 +5314,141 @@ ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, return ret; } -#define IXGBE_MRCTL_VPME 0x01 /* Virtual Pool Mirroring. */ -#define IXGBE_MRCTL_UPME 0x02 /* Uplink Port Mirroring. */ -#define IXGBE_MRCTL_DPME 0x04 /* Downlink Port Mirroring. */ -#define IXGBE_MRCTL_VLME 0x08 /* VLAN Mirroring. 
*/ -#define IXGBE_INVALID_MIRROR_TYPE(mirror_type) \ - ((mirror_type) & ~(uint8_t)(ETH_MIRROR_VIRTUAL_POOL_UP | \ - ETH_MIRROR_UPLINK_PORT | ETH_MIRROR_DOWNLINK_PORT | ETH_MIRROR_VLAN)) - -static int -ixgbe_mirror_rule_set(struct rte_eth_dev *dev, - struct rte_eth_mirror_conf *mirror_conf, - uint8_t rule_id, uint8_t on) +int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, + uint16_t tx_rate, uint64_t q_msk) { - uint32_t mr_ctl, vlvf; - uint32_t mp_lsb = 0; - uint32_t mv_msb = 0; - uint32_t mv_lsb = 0; - uint32_t mp_msb = 0; - uint8_t i = 0; - int reg_index = 0; - uint64_t vlan_mask = 0; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ixgbe_hw *hw; + struct ixgbe_vf_info *vfinfo; + struct rte_eth_link link; + uint8_t nb_q_per_pool; + uint32_t queue_stride; + uint32_t queue_idx, idx = 0, vf_idx; + uint32_t queue_end; + uint16_t total_rate = 0; + struct rte_pci_device *pci_dev; - const uint8_t pool_mask_offset = 32; - const uint8_t vlan_mask_offset = 32; - const uint8_t dst_pool_offset = 8; - const uint8_t rule_mr_offset = 4; - const uint8_t mirror_rule_mask = 0x0F; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); - struct ixgbe_mirror_info *mr_info = - (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private)); - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint8_t mirror_type = 0; + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + rte_eth_link_get_nowait(port, &link); - if (ixgbe_vmdq_mode_check(hw) < 0) + if (is_ixgbe_pmd(dev_info.driver_name) != 0) return -ENOTSUP; - if (rule_id >= IXGBE_MAX_MIRROR_RULES) + if (vf >= dev_info.max_vfs) return -EINVAL; - if (IXGBE_INVALID_MIRROR_TYPE(mirror_conf->rule_type)) { - PMD_DRV_LOG(ERR, "unsupported mirror type 0x%x.", - mirror_conf->rule_type); + if (tx_rate > link.link_speed) return -EINVAL; - } - if (mirror_conf->rule_type & ETH_MIRROR_VLAN) { - mirror_type |= IXGBE_MRCTL_VLME; - /* Check if vlan id is valid and find conresponding VLAN ID index in VLVF */ - for (i = 0; i < IXGBE_VLVF_ENTRIES; i++) { - if (mirror_conf->vlan.vlan_mask & (1ULL << i)) { - /* search vlan id related pool vlan filter index */ - reg_index = ixgbe_find_vlvf_slot(hw, + if (q_msk == 0) + return 0; + + pci_dev = IXGBE_DEV_TO_PCI(dev); + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); + nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; + queue_stride = IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active; + queue_idx = vf * queue_stride; + queue_end = queue_idx + nb_q_per_pool - 1; + if (queue_end >= hw->mac.max_tx_queues) + return -EINVAL; + + if (vfinfo) { + for (vf_idx = 0; vf_idx < pci_dev->max_vfs; vf_idx++) { + if (vf_idx == vf) + continue; + for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate); + idx++) + total_rate += vfinfo[vf_idx].tx_rate[idx]; + } + } else { + return -EINVAL; + } + + /* Store tx_rate for this vf. */ + for (idx = 0; idx < nb_q_per_pool; idx++) { + if (((uint64_t)0x1 << idx) & q_msk) { + if (vfinfo[vf].tx_rate[idx] != tx_rate) + vfinfo[vf].tx_rate[idx] = tx_rate; + total_rate += tx_rate; + } + } + + if (total_rate > dev->data->dev_link.link_speed) { + /* Reset stored TX rate of the VF if it causes exceed + * link speed. 
+ */ + memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate)); + return -EINVAL; + } + + /* Set RTTBCNRC of each queue/pool for vf X */ + for (; queue_idx <= queue_end; queue_idx++) { + if (0x1 & q_msk) + ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate); + q_msk = q_msk >> 1; + } + + return 0; +} + +#define IXGBE_MRCTL_VPME 0x01 /* Virtual Pool Mirroring. */ +#define IXGBE_MRCTL_UPME 0x02 /* Uplink Port Mirroring. */ +#define IXGBE_MRCTL_DPME 0x04 /* Downlink Port Mirroring. */ +#define IXGBE_MRCTL_VLME 0x08 /* VLAN Mirroring. */ +#define IXGBE_INVALID_MIRROR_TYPE(mirror_type) \ + ((mirror_type) & ~(uint8_t)(ETH_MIRROR_VIRTUAL_POOL_UP | \ + ETH_MIRROR_UPLINK_PORT | ETH_MIRROR_DOWNLINK_PORT | ETH_MIRROR_VLAN)) + +static int +ixgbe_mirror_rule_set(struct rte_eth_dev *dev, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, uint8_t on) +{ + uint32_t mr_ctl, vlvf; + uint32_t mp_lsb = 0; + uint32_t mv_msb = 0; + uint32_t mv_lsb = 0; + uint32_t mp_msb = 0; + uint8_t i = 0; + int reg_index = 0; + uint64_t vlan_mask = 0; + + const uint8_t pool_mask_offset = 32; + const uint8_t vlan_mask_offset = 32; + const uint8_t dst_pool_offset = 8; + const uint8_t rule_mr_offset = 4; + const uint8_t mirror_rule_mask = 0x0F; + + struct ixgbe_mirror_info *mr_info = + (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private)); + struct ixgbe_hw *hw = + IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint8_t mirror_type = 0; + + if (ixgbe_vt_check(hw) < 0) + return -ENOTSUP; + + if (rule_id >= IXGBE_MAX_MIRROR_RULES) + return -EINVAL; + + if (IXGBE_INVALID_MIRROR_TYPE(mirror_conf->rule_type)) { + PMD_DRV_LOG(ERR, "unsupported mirror type 0x%x.", + mirror_conf->rule_type); + return -EINVAL; + } + + if (mirror_conf->rule_type & ETH_MIRROR_VLAN) { + mirror_type |= IXGBE_MRCTL_VLME; + /* Check if vlan id is valid and find conresponding VLAN ID index in VLVF */ + for (i = 0; i < IXGBE_VLVF_ENTRIES; i++) { + if (mirror_conf->vlan.vlan_mask & (1ULL << i)) { + /* search vlan id related pool vlan filter index */ + reg_index = ixgbe_find_vlvf_slot(hw, mirror_conf->vlan.vlan_id[i], false); if (reg_index < 0) @@ -4759,7 +5551,7 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id) struct ixgbe_mirror_info *mr_info = (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private)); - if (ixgbe_vmdq_mode_check(hw) < 0) + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; memset(&mr_info->mr_conf[rule_id], 0, @@ -4782,6 +5574,8 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id) static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -4791,7 +5585,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -4814,6 +5608,8 @@ ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -4833,7 +5629,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t 
queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -4937,7 +5733,8 @@ ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction, static void ixgbevf_configure_msix(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t q_idx; @@ -4970,7 +5767,8 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev) static void ixgbe_configure_msix(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t queue_id, base = IXGBE_MISC_VEC_ID; @@ -5085,61 +5883,6 @@ static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev, return 0; } -static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk) -{ - struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct ixgbe_vf_info *vfinfo = - *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); - uint8_t nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; - uint32_t queue_stride = - IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active; - uint32_t queue_idx = vf * queue_stride, idx = 0, vf_idx; - uint32_t queue_end = queue_idx + nb_q_per_pool - 1; - uint16_t total_rate = 0; - - if (queue_end >= hw->mac.max_tx_queues) - return -EINVAL; - - if (vfinfo != NULL) { - for (vf_idx = 0; vf_idx < dev->pci_dev->max_vfs; vf_idx++) { - if (vf_idx == vf) - continue; - for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate); - idx++) - total_rate += vfinfo[vf_idx].tx_rate[idx]; - } - } else - return -EINVAL; - - /* Store tx_rate for this vf. */ - for (idx = 0; idx < nb_q_per_pool; idx++) { - if (((uint64_t)0x1 << idx) & q_msk) { - if (vfinfo[vf].tx_rate[idx] != tx_rate) - vfinfo[vf].tx_rate[idx] = tx_rate; - total_rate += tx_rate; - } - } - - if (total_rate > dev->data->dev_link.link_speed) { - /* - * Reset stored TX rate of the VF if it causes exceed - * link speed. 
- */ - memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate)); - return -EINVAL; - } - - /* Set RTTBCNRC of each queue/pool for vf X */ - for (; queue_idx <= queue_end; queue_idx++) { - if (0x1 & q_msk) - ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate); - q_msk = q_msk >> 1; - } - - return 0; -} - static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr, __attribute__((unused)) uint32_t index, @@ -5224,21 +5967,24 @@ ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr) return -ENOTSUP;\ } while (0) -static int +int ixgbe_syn_filter_set(struct rte_eth_dev *dev, struct rte_eth_syn_filter *filter, bool add) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + uint32_t syn_info; uint32_t synqf; if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) return -EINVAL; - synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF); + syn_info = filter_info->syn_info; if (add) { - if (synqf & IXGBE_SYN_FILTER_ENABLE) + if (syn_info & IXGBE_SYN_FILTER_ENABLE) return -EINVAL; synqf = (uint32_t)(((filter->queue << IXGBE_SYN_FILTER_QUEUE_SHIFT) & IXGBE_SYN_FILTER_QUEUE) | IXGBE_SYN_FILTER_ENABLE); @@ -5248,10 +5994,13 @@ ixgbe_syn_filter_set(struct rte_eth_dev *dev, else synqf &= ~IXGBE_SYN_FILTER_SYNQFP; } else { - if (!(synqf & IXGBE_SYN_FILTER_ENABLE)) + synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF); + if (!(syn_info & IXGBE_SYN_FILTER_ENABLE)) return -ENOENT; synqf &= ~(IXGBE_SYN_FILTER_QUEUE | IXGBE_SYN_FILTER_ENABLE); } + + filter_info->syn_info = synqf; IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf); IXGBE_WRITE_FLUSH(hw); return 0; @@ -5307,7 +6056,7 @@ ixgbe_syn_filter_handle(struct rte_eth_dev *dev, (struct rte_eth_syn_filter *)arg); break; default: - PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -EINVAL; break; } @@ -5329,6 +6078,52 @@ convert_protocol_type(uint8_t protocol_value) return IXGBE_FILTER_PROTOCOL_NONE; } +/* inject a 5-tuple filter to HW */ +static inline void +ixgbe_inject_5tuple_filter(struct rte_eth_dev *dev, + struct ixgbe_5tuple_filter *filter) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + int i; + uint32_t ftqf, sdpqf; + uint32_t l34timir = 0; + uint8_t mask = 0xff; + + i = filter->index; + + sdpqf = (uint32_t)(filter->filter_info.dst_port << + IXGBE_SDPQF_DSTPORT_SHIFT); + sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT); + + ftqf = (uint32_t)(filter->filter_info.proto & + IXGBE_FTQF_PROTOCOL_MASK); + ftqf |= (uint32_t)((filter->filter_info.priority & + IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT); + if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. 
*/ + mask &= IXGBE_FTQF_SOURCE_ADDR_MASK; + if (filter->filter_info.dst_ip_mask == 0) + mask &= IXGBE_FTQF_DEST_ADDR_MASK; + if (filter->filter_info.src_port_mask == 0) + mask &= IXGBE_FTQF_SOURCE_PORT_MASK; + if (filter->filter_info.dst_port_mask == 0) + mask &= IXGBE_FTQF_DEST_PORT_MASK; + if (filter->filter_info.proto_mask == 0) + mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK; + ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT; + ftqf |= IXGBE_FTQF_POOL_MASK_EN; + ftqf |= IXGBE_FTQF_QUEUE_ENABLE; + + IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip); + IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip); + IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf); + IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf); + + l34timir |= IXGBE_L34T_IMIR_RESERVE; + l34timir |= (uint32_t)(filter->queue << + IXGBE_L34T_IMIR_QUEUE_SHIFT); + IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir); +} + /* * add a 5tuple filter * @@ -5346,13 +6141,9 @@ static int ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter) { - struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_filter_info *filter_info = IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); int i, idx, shift; - uint32_t ftqf, sdpqf; - uint32_t l34timir = 0; - uint8_t mask = 0xff; /* * look for an unused 5tuple filter index, @@ -5375,37 +6166,8 @@ ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, return -ENOSYS; } - sdpqf = (uint32_t)(filter->filter_info.dst_port << - IXGBE_SDPQF_DSTPORT_SHIFT); - sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT); - - ftqf = (uint32_t)(filter->filter_info.proto & - IXGBE_FTQF_PROTOCOL_MASK); - ftqf |= (uint32_t)((filter->filter_info.priority & - IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT); - if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. */ - mask &= IXGBE_FTQF_SOURCE_ADDR_MASK; - if (filter->filter_info.dst_ip_mask == 0) - mask &= IXGBE_FTQF_DEST_ADDR_MASK; - if (filter->filter_info.src_port_mask == 0) - mask &= IXGBE_FTQF_SOURCE_PORT_MASK; - if (filter->filter_info.dst_port_mask == 0) - mask &= IXGBE_FTQF_DEST_PORT_MASK; - if (filter->filter_info.proto_mask == 0) - mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK; - ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT; - ftqf |= IXGBE_FTQF_POOL_MASK_EN; - ftqf |= IXGBE_FTQF_QUEUE_ENABLE; - - IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip); - IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip); - IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf); - IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf); + ixgbe_inject_5tuple_filter(dev, filter); - l34timir |= IXGBE_L34T_IMIR_RESERVE; - l34timir |= (uint32_t)(filter->queue << - IXGBE_L34T_IMIR_QUEUE_SHIFT); - IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir); return 0; } @@ -5584,7 +6346,7 @@ ntuple_filter_to_5tuple(struct rte_eth_ntuple_filter *filter, * - On success, zero. * - On failure, a negative value. 
*/ -static int +int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, struct rte_eth_ntuple_filter *ntuple_filter, bool add) @@ -5729,48 +6491,7 @@ ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev, return ret; } -static inline int -ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info, - uint16_t ethertype) -{ - int i; - - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (filter_info->ethertype_filters[i] == ethertype && - (filter_info->ethertype_mask & (1 << i))) - return i; - } - return -1; -} - -static inline int -ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info, - uint16_t ethertype) -{ - int i; - - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (!(filter_info->ethertype_mask & (1 << i))) { - filter_info->ethertype_mask |= 1 << i; - filter_info->ethertype_filters[i] = ethertype; - return i; - } - } - return -1; -} - -static inline int -ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info, - uint8_t idx) -{ - if (idx >= IXGBE_MAX_ETQF_FILTERS) - return -1; - filter_info->ethertype_mask &= ~(1 << idx); - filter_info->ethertype_filters[idx] = 0; - return idx; -} - -static int +int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, struct rte_eth_ethertype_filter *filter, bool add) @@ -5781,20 +6502,17 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, uint32_t etqf = 0; uint32_t etqs = 0; int ret; + struct ixgbe_ethertype_filter ethertype_filter; if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) return -EINVAL; -#define TREX_PATCH -#ifndef TREX_PATCH - // no real reason to block this. - // We configure rules using FDIR and ethertype that point to same queue, so there are no race condition issues. + if (filter->ether_type == ETHER_TYPE_IPv4 || filter->ether_type == ETHER_TYPE_IPv6) { PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in" " ethertype filter.", filter->ether_type); return -EINVAL; } -#endif if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { PMD_DRV_LOG(ERR, "mac compare is unsupported."); @@ -5818,21 +6536,26 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, } if (add) { - ret = ixgbe_ethertype_filter_insert(filter_info, - filter->ether_type); - if (ret < 0) { - PMD_DRV_LOG(ERR, "ethertype filters are full."); - return -ENOSYS; - } etqf = IXGBE_ETQF_FILTER_EN; etqf |= (uint32_t)filter->ether_type; etqs |= (uint32_t)((filter->queue << IXGBE_ETQS_RX_QUEUE_SHIFT) & IXGBE_ETQS_RX_QUEUE); etqs |= IXGBE_ETQS_QUEUE_EN; - } else { - ret = ixgbe_ethertype_filter_remove(filter_info, (uint8_t)ret); - if (ret < 0 + + ethertype_filter.ethertype = filter->ether_type; + ethertype_filter.etqf = etqf; + ethertype_filter.etqs = etqs; + ethertype_filter.conf = FALSE; + ret = ixgbe_ethertype_filter_insert(filter_info, + &ethertype_filter); + if (ret < 0) { + PMD_DRV_LOG(ERR, "ethertype filters are full."); + return -ENOSPC; + } + } else { + ret = ixgbe_ethertype_filter_remove(filter_info, (uint8_t)ret); + if (ret < 0 return -ENOSYS; } IXGBE_WRITE_REG(hw, IXGBE_ETQF(ret), etqf); @@ -5925,7 +6648,7 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = -EINVAL; + int ret = 0; switch (filter_type) { case RTE_ETH_FILTER_NTUPLE: @@ -5943,9 +6666,15 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev, case RTE_ETH_FILTER_L2_TUNNEL: ret = ixgbe_dev_l2_tunnel_filter_handle(dev, filter_op, arg); break; + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &ixgbe_flow_ops; + break; default: PMD_DRV_LOG(WARNING, "Filter type (%d) not
supported", filter_type); + ret = -EINVAL; break; } @@ -6593,12 +7322,15 @@ ixgbe_dev_l2_tunnel_eth_type_conf(struct rte_eth_dev *dev, { int ret = 0; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); if (l2_tunnel == NULL) return -EINVAL; switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_ether_type = l2_tunnel->ether_type; ret = ixgbe_update_e_tag_eth_type(hw, l2_tunnel->ether_type); break; default: @@ -6637,9 +7369,12 @@ ixgbe_dev_l2_tunnel_enable(struct rte_eth_dev *dev, { int ret = 0; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_en = TRUE; ret = ixgbe_e_tag_enable(hw); break; default: @@ -6678,9 +7413,12 @@ ixgbe_dev_l2_tunnel_disable(struct rte_eth_dev *dev, { int ret = 0; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_en = FALSE; ret = ixgbe_e_tag_disable(hw); break; default: @@ -6769,12 +7507,108 @@ ixgbe_e_tag_filter_add(struct rte_eth_dev *dev, return -EINVAL; } +static inline struct ixgbe_l2_tn_filter * +ixgbe_l2_tn_filter_lookup(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_key *key) +{ + int ret; + + ret = rte_hash_lookup(l2_tn_info->hash_handle, (const void *)key); + if (ret < 0) + return NULL; + + return l2_tn_info->hash_map[ret]; +} + +static inline int +ixgbe_insert_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_filter *l2_tn_filter) +{ + int ret; + + ret = rte_hash_add_key(l2_tn_info->hash_handle, + &l2_tn_filter->key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert L2 tunnel filter" + " to hash table %d!", + ret); + return ret; + } + + l2_tn_info->hash_map[ret] = l2_tn_filter; + + TAILQ_INSERT_TAIL(&l2_tn_info->l2_tn_list, l2_tn_filter, entries); + + return 0; +} + +static inline int +ixgbe_remove_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_key *key) +{ + int ret; + struct ixgbe_l2_tn_filter *l2_tn_filter; + + ret = rte_hash_del_key(l2_tn_info->hash_handle, key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "No such L2 tunnel filter to delete %d!", + ret); + return ret; + } + + l2_tn_filter = l2_tn_info->hash_map[ret]; + l2_tn_info->hash_map[ret] = NULL; + + TAILQ_REMOVE(&l2_tn_info->l2_tn_list, l2_tn_filter, entries); + rte_free(l2_tn_filter); + + return 0; +} + /* Add l2 tunnel filter */ -static int +int ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, - struct rte_eth_l2_tunnel_conf *l2_tunnel) + struct rte_eth_l2_tunnel_conf *l2_tunnel, + bool restore) { - int ret = 0; + int ret; + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_key key; + struct ixgbe_l2_tn_filter *node; + + if (!restore) { + key.l2_tn_type = l2_tunnel->l2_tunnel_type; + key.tn_id = l2_tunnel->tunnel_id; + + node = ixgbe_l2_tn_filter_lookup(l2_tn_info, &key); + + if (node) { + PMD_DRV_LOG(ERR, + "The L2 tunnel filter already exists!"); + return -EINVAL; + } + + node = rte_zmalloc("ixgbe_l2_tn", + sizeof(struct ixgbe_l2_tn_filter), + 0); + if (!node) + return -ENOMEM; + + (void)rte_memcpy(&node->key, + &key, + 
sizeof(struct ixgbe_l2_tn_key)); + node->pool = l2_tunnel->pool; + ret = ixgbe_insert_l2_tn_filter(l2_tn_info, node); + if (ret < 0) { + rte_free(node); + return ret; + } + } switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: @@ -6786,15 +7620,27 @@ ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, break; } + if ((!restore) && (ret < 0)) + (void)ixgbe_remove_l2_tn_filter(l2_tn_info, &key); + return ret; } /* Delete l2 tunnel filter */ -static int +int ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel) { - int ret = 0; + int ret; + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_key key; + + key.l2_tn_type = l2_tunnel->l2_tunnel_type; + key.tn_id = l2_tunnel->tunnel_id; + ret = ixgbe_remove_l2_tn_filter(l2_tn_info, &key); + if (ret < 0) + return ret; switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: @@ -6820,7 +7666,7 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = 0; + int ret; if (filter_op == RTE_ETH_FILTER_NOP) return 0; @@ -6835,7 +7681,8 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev, case RTE_ETH_FILTER_ADD: ret = ixgbe_dev_l2_tunnel_filter_add (dev, - (struct rte_eth_l2_tunnel_conf *)arg); + (struct rte_eth_l2_tunnel_conf *)arg, + FALSE); break; case RTE_ETH_FILTER_DELETE: ret = ixgbe_dev_l2_tunnel_filter_del @@ -6878,10 +7725,13 @@ ixgbe_dev_l2_tunnel_forwarding_enable (struct rte_eth_dev *dev, enum rte_eth_tunnel_type l2_tunnel_type) { + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); int ret = 0; switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_fwd_en = TRUE; ret = ixgbe_e_tag_forwarding_en_dis(dev, 1); break; default: @@ -6899,10 +7749,13 @@ ixgbe_dev_l2_tunnel_forwarding_disable (struct rte_eth_dev *dev, enum rte_eth_tunnel_type l2_tunnel_type) { + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); int ret = 0; switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_fwd_en = FALSE; ret = ixgbe_e_tag_forwarding_en_dis(dev, 0); break; default: @@ -6919,15 +7772,16 @@ ixgbe_e_tag_insertion_en_dis(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel, bool en) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); int ret = 0; uint32_t vmtir, vmvir; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - if (l2_tunnel->vf_id >= dev->pci_dev->max_vfs) { + if (l2_tunnel->vf_id >= pci_dev->max_vfs) { PMD_DRV_LOG(ERR, "VF id %u should be less than %u", l2_tunnel->vf_id, - dev->pci_dev->max_vfs); + pci_dev->max_vfs); return -EINVAL; } @@ -7240,51 +8094,12 @@ ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, return ret; } -/* ixgbevf_update_xcast_mode - Update Multicast mode - * @hw: pointer to the HW structure - * @netdev: pointer to net device structure - * @xcast_mode: new multicast mode - * - * Updates the Multicast Mode of VF. 
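The all-multicast handlers in this hunk drop the driver-local mailbox helper and call the base-code op hw->mac.ops.update_xcast_mode, which performs the equivalent IXGBE_VF_UPDATE_XCAST_MODE exchange with the PF. Nothing changes for applications; a minimal sketch of driving it through the ethdev API (port_id is a placeholder, needs rte_ethdev.h):

    uint8_t port_id = 0;                    /* placeholder ixgbevf port */

    rte_eth_allmulticast_enable(port_id);   /* VF asks the PF for ALLMULTI mode */
    /* ... receive multicast traffic ... */
    rte_eth_allmulticast_disable(port_id);  /* back to IXGBEVF_XCAST_MODE_NONE */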
- */ -static int ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, - int xcast_mode) -{ - struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 msgbuf[2]; - s32 err; - - switch (hw->api_version) { - case ixgbe_mbox_api_12: - break; - default: - return -EOPNOTSUPP; - } - - msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; - msgbuf[1] = xcast_mode; - - err = mbx->ops.write_posted(hw, msgbuf, 2, 0); - if (err) - return err; - - err = mbx->ops.read_posted(hw, msgbuf, 2, 0); - if (err) - return err; - - msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK)) - return -EPERM; - - return 0; -} - static void ixgbevf_dev_allmulticast_enable(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI); + hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI); } static void @@ -7292,7 +8107,7 @@ ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE); + hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE); } static void ixgbevf_mbx_process(struct rte_eth_dev *dev) @@ -7305,7 +8120,7 @@ static void ixgbevf_mbx_process(struct rte_eth_dev *dev) /* PF reset VF event */ if (in_msg == IXGBE_PF_CONTROL_MSG) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); } static int @@ -7356,17 +8171,529 @@ ixgbevf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, ixgbevf_dev_interrupt_action(dev); } -static struct rte_driver rte_ixgbe_driver = { - .type = PMD_PDEV, - .init = rte_ixgbe_pmd_init, -}; +/** + * ixgbe_disable_sec_tx_path_generic - Stops the transmit data path + * @hw: pointer to hardware structure + * + * Stops the transmit data path and waits for the HW to internally empty + * the Tx security block + **/ +int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw) +{ +#define IXGBE_MAX_SECTX_POLL 40 -static struct rte_driver rte_ixgbevf_driver = { - .type = PMD_PDEV, - .init = rte_ixgbevf_pmd_init, -}; + int i; + int sectxreg; + + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + sectxreg |= IXGBE_SECTXCTRL_TX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg); + for (i = 0; i < IXGBE_MAX_SECTX_POLL; i++) { + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT); + if (sectxreg & IXGBE_SECTXSTAT_SECTX_RDY) + break; + /* Use interrupt-safe sleep just in case */ + usec_delay(1000); + } + + /* For informational purposes only */ + if (i >= IXGBE_MAX_SECTX_POLL) + PMD_DRV_LOG(DEBUG, "Tx unit being enabled before security " + "path fully disabled. Continuing with init."); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_enable_sec_tx_path_generic - Enables the transmit data path + * @hw: pointer to hardware structure + * + * Enables the transmit data path. 
+ **/ +int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw) +{ + uint32_t sectxreg; + + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + sectxreg &= ~IXGBE_SECTXCTRL_TX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg); + IXGBE_WRITE_FLUSH(hw); + + return IXGBE_SUCCESS; +} + +int +rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Stop the data paths */ + if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS) + return -ENOTSUP; + /* + * Workaround: + * As no ixgbe_disable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. + * The hardware support has been checked by + * ixgbe_disable_sec_rx_path(). + */ + ixgbe_disable_sec_tx_path_generic(hw); + + /* Enable Ethernet CRC (required by MACsec offload) */ + ctrl = IXGBE_READ_REG(hw, IXGBE_HLREG0); + ctrl |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_RXCRCSTRP; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, ctrl); + + /* Enable the TX and RX crypto engines */ + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + ctrl &= ~IXGBE_SECTXCTRL_SECTX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + ctrl &= ~IXGBE_SECRXCTRL_SECRX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); + ctrl &= ~IXGBE_SECTX_MINSECIFG_MASK; + ctrl |= 0x3; + IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, ctrl); + + /* Enable SA lookup */ + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL); + ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK; + ctrl |= en ? IXGBE_LSECTXCTRL_AUTH_ENCRYPT : + IXGBE_LSECTXCTRL_AUTH; + ctrl |= IXGBE_LSECTXCTRL_AISCI; + ctrl &= ~IXGBE_LSECTXCTRL_PNTHRSH_MASK; + ctrl |= IXGBE_MACSEC_PNTHRSH & IXGBE_LSECTXCTRL_PNTHRSH_MASK; + IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL); + ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK; + ctrl |= IXGBE_LSECRXCTRL_STRICT << IXGBE_LSECRXCTRL_EN_SHIFT; + ctrl &= ~IXGBE_LSECRXCTRL_PLSH; + if (rp) + ctrl |= IXGBE_LSECRXCTRL_RP; + else + ctrl &= ~IXGBE_LSECRXCTRL_RP; + IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl); + + /* Start the data paths */ + ixgbe_enable_sec_rx_path(hw); + /* + * Workaround: + * As no ixgbe_enable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. 
+ */ + ixgbe_enable_sec_tx_path_generic(hw); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_disable(uint8_t port) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Stop the data paths */ + if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS) + return -ENOTSUP; + /* + * Workaround: + * As no ixgbe_disable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. + * The hardware support has been checked by + * ixgbe_disable_sec_rx_path(). + */ + ixgbe_disable_sec_tx_path_generic(hw); + + /* Disable the TX and RX crypto engines */ + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + ctrl |= IXGBE_SECTXCTRL_SECTX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + ctrl |= IXGBE_SECRXCTRL_SECRX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl); + + /* Disable SA lookup */ + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL); + ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK; + ctrl |= IXGBE_LSECTXCTRL_DISABLE; + IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL); + ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK; + ctrl |= IXGBE_LSECRXCTRL_DISABLE << IXGBE_LSECRXCTRL_EN_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl); + + /* Start the data paths */ + ixgbe_enable_sec_rx_path(hw); + /* + * Workaround: + * As no ixgbe_enable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. 
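Taken together, the rte_pmd_ixgbe_macsec_* functions added around this point form a small control-plane API: enable the crypto engines, program the TX and RX secure channels, then install the SAs with their starting packet numbers and keys. A rough usage sketch based only on the declarations added by this patch; the port id, MAC addresses, key and PN values are placeholders and error handling is omitted:

    #include <rte_pmd_ixgbe.h>

    uint8_t port = 0;                  /* placeholder port id */
    uint8_t local_mac[6] = {0};        /* fill in the TX SC MAC address */
    uint8_t peer_mac[6] = {0};         /* fill in the RX SC MAC address */
    uint8_t sak[16] = {0};             /* fill in the 128-bit association key */

    rte_pmd_ixgbe_macsec_enable(port, 1 /* encrypt */, 1 /* replay protection */);
    rte_pmd_ixgbe_macsec_config_txsc(port, local_mac);
    rte_pmd_ixgbe_macsec_config_rxsc(port, peer_mac, 0 /* port identifier */);
    rte_pmd_ixgbe_macsec_select_txsa(port, 0 /* idx */, 0 /* AN */, 0 /* start PN */, sak);
    rte_pmd_ixgbe_macsec_select_rxsa(port, 0 /* idx */, 0 /* AN */, 0 /* start PN */, sak);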
+ */ + ixgbe_enable_sec_tx_path_generic(hw); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCL, ctrl); + + ctrl = mac[4] | (mac[5] << 8); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCH, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCL, ctrl); + + pi = rte_cpu_to_be_16(pi); + ctrl = mac[4] | (mac[5] << 8) | (pi << 16); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCH, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl, i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (idx != 0 && idx != 1) + return -EINVAL; + + if (an >= 4) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Set the PN and key */ + pn = rte_cpu_to_be_32(pn); + if (idx == 0) { + IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN0, pn); + + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY0(i), ctrl); + } + } else { + IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN1, pn); + + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY1(i), ctrl); + } + } + + /* Set AN and select the SA */ + ctrl = (an << idx * 2) | (idx << 4); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSA, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl, i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (idx != 0 && idx != 1) + return -EINVAL; + + if (an >= 4) + return -EINVAL; + + /* Set the PN */ + pn = rte_cpu_to_be_32(pn); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXPN(idx), pn); + + /* Set the key */ + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, 
IXGBE_LSECRXKEY(idx, i), ctrl); + } + + /* Set the AN and validate the SA */ + ctrl = an | (1 << 2); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSA(idx), ctrl); + + return 0; +} + +/* restore n-tuple filter */ +static inline void +ixgbe_ntuple_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct ixgbe_5tuple_filter *node; + + TAILQ_FOREACH(node, &filter_info->fivetuple_list, entries) { + ixgbe_inject_5tuple_filter(dev, node); + } +} + +/* restore ethernet type filter */ +static inline void +ixgbe_ethertype_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (filter_info->ethertype_mask & (1 << i)) { + IXGBE_WRITE_REG(hw, IXGBE_ETQF(i), + filter_info->ethertype_filters[i].etqf); + IXGBE_WRITE_REG(hw, IXGBE_ETQS(i), + filter_info->ethertype_filters[i].etqs); + IXGBE_WRITE_FLUSH(hw); + } + } +} + +/* restore SYN filter */ +static inline void +ixgbe_syn_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + uint32_t synqf; + + synqf = filter_info->syn_info; + + if (synqf & IXGBE_SYN_FILTER_ENABLE) { + IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf); + IXGBE_WRITE_FLUSH(hw); + } +} + +/* restore L2 tunnel filter */ +static inline void +ixgbe_l2_tn_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_filter *node; + struct rte_eth_l2_tunnel_conf l2_tn_conf; + + TAILQ_FOREACH(node, &l2_tn_info->l2_tn_list, entries) { + l2_tn_conf.l2_tunnel_type = node->key.l2_tn_type; + l2_tn_conf.tunnel_id = node->key.tn_id; + l2_tn_conf.pool = node->pool; + (void)ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_conf, TRUE); + } +} + +static int +ixgbe_filter_restore(struct rte_eth_dev *dev) +{ + ixgbe_ntuple_filter_restore(dev); + ixgbe_ethertype_filter_restore(dev); + ixgbe_syn_filter_restore(dev); + ixgbe_fdir_filter_restore(dev); + ixgbe_l2_tn_filter_restore(dev); + + return 0; +} + +static void +ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (l2_tn_info->e_tag_en) + (void)ixgbe_e_tag_enable(hw); + + if (l2_tn_info->e_tag_fwd_en) + (void)ixgbe_e_tag_forwarding_en_dis(dev, 1); + + (void)ixgbe_update_e_tag_eth_type(hw, l2_tn_info->e_tag_ether_type); +} + +/* remove all the n-tuple filters */ +void +ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct ixgbe_5tuple_filter *p_5tuple; + + while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list))) + ixgbe_remove_5tuple_filter(dev, p_5tuple); +} + +/* remove all the ether type filters */ +void +ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if 
(filter_info->ethertype_mask & (1 << i) && + !filter_info->ethertype_filters[i].conf) { + (void)ixgbe_ethertype_filter_remove(filter_info, + (uint8_t)i); + IXGBE_WRITE_REG(hw, IXGBE_ETQF(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_ETQS(i), 0); + IXGBE_WRITE_FLUSH(hw); + } + } +} + +/* remove the SYN filter */ +void +ixgbe_clear_syn_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + + if (filter_info->syn_info & IXGBE_SYN_FILTER_ENABLE) { + filter_info->syn_info = 0; + + IXGBE_WRITE_REG(hw, IXGBE_SYNQF, 0); + IXGBE_WRITE_FLUSH(hw); + } +} + +/* remove all the L2 tunnel filters */ +int +ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_filter *l2_tn_filter; + struct rte_eth_l2_tunnel_conf l2_tn_conf; + int ret = 0; + + while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) { + l2_tn_conf.l2_tunnel_type = l2_tn_filter->key.l2_tn_type; + l2_tn_conf.tunnel_id = l2_tn_filter->key.tn_id; + l2_tn_conf.pool = l2_tn_filter->pool; + ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_conf); + if (ret < 0) + return ret; + } + + return 0; +} -PMD_REGISTER_DRIVER(rte_ixgbe_driver, ixgbe); -DRIVER_REGISTER_PCI_TABLE(ixgbe, pci_id_ixgbe_map); -PMD_REGISTER_DRIVER(rte_ixgbevf_driver, ixgbevf); -DRIVER_REGISTER_PCI_TABLE(ixgbevf, pci_id_ixgbevf_map); +RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe, pci_id_ixgbe_map); +RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_ixgbe_vf, rte_ixgbevf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe_vf, pci_id_ixgbevf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe_vf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338e..680d5d93 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,11 +38,13 @@ #include "base/ixgbe_dcb_82598.h" #include "ixgbe_bypass.h" #include +#include /* need update link, bit flag */ #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0) #define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1) #define IXGBE_FLAG_PHY_INTERRUPT (uint32_t)(1 << 2) +#define IXGBE_FLAG_MACSEC (uint32_t)(1 << 3) /* * Defines that were not part of ixgbe_type.h as they are not used by the @@ -130,10 +132,16 @@ #define IXGBE_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define IXGBE_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IXGBE_SECTX_MINSECIFG_MASK 0x0000000F + +#define IXGBE_MACSEC_PNTHRSH 0xFFFFFE00 + +#define IXGBE_MAX_FDIR_FILTER_NUM (1024 * 32) +#define IXGBE_MAX_L2_TN_FILTER_NUM 128 + /* * Information about the fdir mode. 
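The restore and clear helpers above exist because every filter type now has a software shadow (TAILQ lists, plus an rte_hash for the FDIR and L2 tunnel tables): a port stop/start wipes the hardware tables, so the driver replays the cached entries, and the flow API can flush everything without scanning registers. A hedged sketch of the intended hook, assuming the restore pair is called from the tail of the port start path (the actual call site is outside this excerpt):

    static void
    example_dev_start_tail(struct rte_eth_dev *dev)
    {
        /* Hypothetical hook point: once queues and interrupts are up,
         * re-apply the cached E-tag configuration and every shadowed filter. */
        ixgbe_l2_tunnel_conf(dev);
        (void)ixgbe_filter_restore(dev);
    }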
*/ - struct ixgbe_hw_fdir_mask { uint16_t vlan_tci_mask; uint32_t src_ipv4_mask; @@ -148,6 +156,28 @@ struct ixgbe_hw_fdir_mask { uint8_t tunnel_type_mask; }; +struct ixgbe_fdir_filter { + TAILQ_ENTRY(ixgbe_fdir_filter) entries; + union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/ + uint32_t fdirflags; /* drop or forward */ + uint32_t fdirhash; /* hash value for fdir */ + uint8_t queue; /* assigned rx queue */ +}; + +/* list of fdir filters */ +TAILQ_HEAD(ixgbe_fdir_filter_list, ixgbe_fdir_filter); + +struct ixgbe_fdir_rule { + struct ixgbe_hw_fdir_mask mask; + union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/ + bool b_spec; /* If TRUE, ixgbe_fdir, fdirflags, queue have meaning. */ + bool b_mask; /* If TRUE, mask has meaning. */ + enum rte_fdir_mode mode; /* IP, MAC VLAN, Tunnel */ + uint32_t fdirflags; /* drop or forward */ + uint32_t soft_id; /* an unique value for this rule */ + uint8_t queue; /* assigned rx queue */ +}; + struct ixgbe_hw_fdir_info { struct ixgbe_hw_fdir_mask mask; uint8_t flex_bytes_offset; @@ -159,12 +189,19 @@ struct ixgbe_hw_fdir_info { uint64_t remove; uint64_t f_add; uint64_t f_remove; + struct ixgbe_fdir_filter_list fdir_list; /* filter list*/ + /* store the pointers of the filters, index is the hash value. */ + struct ixgbe_fdir_filter **hash_map; + struct rte_hash *hash_handle; /* cuckoo hash handler */ + bool mask_added; /* If already got mask from consistent filter */ }; /* structure for interrupt relative data */ struct ixgbe_interrupt { uint32_t flags; uint32_t mask; + /*to save original mask during delayed handler */ + uint32_t mask_original; }; struct ixgbe_stat_mapping_registers { @@ -252,16 +289,131 @@ struct ixgbe_5tuple_filter { (RTE_ALIGN(IXGBE_MAX_FTQF_FILTERS, (sizeof(uint32_t) * NBBY)) / \ (sizeof(uint32_t) * NBBY)) +struct ixgbe_ethertype_filter { + uint16_t ethertype; + uint32_t etqf; + uint32_t etqs; + /** + * If this filter is added by configuration, + * it should not be removed. + */ + bool conf; +}; + /* * Structure to store filters' info. 
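The header additions in this hunk carry the shadow state used by the restore and clear code: each FDIR rule lives both on fdir_list (for ordered replay) and in a cuckoo hash keyed by union ixgbe_atr_input, with hash_map translating the returned slot back to the filter object, while ethertype entries now keep etqf/etqs copies plus a conf flag so entries the driver installs for itself are skipped when the flow API flushes. A minimal sketch of how such a hash could be created; the parameters are assumptions for illustration, and the real initialisation is not part of this excerpt:

    #include <rte_hash.h>
    #include <rte_hash_crc.h>
    #include <rte_lcore.h>

    static struct rte_hash *
    example_fdir_hash_create(void)
    {
        struct rte_hash_parameters params = {
            .name = "example_fdir_hash",              /* placeholder name */
            .entries = IXGBE_MAX_FDIR_FILTER_NUM,
            .key_len = sizeof(union ixgbe_atr_input), /* lookup key = ATR input */
            .hash_func = rte_hash_crc,
            .hash_func_init_val = 0,
            .socket_id = rte_socket_id(),
        };

        return rte_hash_create(&params);              /* backs hash_handle/hash_map */
    }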
*/ struct ixgbe_filter_info { uint8_t ethertype_mask; /* Bit mask for every used ethertype filter */ /* store used ethertype filters*/ - uint16_t ethertype_filters[IXGBE_MAX_ETQF_FILTERS]; + struct ixgbe_ethertype_filter ethertype_filters[IXGBE_MAX_ETQF_FILTERS]; /* Bit mask for every used 5tuple filter */ uint32_t fivetuple_mask[IXGBE_5TUPLE_ARRAY_SIZE]; struct ixgbe_5tuple_filter_list fivetuple_list; + /* store the SYN filter info */ + uint32_t syn_info; +}; + +struct ixgbe_l2_tn_key { + enum rte_eth_tunnel_type l2_tn_type; + uint32_t tn_id; +}; + +struct ixgbe_l2_tn_filter { + TAILQ_ENTRY(ixgbe_l2_tn_filter) entries; + struct ixgbe_l2_tn_key key; + uint32_t pool; +}; + +TAILQ_HEAD(ixgbe_l2_tn_filter_list, ixgbe_l2_tn_filter); + +struct ixgbe_l2_tn_info { + struct ixgbe_l2_tn_filter_list l2_tn_list; + struct ixgbe_l2_tn_filter **hash_map; + struct rte_hash *hash_handle; + bool e_tag_en; /* e-tag enabled */ + bool e_tag_fwd_en; /* e-tag based forwarding enabled */ + bool e_tag_ether_type; /* ether type for e-tag */ +}; + +struct rte_flow { + enum rte_filter_type filter_type; + void *rule; +}; +/* ntuple filter list structure */ +struct ixgbe_ntuple_filter_ele { + TAILQ_ENTRY(ixgbe_ntuple_filter_ele) entries; + struct rte_eth_ntuple_filter filter_info; +}; +/* ethertype filter list structure */ +struct ixgbe_ethertype_filter_ele { + TAILQ_ENTRY(ixgbe_ethertype_filter_ele) entries; + struct rte_eth_ethertype_filter filter_info; +}; +/* syn filter list structure */ +struct ixgbe_eth_syn_filter_ele { + TAILQ_ENTRY(ixgbe_eth_syn_filter_ele) entries; + struct rte_eth_syn_filter filter_info; +}; +/* fdir filter list structure */ +struct ixgbe_fdir_rule_ele { + TAILQ_ENTRY(ixgbe_fdir_rule_ele) entries; + struct ixgbe_fdir_rule filter_info; +}; +/* l2_tunnel filter list structure */ +struct ixgbe_eth_l2_tunnel_conf_ele { + TAILQ_ENTRY(ixgbe_eth_l2_tunnel_conf_ele) entries; + struct rte_eth_l2_tunnel_conf filter_info; +}; +/* ixgbe_flow memory list structure */ +struct ixgbe_flow_mem { + TAILQ_ENTRY(ixgbe_flow_mem) entries; + struct rte_flow *flow; +}; + +TAILQ_HEAD(ixgbe_ntuple_filter_list, ixgbe_ntuple_filter_ele); +struct ixgbe_ntuple_filter_list filter_ntuple_list; +TAILQ_HEAD(ixgbe_ethertype_filter_list, ixgbe_ethertype_filter_ele); +struct ixgbe_ethertype_filter_list filter_ethertype_list; +TAILQ_HEAD(ixgbe_syn_filter_list, ixgbe_eth_syn_filter_ele); +struct ixgbe_syn_filter_list filter_syn_list; +TAILQ_HEAD(ixgbe_fdir_rule_filter_list, ixgbe_fdir_rule_ele); +struct ixgbe_fdir_rule_filter_list filter_fdir_list; +TAILQ_HEAD(ixgbe_l2_tunnel_filter_list, ixgbe_eth_l2_tunnel_conf_ele); +struct ixgbe_l2_tunnel_filter_list filter_l2_tunnel_list; +TAILQ_HEAD(ixgbe_flow_mem_list, ixgbe_flow_mem); +struct ixgbe_flow_mem_list ixgbe_flow_list; + +/* + * Statistics counters collected by the MACsec + */ +struct ixgbe_macsec_stats { + /* TX port statistics */ + uint64_t out_pkts_untagged; + uint64_t out_pkts_encrypted; + uint64_t out_pkts_protected; + uint64_t out_octets_encrypted; + uint64_t out_octets_protected; + + /* RX port statistics */ + uint64_t in_pkts_untagged; + uint64_t in_pkts_badtag; + uint64_t in_pkts_nosci; + uint64_t in_pkts_unknownsci; + uint64_t in_octets_decrypted; + uint64_t in_octets_validated; + + /* RX SC statistics */ + uint64_t in_pkts_unchecked; + uint64_t in_pkts_delayed; + uint64_t in_pkts_late; + + /* RX SA statistics */ + uint64_t in_pkts_ok; + uint64_t in_pkts_invalid; + uint64_t in_pkts_notvalid; + uint64_t in_pkts_unusedsa; + uint64_t in_pkts_notusingsa; }; /* @@ -270,6 
+422,7 @@ struct ixgbe_filter_info { struct ixgbe_adapter { struct ixgbe_hw hw; struct ixgbe_hw_stats stats; + struct ixgbe_macsec_stats macsec_stats; struct ixgbe_hw_fdir_info fdir; struct ixgbe_interrupt intr; struct ixgbe_stat_mapping_registers stat_mappings; @@ -283,6 +436,7 @@ struct ixgbe_adapter { struct ixgbe_bypass_info bps; #endif /* RTE_NIC_BYPASS */ struct ixgbe_filter_info filter; + struct ixgbe_l2_tn_info l2_tn; bool rx_bulk_alloc_allowed; bool rx_vec_allowed; @@ -291,12 +445,18 @@ struct ixgbe_adapter { struct rte_timecounter tx_tstamp_tc; }; +#define IXGBE_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) + #define IXGBE_DEV_PRIVATE_TO_HW(adapter)\ (&((struct ixgbe_adapter *)adapter)->hw) #define IXGBE_DEV_PRIVATE_TO_STATS(adapter) \ (&((struct ixgbe_adapter *)adapter)->stats) +#define IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(adapter) \ + (&((struct ixgbe_adapter *)adapter)->macsec_stats) + #define IXGBE_DEV_PRIVATE_TO_INTR(adapter) \ (&((struct ixgbe_adapter *)adapter)->intr) @@ -327,6 +487,9 @@ struct ixgbe_adapter { #define IXGBE_DEV_PRIVATE_TO_FILTER_INFO(adapter) \ (&((struct ixgbe_adapter *)adapter)->filter) +#define IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(adapter) \ + (&((struct ixgbe_adapter *)adapter)->l2_tn) + /* * RX/TX function prototypes */ @@ -396,6 +559,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); @@ -412,10 +578,31 @@ uint32_t ixgbe_rssrk_reg_get(enum ixgbe_mac_type mac_type, uint8_t i); bool ixgbe_rss_update_sp(enum ixgbe_mac_type mac_type); +int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, + struct rte_eth_ntuple_filter *filter, + bool add); +int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, + struct rte_eth_ethertype_filter *filter, + bool add); +int ixgbe_syn_filter_set(struct rte_eth_dev *dev, + struct rte_eth_syn_filter *filter, + bool add); +int +ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + bool restore); +int +ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel); +void ixgbe_filterlist_flush(void); /* * Flow director function prototypes */ int ixgbe_fdir_configure(struct rte_eth_dev *dev); +int ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev); +int ixgbe_fdir_filter_program(struct rte_eth_dev *dev, + struct ixgbe_fdir_rule *rule, + bool del, bool update); void ixgbe_configure_dcb(struct rte_eth_dev *dev); @@ -442,4 +629,69 @@ uint32_t ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val); int ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); +void ixgbe_fdir_filter_restore(struct rte_eth_dev *dev); +int ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev); + +extern const struct rte_flow_ops ixgbe_flow_ops; + +void ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev); +void ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev); +void ixgbe_clear_syn_filter(struct rte_eth_dev *dev); +int ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev); + +int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw); + +int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw); + +static inline int +ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info, + uint16_t ethertype) +{ + 
int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (filter_info->ethertype_filters[i].ethertype == ethertype && + (filter_info->ethertype_mask & (1 << i))) + return i; + } + return -1; +} + +static inline int +ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info, + struct ixgbe_ethertype_filter *ethertype_filter) +{ + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (!(filter_info->ethertype_mask & (1 << i))) { + filter_info->ethertype_mask |= 1 << i; + filter_info->ethertype_filters[i].ethertype = + ethertype_filter->ethertype; + filter_info->ethertype_filters[i].etqf = + ethertype_filter->etqf; + filter_info->ethertype_filters[i].etqs = + ethertype_filter->etqs; + filter_info->ethertype_filters[i].conf = + ethertype_filter->conf; + return i; + } + } + return -1; +} + +static inline int +ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info, + uint8_t idx) +{ + if (idx >= IXGBE_MAX_ETQF_FILTERS) + return -1; + filter_info->ethertype_mask &= ~(1 << idx); + filter_info->ethertype_filters[idx].ethertype = 0; + filter_info->ethertype_filters[idx].etqf = 0; + filter_info->ethertype_filters[idx].etqs = 0; + filter_info->ethertype_filters[idx].etqs = FALSE; + return idx; +} + #endif /* _IXGBE_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c b/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c index c38ac97b..3b9d60ca 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -111,10 +112,8 @@ static int fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash); static int fdir_set_input_mask(struct rte_eth_dev *dev, const struct rte_eth_fdir_masks *input_mask); -static int fdir_set_input_mask_82599(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask); -static int fdir_set_input_mask_x550(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask); +static int fdir_set_input_mask_82599(struct rte_eth_dev *dev); +static int fdir_set_input_mask_x550(struct rte_eth_dev *dev); static int ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, const struct rte_eth_fdir_flex_conf *conf, uint32_t *fdirctrl); static int fdir_enable_82599(struct ixgbe_hw *hw, uint32_t fdirctrl); @@ -248,13 +247,8 @@ configure_fdir_flags(const struct rte_fdir_conf *conf, uint32_t *fdirctrl) return -EINVAL; }; -#define TREX_PATCH -#ifdef TREX_PATCH - *fdirctrl |= (conf->flexbytes_offset << IXGBE_FDIRCTRL_FLEX_SHIFT); -#else *fdirctrl |= (IXGBE_DEFAULT_FLEXBYTES_OFFSET / sizeof(uint16_t)) << IXGBE_FDIRCTRL_FLEX_SHIFT; -#endif if (conf->mode >= RTE_FDIR_MODE_PERFECT && conf->mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) { @@ -299,8 +293,7 @@ reverse_fdir_bitmasks(uint16_t hi_dword, uint16_t lo_dword) * but makes use of the rte_fdir_masks structure to see which bits to set. */ static int -fdir_set_input_mask_82599(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +fdir_set_input_mask_82599(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_fdir_info *info = @@ -312,8 +305,6 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, uint32_t fdirm = IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6 | IXGBE_FDIRM_FLEX; uint32_t fdirtcpm; /* TCP source and destination port masks. */ uint32_t fdiripv6m; /* IPv6 source and destination masks. 
*/ - uint16_t dst_ipv6m = 0; - uint16_t src_ipv6m = 0; volatile uint32_t *reg; PMD_INIT_FUNC_TRACE(); @@ -324,31 +315,30 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, * a VLAN of 0 is unspecified, so mask that out as well. L4type * cannot be masked out in this implementation. */ - if (input_mask->dst_port_mask == 0 && input_mask->src_port_mask == 0) + if (info->mask.dst_port_mask == 0 && info->mask.src_port_mask == 0) /* use the L4 protocol mask for raw IPv4/IPv6 traffic */ fdirm |= IXGBE_FDIRM_L4P; - if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) + if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) /* mask VLAN Priority */ fdirm |= IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000)) + else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000)) /* mask VLAN ID */ fdirm |= IXGBE_FDIRM_VLANID; - else if (input_mask->vlan_tci_mask == 0) + else if (info->mask.vlan_tci_mask == 0) /* mask VLAN ID and Priority */ fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { + else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { PMD_INIT_LOG(ERR, "invalid vlan_tci_mask"); return -EINVAL; } - info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); /* store the TCP/UDP port masks, bit reversed from port layout */ fdirtcpm = reverse_fdir_bitmasks( - rte_be_to_cpu_16(input_mask->dst_port_mask), - rte_be_to_cpu_16(input_mask->src_port_mask)); + rte_be_to_cpu_16(info->mask.dst_port_mask), + rte_be_to_cpu_16(info->mask.src_port_mask)); /* write all the same so that UDP, TCP and SCTP use the same mask * (little-endian) @@ -356,30 +346,23 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm); IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm); IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm); - info->mask.src_port_mask = input_mask->src_port_mask; - info->mask.dst_port_mask = input_mask->dst_port_mask; /* Store source and destination IPv4 masks (big-endian), * can not use IXGBE_WRITE_REG. */ reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRSIP4M); - *reg = ~(input_mask->ipv4_mask.src_ip); + *reg = ~(info->mask.src_ipv4_mask); reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRDIP4M); - *reg = ~(input_mask->ipv4_mask.dst_ip); - info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip; - info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip; + *reg = ~(info->mask.dst_ipv4_mask); if (dev->data->dev_conf.fdir_conf.mode == RTE_FDIR_MODE_SIGNATURE) { /* * Store source and destination IPv6 masks (bit reversed) */ - IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m); - IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m); - fdiripv6m = (dst_ipv6m << 16) | src_ipv6m; + fdiripv6m = (info->mask.dst_ipv6_mask << 16) | + info->mask.src_ipv6_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, ~fdiripv6m); - info->mask.src_ipv6_mask = src_ipv6m; - info->mask.dst_ipv6_mask = dst_ipv6m; } return IXGBE_SUCCESS; @@ -390,8 +373,7 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, * but makes use of the rte_fdir_masks structure to see which bits to set. 
*/ static int -fdir_set_input_mask_x550(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +fdir_set_input_mask_x550(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_fdir_info *info = @@ -414,20 +396,19 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, /* some bits must be set for mac vlan or tunnel mode */ fdirm |= IXGBE_FDIRM_L4P | IXGBE_FDIRM_L3P; - if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) + if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) /* mask VLAN Priority */ fdirm |= IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000)) + else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000)) /* mask VLAN ID */ fdirm |= IXGBE_FDIRM_VLANID; - else if (input_mask->vlan_tci_mask == 0) + else if (info->mask.vlan_tci_mask == 0) /* mask VLAN ID and Priority */ fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { + else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { PMD_INIT_LOG(ERR, "invalid vlan_tci_mask"); return -EINVAL; } - info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); @@ -437,13 +418,12 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE | IXGBE_FDIRIP6M_TNI_VNI; - mac_mask = input_mask->mac_addr_byte_mask; - fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT) - & IXGBE_FDIRIP6M_INNER_MAC; - info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask; - if (mode == RTE_FDIR_MODE_PERFECT_TUNNEL) { - switch (input_mask->tunnel_type_mask) { + mac_mask = info->mask.mac_addr_byte_mask; + fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT) + & IXGBE_FDIRIP6M_INNER_MAC; + + switch (info->mask.tunnel_type_mask) { case 0: /* Mask turnnel type */ fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE; @@ -454,10 +434,8 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, PMD_INIT_LOG(ERR, "invalid tunnel_type_mask"); return -EINVAL; } - info->mask.tunnel_type_mask = - input_mask->tunnel_type_mask; - switch (rte_be_to_cpu_32(input_mask->tunnel_id_mask)) { + switch (rte_be_to_cpu_32(info->mask.tunnel_id_mask)) { case 0x0: /* Mask vxlan id */ fdiripv6m |= IXGBE_FDIRIP6M_TNI_VNI; @@ -471,8 +449,6 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, PMD_INIT_LOG(ERR, "invalid tunnel_id_mask"); return -EINVAL; } - info->mask.tunnel_id_mask = - input_mask->tunnel_id_mask; } IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, fdiripv6m); @@ -486,22 +462,90 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, } static int -fdir_set_input_mask(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +ixgbe_fdir_store_input_mask_82599(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + struct ixgbe_hw_fdir_info *info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + uint16_t dst_ipv6m = 0; + uint16_t src_ipv6m = 0; + + memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); + info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; + info->mask.src_port_mask = input_mask->src_port_mask; + info->mask.dst_port_mask = input_mask->dst_port_mask; + info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip; + info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip; + IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m); + IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m); + info->mask.src_ipv6_mask = src_ipv6m; + info->mask.dst_ipv6_mask = dst_ipv6m; + + return 
IXGBE_SUCCESS; +} + +static int +ixgbe_fdir_store_input_mask_x550(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + struct ixgbe_hw_fdir_info *info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + + memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); + info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; + info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask; + info->mask.tunnel_type_mask = input_mask->tunnel_type_mask; + info->mask.tunnel_id_mask = input_mask->tunnel_id_mask; + + return IXGBE_SUCCESS; +} + +static int +ixgbe_fdir_store_input_mask(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode; + + if (mode >= RTE_FDIR_MODE_SIGNATURE && + mode <= RTE_FDIR_MODE_PERFECT) + return ixgbe_fdir_store_input_mask_82599(dev, input_mask); + else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN && + mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) + return ixgbe_fdir_store_input_mask_x550(dev, input_mask); + + PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode); + return -ENOTSUP; +} + +int +ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev) { enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode; if (mode >= RTE_FDIR_MODE_SIGNATURE && mode <= RTE_FDIR_MODE_PERFECT) - return fdir_set_input_mask_82599(dev, input_mask); + return fdir_set_input_mask_82599(dev); else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN && mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) - return fdir_set_input_mask_x550(dev, input_mask); + return fdir_set_input_mask_x550(dev); PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode); return -ENOTSUP; } +static int +fdir_set_input_mask(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + int ret; + + ret = ixgbe_fdir_store_input_mask(dev, input_mask); + if (ret) + return ret; + + return ixgbe_fdir_set_input_mask(dev); +} + /* * ixgbe_check_fdir_flex_conf -check if the flex payload and mask configuration * arguments are valid @@ -520,7 +564,7 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, uint16_t i; fdirm = IXGBE_READ_REG(hw, IXGBE_FDIRM); -#ifndef TREX_PATCH + if (conf == NULL) { PMD_DRV_LOG(ERR, "NULL pointer."); return -EINVAL; @@ -561,11 +605,6 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, return -EINVAL; } } -#else - fdirm &= ~IXGBE_FDIRM_FLEX; - flexbytes = 1; - // fdirctrl gets flex_bytes_offset in configure_fdir_flags -#endif IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); info->mask.flex_bytes_mask = flexbytes ? UINT16_MAX : 0; info->flex_bytes_offset = (uint8_t)((*fdirctrl & @@ -597,9 +636,6 @@ ixgbe_fdir_configure(struct rte_eth_dev *dev) hw->mac.type != ixgbe_mac_X550EM_x && hw->mac.type != ixgbe_mac_X550EM_a && mode != RTE_FDIR_MODE_SIGNATURE && -#ifdef TREX_PATCH - mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN && -#endif mode != RTE_FDIR_MODE_PERFECT) return -ENOSYS; @@ -1088,31 +1124,110 @@ fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash) } -/* - * ixgbe_add_del_fdir_filter - add or remove a flow diretor filter. 
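The mask handling above is now split into a store step and an apply step: ixgbe_fdir_store_input_mask() copies the user-supplied rte_eth_fdir_masks into info->mask, and ixgbe_fdir_set_input_mask(), exported through ixgbe_ethdev.h, programs FDIRM/FDIRTCPM and friends from that stored copy; the legacy fdir_set_input_mask() keeps its old behaviour by chaining the two. The point of the split is that rule sources other than the legacy fdir API can fill info->mask themselves. A hedged sketch of that alternate path, assuming masks were derived from an rte_flow pattern into an ixgbe_fdir_rule named rule:

    struct ixgbe_hw_fdir_info *info =
        IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);

    /* Store masks parsed from the flow pattern instead of rte_eth_fdir_masks... */
    rte_memcpy(&info->mask, &rule.mask, sizeof(struct ixgbe_hw_fdir_mask));
    /* ...then reuse the same register-programming routine. */
    ret = ixgbe_fdir_set_input_mask(dev);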
- * @dev: pointer to the structure rte_eth_dev - * @fdir_filter: fdir filter entry - * @del: 1 - delete, 0 - add - * @update: 1 - update - */ +static inline struct ixgbe_fdir_filter * +ixgbe_fdir_filter_lookup(struct ixgbe_hw_fdir_info *fdir_info, + union ixgbe_atr_input *key) +{ + int ret; + + ret = rte_hash_lookup(fdir_info->hash_handle, (const void *)key); + if (ret < 0) + return NULL; + + return fdir_info->hash_map[ret]; +} + +static inline int +ixgbe_insert_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info, + struct ixgbe_fdir_filter *fdir_filter) +{ + int ret; + + ret = rte_hash_add_key(fdir_info->hash_handle, + &fdir_filter->ixgbe_fdir); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert fdir filter to hash table %d!", + ret); + return ret; + } + + fdir_info->hash_map[ret] = fdir_filter; + + TAILQ_INSERT_TAIL(&fdir_info->fdir_list, fdir_filter, entries); + + return 0; +} + +static inline int +ixgbe_remove_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info, + union ixgbe_atr_input *key) +{ + int ret; + struct ixgbe_fdir_filter *fdir_filter; + + ret = rte_hash_del_key(fdir_info->hash_handle, key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, "No such fdir filter to delete %d!", ret); + return ret; + } + + fdir_filter = fdir_info->hash_map[ret]; + fdir_info->hash_map[ret] = NULL; + + TAILQ_REMOVE(&fdir_info->fdir_list, fdir_filter, entries); + rte_free(fdir_filter); + + return 0; +} + static int -ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, - const struct rte_eth_fdir_filter *fdir_filter, +ixgbe_interpret_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *fdir_filter, + struct ixgbe_fdir_rule *rule) +{ + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + int err; + + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + + err = ixgbe_fdir_filter_to_atr_input(fdir_filter, + &rule->ixgbe_fdir, + fdir_mode); + if (err) + return err; + + rule->mode = fdir_mode; + if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT) + rule->fdirflags = IXGBE_FDIRCMD_DROP; + rule->queue = fdir_filter->action.rx_queue; + rule->soft_id = fdir_filter->soft_id; + + return 0; +} + +int +ixgbe_fdir_filter_program(struct rte_eth_dev *dev, + struct ixgbe_fdir_rule *rule, bool del, bool update) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t fdircmd_flags; uint32_t fdirhash; - union ixgbe_atr_input input; uint8_t queue; bool is_perfect = FALSE; int err; struct ixgbe_hw_fdir_info *info = IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + struct ixgbe_fdir_filter *node; + bool add_node = FALSE; - if (fdir_mode == RTE_FDIR_MODE_NONE) + if (fdir_mode == RTE_FDIR_MODE_NONE || + fdir_mode != rule->mode) return -ENOTSUP; /* @@ -1125,7 +1240,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) && - (fdir_filter->input.flow_type == + (rule->ixgbe_fdir.formatted.flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) && (info->mask.src_port_mask != 0 || info->mask.dst_port_mask != 0)) { @@ -1139,31 +1254,26 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) is_perfect = TRUE; - memset(&input, 0, sizeof(input)); - - err = ixgbe_fdir_filter_to_atr_input(fdir_filter, &input, - fdir_mode); - if (err) - return err; - if (is_perfect) { -#ifndef TREX_PATCH - // No reason not to use IPV6 in perfect filters. It is working. 
- if (input.formatted.flow_type & IXGBE_ATR_L4TYPE_IPV6_MASK) { + if (rule->ixgbe_fdir.formatted.flow_type & + IXGBE_ATR_L4TYPE_IPV6_MASK) { PMD_DRV_LOG(ERR, "IPv6 is not supported in" " perfect mode!"); return -ENOTSUP; } -#endif - fdirhash = atr_compute_perfect_hash_82599(&input, + fdirhash = atr_compute_perfect_hash_82599(&rule->ixgbe_fdir, dev->data->dev_conf.fdir_conf.pballoc); - fdirhash |= fdir_filter->soft_id << + fdirhash |= rule->soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; } else - fdirhash = atr_compute_sig_hash_82599(&input, + fdirhash = atr_compute_sig_hash_82599(&rule->ixgbe_fdir, dev->data->dev_conf.fdir_conf.pballoc); if (del) { + err = ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir); + if (err < 0) + return err; + err = fdir_erase_filter_82599(hw, fdirhash); if (err < 0) PMD_DRV_LOG(ERR, "Fail to delete FDIR filter!"); @@ -1173,7 +1283,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, } /* add or update an fdir filter*/ fdircmd_flags = (update) ? IXGBE_FDIRCMD_FILTER_UPDATE : 0; - if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT) { + if (rule->fdirflags & IXGBE_FDIRCMD_DROP) { if (is_perfect) { queue = dev->data->dev_conf.fdir_conf.drop_queue; fdircmd_flags |= IXGBE_FDIRCMD_DROP; @@ -1182,28 +1292,86 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, " signature mode."); return -EINVAL; } - } else if (fdir_filter->action.behavior == RTE_ETH_FDIR_ACCEPT && - fdir_filter->action.rx_queue < IXGBE_MAX_RX_QUEUE_NUM) - queue = (uint8_t)fdir_filter->action.rx_queue; + } else if (rule->queue < IXGBE_MAX_RX_QUEUE_NUM) + queue = (uint8_t)rule->queue; else return -EINVAL; + node = ixgbe_fdir_filter_lookup(info, &rule->ixgbe_fdir); + if (node) { + if (update) { + node->fdirflags = fdircmd_flags; + node->fdirhash = fdirhash; + node->queue = queue; + } else { + PMD_DRV_LOG(ERR, "Conflict with existing fdir filter!"); + return -EINVAL; + } + } else { + add_node = TRUE; + node = rte_zmalloc("ixgbe_fdir", + sizeof(struct ixgbe_fdir_filter), + 0); + if (!node) + return -ENOMEM; + (void)rte_memcpy(&node->ixgbe_fdir, + &rule->ixgbe_fdir, + sizeof(union ixgbe_atr_input)); + node->fdirflags = fdircmd_flags; + node->fdirhash = fdirhash; + node->queue = queue; + + err = ixgbe_insert_fdir_filter(info, node); + if (err < 0) { + rte_free(node); + return err; + } + } + if (is_perfect) { - err = fdir_write_perfect_filter_82599(hw, &input, queue, - fdircmd_flags, fdirhash, - fdir_mode); + err = fdir_write_perfect_filter_82599(hw, &rule->ixgbe_fdir, + queue, fdircmd_flags, + fdirhash, fdir_mode); } else { - err = fdir_add_signature_filter_82599(hw, &input, queue, - fdircmd_flags, fdirhash); + err = fdir_add_signature_filter_82599(hw, &rule->ixgbe_fdir, + queue, fdircmd_flags, + fdirhash); } - if (err < 0) + if (err < 0) { PMD_DRV_LOG(ERR, "Fail to add FDIR filter!"); - else + + if (add_node) + (void)ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir); + } else { PMD_DRV_LOG(DEBUG, "Success to add FDIR filter"); + } return err; } +/* ixgbe_add_del_fdir_filter - add or remove a flow diretor filter. 
+ * @dev: pointer to the structure rte_eth_dev + * @fdir_filter: fdir filter entry + * @del: 1 - delete, 0 - add + * @update: 1 - update + */ +static int +ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *fdir_filter, + bool del, + bool update) +{ + struct ixgbe_fdir_rule rule; + int err; + + err = ixgbe_interpret_fdir_filter(dev, fdir_filter, &rule); + + if (err) + return err; + + return ixgbe_fdir_filter_program(dev, &rule, del, update); +} + static int ixgbe_fdir_flush(struct rte_eth_dev *dev) { @@ -1394,3 +1562,66 @@ ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev, } return ret; } + +/* restore flow director filter */ +void +ixgbe_fdir_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct ixgbe_fdir_filter *node; + bool is_perfect = FALSE; + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + + if (fdir_mode >= RTE_FDIR_MODE_PERFECT && + fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) + is_perfect = TRUE; + + if (is_perfect) { + TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) { + (void)fdir_write_perfect_filter_82599(hw, + &node->ixgbe_fdir, + node->queue, + node->fdirflags, + node->fdirhash, + fdir_mode); + } + } else { + TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) { + (void)fdir_add_signature_filter_82599(hw, + &node->ixgbe_fdir, + node->queue, + node->fdirflags, + node->fdirhash); + } + } +} + +/* remove all the flow director filters */ +int +ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct ixgbe_fdir_filter *fdir_filter; + struct ixgbe_fdir_filter *filter_flag; + int ret = 0; + + /* flush flow director */ + rte_hash_reset(fdir_info->hash_handle); + memset(fdir_info->hash_map, 0, + sizeof(struct ixgbe_fdir_filter *) * IXGBE_MAX_FDIR_FILTER_NUM); + filter_flag = TAILQ_FIRST(&fdir_info->fdir_list); + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, + fdir_filter, + entries); + rte_free(fdir_filter); + } + + if (filter_flag != NULL) + ret = ixgbe_fdir_flush(dev); + + return ret; +} diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c b/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c new file mode 100644 index 00000000..82aceed7 --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c @@ -0,0 +1,2878 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ixgbe_logs.h" +#include "base/ixgbe_api.h" +#include "base/ixgbe_vf.h" +#include "base/ixgbe_common.h" +#include "ixgbe_ethdev.h" +#include "ixgbe_bypass.h" +#include "ixgbe_rxtx.h" +#include "base/ixgbe_type.h" +#include "base/ixgbe_phy.h" +#include "rte_pmd_ixgbe.h" + +static int ixgbe_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error); +static int +cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item *pattern, + const struct rte_flow_action *actions, + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_l2_tn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *filter, + struct rte_flow_error *error); +static int +ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *rule, + struct rte_flow_error *error); +static int +ixgbe_validate_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, 
+ struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_flow_validate(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static struct rte_flow *ixgbe_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static int ixgbe_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error); + +const struct rte_flow_ops ixgbe_flow_ops = { + ixgbe_flow_validate, + ixgbe_flow_create, + ixgbe_flow_destroy, + ixgbe_flow_flush, + NULL, +}; + +#define IXGBE_MIN_N_TUPLE_PRIO 1 +#define IXGBE_MAX_N_TUPLE_PRIO 7 +#define NEXT_ITEM_OF_PATTERN(item, pattern, index)\ + do { \ + item = pattern + index;\ + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {\ + index++; \ + item = pattern + index; \ + } \ + } while (0) + +#define NEXT_ITEM_OF_ACTION(act, actions, index)\ + do { \ + act = actions + index; \ + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {\ + index++; \ + act = actions + index; \ + } \ + } while (0) + +/** + * Please aware there's an asumption for all the parsers. + * rte_flow_item is using big endian, rte_flow_attr and + * rte_flow_action are using CPU order. + * Because the pattern is used to describe the packets, + * normally the packets should use network order. + */ + +/** + * Parse the rule to see if it is a n-tuple rule. + * And get the n-tuple filter info BTW. + * pattern: + * The first not void item can be ETH or IPV4. + * The second not void item must be IPV4 if the first one is ETH. + * The third not void item must be UDP or TCP. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4 src_addr 192.168.1.20 0xFFFFFFFF + * dst_addr 192.167.3.50 0xFFFFFFFF + * next_proto_id 17 0xFF + * UDP/TCP src_port 80 0xFFFF + * dst_port 80 0xFFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
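For illustration, a minimal sketch (not part of the upstream sources) of an application-side rule that satisfies the constraints listed above, assuming port 0 is bound to this driver and Rx queue 3 exists; the addresses, ports, queue index and priority are made-up values. Such a rule reaches this parser through the generic rte_flow API entry points registered in ixgbe_flow_ops above.

#include <stdint.h>
#include <netinet/in.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <rte_flow.h>

/* Build and validate an n-tuple rule: ETH (empty) / IPV4 / UDP / END,
 * with a single QUEUE action, as required by cons_parse_ntuple_filter(). */
static int
example_validate_ntuple(uint8_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1, .priority = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr = {
			.src_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 20)),
			.dst_addr = rte_cpu_to_be_32(IPv4(192, 167, 3, 50)),
			.next_proto_id = IPPROTO_UDP,
		},
	};
	struct rte_flow_item_ipv4 ip_mask = {
		.hdr = {
			.src_addr = UINT32_MAX,	/* all-ones, endian neutral */
			.dst_addr = UINT32_MAX,
			.next_proto_id = UINT8_MAX,
		},
	};
	struct rte_flow_item_udp udp_spec = {
		.hdr = {
			.src_port = rte_cpu_to_be_16(80),
			.dst_port = rte_cpu_to_be_16(80),
		},
	};
	struct rte_flow_item_udp udp_mask = {
		.hdr = { .src_port = UINT16_MAX, .dst_port = UINT16_MAX },
	};
	struct rte_flow_action_queue queue = { .index = 3 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },	/* spec/mask must stay NULL */
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
		  .spec = &ip_spec, .mask = &ip_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
		  .spec = &udp_spec, .mask = &udp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error flow_err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &flow_err);
}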
+ */ +static int +cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_ipv4 *ipv4_spec; + const struct rte_flow_item_ipv4 *ipv4_mask; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_item_udp *udp_spec; + const struct rte_flow_item_udp *udp_mask; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* parse pattern */ + index = 0; + + /* the first not void item can be MAC or IPv4 */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + /* Skip Ethernet */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, + EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + /* if the first item is MAC, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + /* check if the next not void item is IPv4 */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + } + + /* get the IPv4 info */ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + + ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask; + /** + * Only support src & dst addresses, protocol, + * others should be masked. 
+ */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.time_to_live || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_ip_mask = ipv4_mask->hdr.dst_addr; + filter->src_ip_mask = ipv4_mask->hdr.src_addr; + filter->proto_mask = ipv4_mask->hdr.next_proto_id; + + ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec; + filter->dst_ip = ipv4_spec->hdr.dst_addr; + filter->src_ip = ipv4_spec->hdr.src_addr; + filter->proto = ipv4_spec->hdr.next_proto_id; + + /* check if the next not void item is TCP or UDP */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* get the TCP/UDP info */ + if (!item->spec || !item->mask) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + + /** + * Only support src & dst ports, tcp flags, + * others should be masked. + */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_port_mask = tcp_mask->hdr.dst_port; + filter->src_port_mask = tcp_mask->hdr.src_port; + if (tcp_mask->hdr.tcp_flags == 0xFF) { + filter->flags |= RTE_NTUPLE_FLAGS_TCP_FLAG; + } else if (!tcp_mask->hdr.tcp_flags) { + filter->flags &= ~RTE_NTUPLE_FLAGS_TCP_FLAG; + } else { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + filter->dst_port = tcp_spec->hdr.dst_port; + filter->src_port = tcp_spec->hdr.src_port; + filter->tcp_flags = tcp_spec->hdr.tcp_flags; + } else { + udp_mask = (const struct rte_flow_item_udp *)item->mask; + + /** + * Only support src & dst ports, + * others should be masked. 
+ */ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_port_mask = udp_mask->hdr.dst_port; + filter->src_port_mask = udp_mask->hdr.src_port; + + udp_spec = (const struct rte_flow_item_udp *)item->spec; + filter->dst_port = udp_spec->hdr.dst_port; + filter->src_port = udp_spec->hdr.src_port; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /** + * n-tuple only supports forwarding, + * check if the first not void action is QUEUE. + */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + item, "Not supported action."); + return -rte_errno; + } + filter->queue = + ((const struct rte_flow_action_queue *)act->conf)->index; + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + if (attr->priority > 0xFFFF) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Error priority."); + return -rte_errno; + } + filter->priority = (uint16_t)attr->priority; + if (attr->priority < IXGBE_MIN_N_TUPLE_PRIO || + attr->priority > IXGBE_MAX_N_TUPLE_PRIO) + filter->priority = 1; + + return 0; +} + +/* a specific function for ixgbe because the flags is specific */ +static int +ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error); + + if (ret) + return ret; + + /* Ixgbe doesn't support tcp flags. */ + if (filter->flags & RTE_NTUPLE_FLAGS_TCP_FLAG) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* Ixgbe doesn't support many priorities. 
*/ + if (filter->priority < IXGBE_MIN_N_TUPLE_PRIO || + filter->priority > IXGBE_MAX_N_TUPLE_PRIO) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Priority not supported by ntuple filter"); + return -rte_errno; + } + + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM || + filter->priority > IXGBE_5TUPLE_MAX_PRI || + filter->priority < IXGBE_5TUPLE_MIN_PRI) + return -rte_errno; + + /* fixed value for ixgbe */ + filter->flags = RTE_5TUPLE_FLAGS; + return 0; +} + +/** + * Parse the rule to see if it is a ethertype rule. + * And get the ethertype filter info BTW. + * pattern: + * The first not void item can be ETH. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH type 0x0807 0xFFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. + */ +static int +cons_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item *pattern, + const struct rte_flow_action *actions, + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* Parse pattern */ + index = 0; + + /* The first non-void item should be MAC. */ + item = pattern + index; + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) { + index++; + item = pattern + index; + } + if (item->type != RTE_FLOW_ITEM_TYPE_ETH) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Get the MAC info. */ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter"); + return -rte_errno; + } + + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Mask bits of source MAC address must be full of 0. + * Mask bits of destination MAC address must be full + * of 1 or full of 0. + */ + if (!is_zero_ether_addr(ð_mask->src) || + (!is_zero_ether_addr(ð_mask->dst) && + !is_broadcast_ether_addr(ð_mask->dst))) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ether address mask"); + return -rte_errno; + } + + if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ethertype mask"); + return -rte_errno; + } + + /* If mask bits of destination MAC address + * are full of 1, set RTE_ETHTYPE_FLAGS_MAC. 
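Note that the ixgbe-specific wrapper further down rejects both the MAC-compare flag and the DROP action, so a rule meant for this driver keeps the MAC masks at zero. Below is a minimal sketch (not from the upstream sources), assuming port 0 and Rx queue 1 and using the same headers as the n-tuple sketch earlier; the EtherType 0x88F7 (PTP over Ethernet) is only an example value.

/* Match a single EtherType and steer it to Rx queue 1. MAC addresses are
 * left unmasked because the ixgbe-specific checks reject MAC compare and
 * the DROP action. */
static int
example_validate_ethertype(uint8_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };	/* priority/group must be 0 */
	struct rte_flow_item_eth eth_spec = {
		.type = rte_cpu_to_be_16(0x88F7),	/* example EtherType */
	};
	struct rte_flow_item_eth eth_mask = {
		.type = 0xFFFF,		/* EtherType must be fully masked */
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH,
		  .spec = &eth_spec, .mask = &eth_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error flow_err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &flow_err);
}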
+ */ + if (is_broadcast_ether_addr(ð_mask->dst)) { + filter->mac_addr = eth_spec->dst; + filter->flags |= RTE_ETHTYPE_FLAGS_MAC; + } else { + filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC; + } + filter->ether_type = rte_be_to_cpu_16(eth_spec->type); + + /* Check if the next non-void item is END. */ + index++; + item = pattern + index; + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) { + index++; + item = pattern + index; + } + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter."); + return -rte_errno; + } + + /* Parse action */ + + index = 0; + /* Check if the first non-void action is QUEUE or DROP. */ + act = actions + index; + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { + index++; + act = actions + index; + } + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + } else { + filter->flags |= RTE_ETHTYPE_FLAGS_DROP; + } + + /* Check if the next non-void item is END */ + index++; + act = actions + index; + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { + index++; + act = actions + index; + } + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* Parse attr */ + /* Must be input direction */ + if (!attr->ingress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->egress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->priority) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* Not supported */ + if (attr->group) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + attr, "Not support group."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_ethertype_filter(attr, pattern, + actions, filter, error); + + if (ret) + return ret; + + /* Ixgbe doesn't support MAC address. 
*/ + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by ethertype filter"); + return -rte_errno; + } + + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "queue index much too big"); + return -rte_errno; + } + + if (filter->ether_type == ETHER_TYPE_IPv4 || + filter->ether_type == ETHER_TYPE_IPv6) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "IPv4/IPv6 not supported by ethertype filter"); + return -rte_errno; + } + + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "mac compare is unsupported"); + return -rte_errno; + } + + if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "drop option is unsupported"); + return -rte_errno; + } + + return 0; +} + +/** + * Parse the rule to see if it is a TCP SYN rule. + * And get the TCP SYN filter info BTW. + * pattern: + * The first not void item must be ETH. + * The second not void item must be IPV4 or IPV6. + * The third not void item must be TCP. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * TCP tcp_flags 0x02 0xFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
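A minimal sketch (not from the upstream sources) of a rule matching the layout above, assuming port 0 and Rx queue 2 and reusing the headers of the earlier sketches; the raw value 0x02 stands for the TCP SYN flag.

/* Match TCP SYN packets (the ETH and IPV4 items carry no spec/mask, only
 * the TCP item matters) and send them to Rx queue 2. attr.priority selects
 * the low (0) or high (UINT32_MAX) SYN priority. */
static int
example_validate_syn(uint8_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1, .priority = 0 };
	struct rte_flow_item_tcp tcp_spec = { .hdr = { .tcp_flags = 0x02 } }; /* SYN */
	struct rte_flow_item_tcp tcp_mask = { .hdr = { .tcp_flags = 0x02 } };
	struct rte_flow_action_queue queue = { .index = 2 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP,
		  .spec = &tcp_spec, .mask = &tcp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error flow_err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &flow_err);
}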
+ */ +static int +cons_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* parse pattern */ + index = 0; + + /* the first not void item should be MAC or IPv4 or IPv6 or TCP */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_TCP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Skip Ethernet */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /* if the item is MAC, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN address mask"); + return -rte_errno; + } + + /* check if the next not void item is IPv4 or IPv6 */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + } + + /* Skip IP */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 || + item->type == RTE_FLOW_ITEM_TYPE_IPV6) { + /* if the item is IP, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN mask"); + return -rte_errno; + } + + /* check if the next not void item is TCP */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + } + + /* Get the TCP info. Only support SYN. 
*/ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN mask"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (!(tcp_spec->hdr.tcp_flags & TCP_SYN_FLAG) || + tcp_mask->hdr.src_port || + tcp_mask->hdr.dst_port || + tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags != TCP_SYN_FLAG || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Support 2 priorities, the lowest or highest. 
*/ + if (!attr->priority) { + filter->hig_pri = 0; + } else if (attr->priority == (uint32_t)~0U) { + filter->hig_pri = 1; + } else { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_syn_filter(attr, pattern, + actions, filter, error); + + if (ret) + return ret; + + return 0; +} + +/** + * Parse the rule to see if it is a L2 tunnel rule. + * And get the L2 tunnel filter info BTW. + * Only support E-tag now. + * pattern: + * The first not void item can be E_TAG. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * E_TAG grp 0x1 0x3 + e_cid_base 0x309 0xFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. + */ +static int +cons_parse_l2_tn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_e_tag *e_tag_spec; + const struct rte_flow_item_e_tag *e_tag_mask; + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + /* parse pattern */ + index = 0; + + /* The first not void item should be e-tag. */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_E_TAG) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + if (!item->spec || !item->mask) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + e_tag_spec = (const struct rte_flow_item_e_tag *)item->spec; + e_tag_mask = (const struct rte_flow_item_e_tag *)item->mask; + + /* Only care about GRP and E cid base. */ + if (e_tag_mask->epcp_edei_in_ecid_b || + e_tag_mask->in_ecid_e || + e_tag_mask->ecid_e || + e_tag_mask->rsvd_grp_ecid_b != rte_cpu_to_be_16(0x3FFF)) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + filter->l2_tunnel_type = RTE_L2_TUNNEL_TYPE_E_TAG; + /** + * grp and e_cid_base are bit fields and only use 14 bits. 
+ * e-tag id is taken as little endian by HW. + */ + filter->tunnel_id = rte_be_to_cpu_16(e_tag_spec->rsvd_grp_ecid_b); + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* not supported */ + if (attr->priority) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->pool = act_q->index; + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *l2_tn_filter, + struct rte_flow_error *error) +{ + int ret = 0; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ret = cons_parse_l2_tn_filter(attr, pattern, + actions, l2_tn_filter, error); + + if (hw->mac.type != ixgbe_mac_X550 && + hw->mac.type != ixgbe_mac_X550EM_x && + hw->mac.type != ixgbe_mac_X550EM_a) { + memset(l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + return ret; +} + +/* Parse to get the attr and action info of flow director rule. 
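For illustration, a hypothetical helper (not from the upstream sources) that pairs an already-built flow-director pattern with the action layout this function accepts: QUEUE or DROP first, an optional MARK whose id becomes rule->soft_id, then END. The queue index and mark id are made-up values; headers as in the earlier sketches.

static int
example_validate_fdir_actions(uint8_t port_id,
			      const struct rte_flow_item pattern[])
{
	struct rte_flow_attr attr = { .ingress = 1 };	/* egress/priority rejected */
	struct rte_flow_action_queue queue = { .index = 5 };
	struct rte_flow_action_mark mark = { .id = 0x1234 }; /* becomes soft_id */
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_MARK,  .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error flow_err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &flow_err);
}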
*/ +static int +ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + const struct rte_flow_action_mark *mark; + uint32_t index; + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* not supported */ + if (attr->priority) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE or DROP. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + rule->queue = act_q->index; + } else { /* drop */ + rule->fdirflags = IXGBE_FDIRCMD_DROP; + } + + /* check if the next not void item is MARK */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if ((act->type != RTE_FLOW_ACTION_TYPE_MARK) && + (act->type != RTE_FLOW_ACTION_TYPE_END)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + rule->soft_id = 0; + + if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { + mark = (const struct rte_flow_action_mark *)act->conf; + rule->soft_id = mark->id; + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + } + + /* check if the next not void item is END */ + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +/** + * Parse the rule to see if it is a IP or MAC VLAN flow director rule. + * And get the flow director filter info BTW. + * UDP/TCP/SCTP PATTERN: + * The first not void item can be ETH or IPV4. + * The second not void item must be IPV4 if the first one is ETH. + * The third not void item must be UDP or TCP or SCTP. + * The next not void item must be END. + * MAC VLAN PATTERN: + * The first not void item must be ETH. + * The second not void item must be MAC VLAN. + * The next not void item must be END. + * ACTION: + * The first not void action should be QUEUE or DROP. + * The second not void optional action should be MARK, + * mark_id is a uint32_t number. + * The next not void action should be END. 
+ * UDP/TCP/SCTP pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4 src_addr 192.168.1.20 0xFFFFFFFF + * dst_addr 192.167.3.50 0xFFFFFFFF + * UDP/TCP/SCTP src_port 80 0xFFFF + * dst_port 80 0xFFFF + * END + * MAC VLAN pattern example: + * ITEM Spec Mask + * ETH dst_addr + {0xAC, 0x7B, 0xA1, {0xFF, 0xFF, 0xFF, + 0x2C, 0x6D, 0x36} 0xFF, 0xFF, 0xFF} + * MAC VLAN tci 0x2016 0xFFFF + * tpid 0x8100 0xFFFF + * END + * Other members in mask and spec should set to 0x00. + * Item->last should be NULL. + */ +static int +ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_ipv4 *ipv4_spec; + const struct rte_flow_item_ipv4 *ipv4_mask; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_item_udp *udp_spec; + const struct rte_flow_item_udp *udp_mask; + const struct rte_flow_item_sctp *sctp_spec; + const struct rte_flow_item_sctp *sctp_mask; + const struct rte_flow_item_vlan *vlan_spec; + const struct rte_flow_item_vlan *vlan_mask; + + uint32_t index, j; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /** + * Some fields may not be provided. Set spec to 0 and mask to default + * value. So, we need not do anything for the not provided fields later. + */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); + rule->mask.vlan_tci_mask = 0; + + /* parse pattern */ + index = 0; + + /** + * The first not void item should be + * MAC or IPv4 or TCP or UDP or SCTP. + */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_SCTP) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mode = RTE_FDIR_MODE_PERFECT; + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Get the MAC info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /** + * Only support vlan and dst MAC address, + * others should be masked. + */ + if (item->spec && !item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + if (item->spec) { + rule->b_spec = TRUE; + eth_spec = (const struct rte_flow_item_eth *)item->spec; + + /* Get the dst MAC. 
*/ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + rule->ixgbe_fdir.formatted.inner_mac[j] = + eth_spec->dst.addr_bytes[j]; + } + } + + + if (item->mask) { + /* If ethernet has meaning, it means MAC VLAN mode. */ + rule->mode = RTE_FDIR_MODE_PERFECT_MAC_VLAN; + + rule->b_mask = TRUE; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Ether type should be masked. */ + if (eth_mask->type) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /** + * src MAC address must be masked, + * and don't support dst MAC address mask. + */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + if (eth_mask->src.addr_bytes[j] || + eth_mask->dst.addr_bytes[j] != 0xFF) { + memset(rule, 0, + sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* When no VLAN, considered as full mask. */ + rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF); + } + /*** If both spec and mask are item, + * it means don't care about ETH. + * Do nothing. + */ + + /** + * Check if the next not void item is vlan or ipv4. + * IPv6 is not supported. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (rule->mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { + if (item->type != RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } else { + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + if (!(item->spec && item->mask)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + vlan_spec = (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = (const struct rte_flow_item_vlan *)item->mask; + + if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci; + + if (vlan_mask->tpid != (uint16_t)~0U) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.vlan_tci_mask = vlan_mask->tci; + /* More than one tags are not supported. */ + + /** + * Check if the next not void item is not vlan. 
+ */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } else if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the IP info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_IPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst addresses, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + ipv4_mask = + (const struct rte_flow_item_ipv4 *)item->mask; + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.time_to_live || + ipv4_mask->hdr.next_proto_id || + ipv4_mask->hdr.hdr_checksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.dst_ipv4_mask = ipv4_mask->hdr.dst_addr; + rule->mask.src_ipv4_mask = ipv4_mask->hdr.src_addr; + + if (item->spec) { + rule->b_spec = TRUE; + ipv4_spec = + (const struct rte_flow_item_ipv4 *)item->spec; + rule->ixgbe_fdir.formatted.dst_ip[0] = + ipv4_spec->hdr.dst_addr; + rule->ixgbe_fdir.formatted.src_ip[0] = + ipv4_spec->hdr.src_addr; + } + + /** + * Check if the next not void item is + * TCP or UDP or SCTP or END. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_SCTP && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the TCP info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_TCPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. 
+ */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = tcp_mask->hdr.src_port; + rule->mask.dst_port_mask = tcp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + tcp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + tcp_spec->hdr.dst_port; + } + } + + /* Get the UDP info */ + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_UDPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + udp_mask = (const struct rte_flow_item_udp *)item->mask; + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = udp_mask->hdr.src_port; + rule->mask.dst_port_mask = udp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + udp_spec = (const struct rte_flow_item_udp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + udp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + udp_spec->hdr.dst_port; + } + } + + /* Get the SCTP info */ + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_SCTPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. 
+ */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + sctp_mask = + (const struct rte_flow_item_sctp *)item->mask; + if (sctp_mask->hdr.tag || + sctp_mask->hdr.cksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = sctp_mask->hdr.src_port; + rule->mask.dst_port_mask = sctp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + sctp_spec = + (const struct rte_flow_item_sctp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + sctp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + sctp_spec->hdr.dst_port; + } + } + + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); +} + +#define NVGRE_PROTOCOL 0x6558 + +/** + * Parse the rule to see if it is a VxLAN or NVGRE flow director rule. + * And get the flow director filter info BTW. + * VxLAN PATTERN: + * The first not void item must be ETH. + * The second not void item must be IPV4/ IPV6. + * The third not void item must be NVGRE. + * The next not void item must be END. + * NVGRE PATTERN: + * The first not void item must be ETH. + * The second not void item must be IPV4/ IPV6. + * The third not void item must be NVGRE. + * The next not void item must be END. + * ACTION: + * The first not void action should be QUEUE or DROP. + * The second not void optional action should be MARK, + * mark_id is a uint32_t number. + * The next not void action should be END. + * VxLAN pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * UDP NULL NULL + * VxLAN vni{0x00, 0x32, 0x54} {0xFF, 0xFF, 0xFF} + * END + * NEGRV pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * NVGRE protocol 0x6558 0xFFFF + * tni{0x00, 0x32, 0x54} {0xFF, 0xFF, 0xFF} + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
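For illustration, a sketch (not from the upstream sources) of just the VxLAN item, shaped to pass the mask checks below: the flags byte stays unmasked and the VNI is either fully masked or not masked at all. The VNI value mirrors the example table above; the rest of the pattern follows the layout described in this comment.

/* VxLAN item for a tunnel flow-director rule; everything not listed here
 * is left zero, and item->last stays NULL because ranges are rejected. */
static const struct rte_flow_item_vxlan example_vxlan_spec = {
	.vni = { 0x00, 0x32, 0x54 },
};
static const struct rte_flow_item_vxlan example_vxlan_mask = {
	.vni = { 0xFF, 0xFF, 0xFF },	/* VNI fully masked */
};
static const struct rte_flow_item example_vxlan_item = {
	.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	.spec = &example_vxlan_spec,
	.mask = &example_vxlan_mask,
};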
+ */ +static int +ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_vxlan *vxlan_spec; + const struct rte_flow_item_vxlan *vxlan_mask; + const struct rte_flow_item_nvgre *nvgre_spec; + const struct rte_flow_item_nvgre *nvgre_mask; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_vlan *vlan_spec; + const struct rte_flow_item_vlan *vlan_mask; + uint32_t index, j; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /** + * Some fields may not be provided. Set spec to 0 and mask to default + * value. So, we need not do anything for the not provided fields later. + */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); + rule->mask.vlan_tci_mask = 0; + + /* parse pattern */ + index = 0; + + /** + * The first not void item should be + * MAC or IPv4 or IPv6 or UDP or VxLAN. + */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_VXLAN && + item->type != RTE_FLOW_ITEM_TYPE_NVGRE) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mode = RTE_FDIR_MODE_PERFECT_TUNNEL; + + /* Skip MAC. */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /* Only used to describe the protocol stack. */ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is IPv4 or IPv6. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Skip IP. */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 || + item->type == RTE_FLOW_ITEM_TYPE_IPV6) { + /* Only used to describe the protocol stack. 
*/ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is UDP or NVGRE. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_NVGRE) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Skip UDP. */ + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + /* Only used to describe the protocol stack. */ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is VxLAN. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the VxLAN info */ + if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) { + rule->ixgbe_fdir.formatted.tunnel_type = + RTE_FDIR_TUNNEL_TYPE_VXLAN; + + /* Only care about VNI, others should be masked. */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + + /* Tunnel type is always meaningful. */ + rule->mask.tunnel_type_mask = 1; + + vxlan_mask = + (const struct rte_flow_item_vxlan *)item->mask; + if (vxlan_mask->flags) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* VNI must be totally masked or not. 
*/ + if ((vxlan_mask->vni[0] || vxlan_mask->vni[1] || + vxlan_mask->vni[2]) && + ((vxlan_mask->vni[0] != 0xFF) || + (vxlan_mask->vni[1] != 0xFF) || + (vxlan_mask->vni[2] != 0xFF))) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rte_memcpy(&rule->mask.tunnel_id_mask, vxlan_mask->vni, + RTE_DIM(vxlan_mask->vni)); + rule->mask.tunnel_id_mask <<= 8; + + if (item->spec) { + rule->b_spec = TRUE; + vxlan_spec = (const struct rte_flow_item_vxlan *) + item->spec; + rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni, + vxlan_spec->vni, RTE_DIM(vxlan_spec->vni)); + rule->ixgbe_fdir.formatted.tni_vni <<= 8; + } + } + + /* Get the NVGRE info */ + if (item->type == RTE_FLOW_ITEM_TYPE_NVGRE) { + rule->ixgbe_fdir.formatted.tunnel_type = + RTE_FDIR_TUNNEL_TYPE_NVGRE; + + /** + * Only care about flags0, flags1, protocol and TNI, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + + /* Tunnel type is always meaningful. */ + rule->mask.tunnel_type_mask = 1; + + nvgre_mask = + (const struct rte_flow_item_nvgre *)item->mask; + if (nvgre_mask->flow_id) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + if (nvgre_mask->c_k_s_rsvd0_ver != + rte_cpu_to_be_16(0x3000) || + nvgre_mask->protocol != 0xFFFF) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* TNI must be totally masked or not. 
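/*
 * A sketch of an NVGRE spec/mask pair that would satisfy the checks in this
 * block together with the TNI test that follows: the key bit and version
 * field plus the protocol must be fully masked, the spec must carry the key
 * bit and the Transparent Ethernet Bridging ethertype (0x6558), and the TNI
 * is either matched in full or ignored.  The TNI value here is arbitrary.
 */
#include <string.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

static void
example_nvgre_item(struct rte_flow_item *item,
		   struct rte_flow_item_nvgre *spec,
		   struct rte_flow_item_nvgre *mask)
{
	memset(spec, 0, sizeof(*spec));
	memset(mask, 0, sizeof(*mask));

	spec->c_k_s_rsvd0_ver = rte_cpu_to_be_16(0x2000);	/* key bit set */
	spec->protocol = rte_cpu_to_be_16(0x6558);		/* TEB */
	spec->tni[0] = 0x00;
	spec->tni[1] = 0x32;
	spec->tni[2] = 0x54;

	mask->c_k_s_rsvd0_ver = rte_cpu_to_be_16(0x3000);	/* key bit + version */
	mask->protocol = 0xFFFF;
	mask->tni[0] = mask->tni[1] = mask->tni[2] = 0xFF;

	item->type = RTE_FLOW_ITEM_TYPE_NVGRE;
	item->spec = spec;
	item->last = NULL;	/* ranges are rejected */
	item->mask = mask;
}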
*/ + if (nvgre_mask->tni[0] && + ((nvgre_mask->tni[0] != 0xFF) || + (nvgre_mask->tni[1] != 0xFF) || + (nvgre_mask->tni[2] != 0xFF))) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* tni is a 24-bits bit field */ + rte_memcpy(&rule->mask.tunnel_id_mask, nvgre_mask->tni, + RTE_DIM(nvgre_mask->tni)); + rule->mask.tunnel_id_mask <<= 8; + + if (item->spec) { + rule->b_spec = TRUE; + nvgre_spec = + (const struct rte_flow_item_nvgre *)item->spec; + if (nvgre_spec->c_k_s_rsvd0_ver != + rte_cpu_to_be_16(0x2000) || + nvgre_spec->protocol != + rte_cpu_to_be_16(NVGRE_PROTOCOL)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* tni is a 24-bits bit field */ + rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni, + nvgre_spec->tni, RTE_DIM(nvgre_spec->tni)); + rule->ixgbe_fdir.formatted.tni_vni <<= 8; + } + } + + /* check if the next not void item is MAC */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /** + * Only support vlan and dst MAC address, + * others should be masked. + */ + + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Ether type should be masked. */ + if (eth_mask->type) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /* src MAC address should be masked. */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + if (eth_mask->src.addr_bytes[j]) { + memset(rule, 0, + sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + rule->mask.mac_addr_byte_mask = 0; + for (j = 0; j < ETHER_ADDR_LEN; j++) { + /* It's a per byte mask. */ + if (eth_mask->dst.addr_bytes[j] == 0xFF) { + rule->mask.mac_addr_byte_mask |= 0x1 << j; + } else if (eth_mask->dst.addr_bytes[j]) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* When no vlan, considered as full mask. */ + rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF); + + if (item->spec) { + rule->b_spec = TRUE; + eth_spec = (const struct rte_flow_item_eth *)item->spec; + + /* Get the dst MAC. */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + rule->ixgbe_fdir.formatted.inner_mac[j] = + eth_spec->dst.addr_bytes[j]; + } + } + + /** + * Check if the next not void item is vlan or ipv4. + * IPv6 is not supported. 
+ */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if ((item->type != RTE_FLOW_ITEM_TYPE_VLAN) && + (item->type != RTE_FLOW_ITEM_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + if (!(item->spec && item->mask)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + vlan_spec = (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = (const struct rte_flow_item_vlan *)item->mask; + + if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci; + + if (vlan_mask->tpid != (uint16_t)~0U) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.vlan_tci_mask = vlan_mask->tci; + /* More than one tags are not supported. */ + + /** + * Check if the next not void item is not vlan. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } else if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /** + * If the tags is 0, it means don't care about the VLAN. + * Do nothing. 
+ */ + + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); +} + +static int +ixgbe_validate_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + int ret = 0; + + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + + ixgbe_parse_fdir_filter(attr, pattern, actions, + rule, error); + + + if (fdir_mode == RTE_FDIR_MODE_NONE || + fdir_mode != rule->mode) + return -ENOTSUP; + + return ret; +} + +static int +ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + int ret; + + ret = ixgbe_parse_fdir_filter_normal(attr, pattern, + actions, rule, error); + + if (!ret) + return 0; + + ret = ixgbe_parse_fdir_filter_tunnel(attr, pattern, + actions, rule, error); + + return ret; +} + +void +ixgbe_filterlist_flush(void) +{ + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + while ((ntuple_filter_ptr = TAILQ_FIRST(&filter_ntuple_list))) { + TAILQ_REMOVE(&filter_ntuple_list, + ntuple_filter_ptr, + entries); + rte_free(ntuple_filter_ptr); + } + + while ((ethertype_filter_ptr = TAILQ_FIRST(&filter_ethertype_list))) { + TAILQ_REMOVE(&filter_ethertype_list, + ethertype_filter_ptr, + entries); + rte_free(ethertype_filter_ptr); + } + + while ((syn_filter_ptr = TAILQ_FIRST(&filter_syn_list))) { + TAILQ_REMOVE(&filter_syn_list, + syn_filter_ptr, + entries); + rte_free(syn_filter_ptr); + } + + while ((l2_tn_filter_ptr = TAILQ_FIRST(&filter_l2_tunnel_list))) { + TAILQ_REMOVE(&filter_l2_tunnel_list, + l2_tn_filter_ptr, + entries); + rte_free(l2_tn_filter_ptr); + } + + while ((fdir_rule_ptr = TAILQ_FIRST(&filter_fdir_list))) { + TAILQ_REMOVE(&filter_fdir_list, + fdir_rule_ptr, + entries); + rte_free(fdir_rule_ptr); + } + + while ((ixgbe_flow_mem_ptr = TAILQ_FIRST(&ixgbe_flow_list))) { + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, + entries); + rte_free(ixgbe_flow_mem_ptr->flow); + rte_free(ixgbe_flow_mem_ptr); + } +} + +/** + * Create or destroy a flow rule. + * Theorically one rule can match more than one filters. + * We will let it use the filter which it hitt first. + * So, the sequence matters. 
+ */ +static struct rte_flow * +ixgbe_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + int ret; + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct ixgbe_fdir_rule fdir_rule; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct rte_flow *flow = NULL; + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + flow = rte_zmalloc("ixgbe_rte_flow", sizeof(struct rte_flow), 0); + if (!flow) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return (struct rte_flow *)flow; + } + ixgbe_flow_mem_ptr = rte_zmalloc("ixgbe_flow_mem", + sizeof(struct ixgbe_flow_mem), 0); + if (!ixgbe_flow_mem_ptr) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + rte_free(flow); + return NULL; + } + ixgbe_flow_mem_ptr->flow = flow; + TAILQ_INSERT_TAIL(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + + memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_parse_ntuple_filter(attr, pattern, + actions, &ntuple_filter, error); + if (!ret) { + ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, TRUE); + if (!ret) { + ntuple_filter_ptr = rte_zmalloc("ixgbe_ntuple_filter", + sizeof(struct ixgbe_ntuple_filter_ele), 0); + (void)rte_memcpy(&ntuple_filter_ptr->filter_info, + &ntuple_filter, + sizeof(struct rte_eth_ntuple_filter)); + TAILQ_INSERT_TAIL(&filter_ntuple_list, + ntuple_filter_ptr, entries); + flow->rule = ntuple_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_NTUPLE; + return flow; + } + goto out; + } + + memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_parse_ethertype_filter(attr, pattern, + actions, ðertype_filter, error); + if (!ret) { + ret = ixgbe_add_del_ethertype_filter(dev, + ðertype_filter, TRUE); + if (!ret) { + ethertype_filter_ptr = rte_zmalloc( + "ixgbe_ethertype_filter", + sizeof(struct ixgbe_ethertype_filter_ele), 0); + (void)rte_memcpy(ðertype_filter_ptr->filter_info, + ðertype_filter, + sizeof(struct rte_eth_ethertype_filter)); + TAILQ_INSERT_TAIL(&filter_ethertype_list, + ethertype_filter_ptr, entries); + flow->rule = ethertype_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_ETHERTYPE; + return flow; + } + goto out; + } + + memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter)); + ret = cons_parse_syn_filter(attr, pattern, actions, &syn_filter, error); + if (!ret) { + ret = ixgbe_syn_filter_set(dev, &syn_filter, TRUE); + if (!ret) { + syn_filter_ptr = rte_zmalloc("ixgbe_syn_filter", + sizeof(struct ixgbe_eth_syn_filter_ele), 0); + (void)rte_memcpy(&syn_filter_ptr->filter_info, + &syn_filter, + sizeof(struct rte_eth_syn_filter)); + TAILQ_INSERT_TAIL(&filter_syn_list, + syn_filter_ptr, + entries); + flow->rule = syn_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_SYN; + return flow; + } + goto out; + } + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(attr, pattern, + actions, &fdir_rule, error); + if (!ret) { + /* A mask cannot be deleted. 
*/ + if (fdir_rule.b_mask) { + if (!fdir_info->mask_added) { + /* It's the first time the mask is set. */ + rte_memcpy(&fdir_info->mask, + &fdir_rule.mask, + sizeof(struct ixgbe_hw_fdir_mask)); + ret = ixgbe_fdir_set_input_mask(dev); + if (ret) + goto out; + + fdir_info->mask_added = TRUE; + } else { + /** + * Only support one global mask, + * all the masks should be the same. + */ + ret = memcmp(&fdir_info->mask, + &fdir_rule.mask, + sizeof(struct ixgbe_hw_fdir_mask)); + if (ret) + goto out; + } + } + + if (fdir_rule.b_spec) { + ret = ixgbe_fdir_filter_program(dev, &fdir_rule, + FALSE, FALSE); + if (!ret) { + fdir_rule_ptr = rte_zmalloc("ixgbe_fdir_filter", + sizeof(struct ixgbe_fdir_rule_ele), 0); + (void)rte_memcpy(&fdir_rule_ptr->filter_info, + &fdir_rule, + sizeof(struct ixgbe_fdir_rule)); + TAILQ_INSERT_TAIL(&filter_fdir_list, + fdir_rule_ptr, entries); + flow->rule = fdir_rule_ptr; + flow->filter_type = RTE_ETH_FILTER_FDIR; + + return flow; + } + + if (ret) + goto out; + } + + goto out; + } + + memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + ret = cons_parse_l2_tn_filter(attr, pattern, + actions, &l2_tn_filter, error); + if (!ret) { + ret = ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_filter, FALSE); + if (!ret) { + l2_tn_filter_ptr = rte_zmalloc("ixgbe_l2_tn_filter", + sizeof(struct ixgbe_eth_l2_tunnel_conf_ele), 0); + (void)rte_memcpy(&l2_tn_filter_ptr->filter_info, + &l2_tn_filter, + sizeof(struct rte_eth_l2_tunnel_conf)); + TAILQ_INSERT_TAIL(&filter_l2_tunnel_list, + l2_tn_filter_ptr, entries); + flow->rule = l2_tn_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_L2_TUNNEL; + return flow; + } + } + +out: + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + rte_free(ixgbe_flow_mem_ptr); + rte_free(flow); + return NULL; +} + +/** + * Check if the flow rule is supported by ixgbe. + * It only checkes the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. + */ +static int +ixgbe_flow_validate(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_fdir_rule fdir_rule; + int ret; + + memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_parse_ntuple_filter(attr, pattern, + actions, &ntuple_filter, error); + if (!ret) + return 0; + + memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_parse_ethertype_filter(attr, pattern, + actions, ðertype_filter, error); + if (!ret) + return 0; + + memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter)); + ret = ixgbe_parse_syn_filter(attr, pattern, + actions, &syn_filter, error); + if (!ret) + return 0; + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_validate_fdir_filter(dev, attr, pattern, + actions, &fdir_rule, error); + if (!ret) + return 0; + + memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + ret = ixgbe_validate_l2_tn_filter(dev, attr, pattern, + actions, &l2_tn_filter, error); + + return ret; +} + +/* Destroy a flow rule on ixgbe. 
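/*
 * A minimal application-side sketch of how these entry points are reached
 * through the generic API (pattern and actions built as in the earlier
 * examples): validate checks the format only, so creation can still fail,
 * for instance when the filter tables are full.
 */
#include <rte_flow.h>

static struct rte_flow *
example_install_rule(uint8_t port_id,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item pattern[],
		     const struct rte_flow_action actions[])
{
	struct rte_flow_error err;

	if (rte_flow_validate(port_id, attr, pattern, actions, &err) != 0)
		return NULL;

	return rte_flow_create(port_id, attr, pattern, actions, &err);
}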
*/ +static int +ixgbe_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + int ret; + struct rte_flow *pmd_flow = flow; + enum rte_filter_type filter_type = pmd_flow->filter_type; + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct ixgbe_fdir_rule fdir_rule; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + switch (filter_type) { + case RTE_ETH_FILTER_NTUPLE: + ntuple_filter_ptr = (struct ixgbe_ntuple_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(&ntuple_filter, + &ntuple_filter_ptr->filter_info, + sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_ntuple_list, + ntuple_filter_ptr, entries); + rte_free(ntuple_filter_ptr); + } + break; + case RTE_ETH_FILTER_ETHERTYPE: + ethertype_filter_ptr = (struct ixgbe_ethertype_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(ðertype_filter, + ðertype_filter_ptr->filter_info, + sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_add_del_ethertype_filter(dev, + ðertype_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_ethertype_list, + ethertype_filter_ptr, entries); + rte_free(ethertype_filter_ptr); + } + break; + case RTE_ETH_FILTER_SYN: + syn_filter_ptr = (struct ixgbe_eth_syn_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(&syn_filter, + &syn_filter_ptr->filter_info, + sizeof(struct rte_eth_syn_filter)); + ret = ixgbe_syn_filter_set(dev, &syn_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_syn_list, + syn_filter_ptr, entries); + rte_free(syn_filter_ptr); + } + break; + case RTE_ETH_FILTER_FDIR: + fdir_rule_ptr = (struct ixgbe_fdir_rule_ele *)pmd_flow->rule; + (void)rte_memcpy(&fdir_rule, + &fdir_rule_ptr->filter_info, + sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_fdir_filter_program(dev, &fdir_rule, TRUE, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_fdir_list, + fdir_rule_ptr, entries); + rte_free(fdir_rule_ptr); + } + break; + case RTE_ETH_FILTER_L2_TUNNEL: + l2_tn_filter_ptr = (struct ixgbe_eth_l2_tunnel_conf_ele *) + pmd_flow->rule; + (void)rte_memcpy(&l2_tn_filter, &l2_tn_filter_ptr->filter_info, + sizeof(struct rte_eth_l2_tunnel_conf)); + ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_filter); + if (!ret) { + TAILQ_REMOVE(&filter_l2_tunnel_list, + l2_tn_filter_ptr, entries); + rte_free(l2_tn_filter_ptr); + } + break; + default: + PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", + filter_type); + ret = -EINVAL; + break; + } + + if (ret) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to destroy flow"); + return ret; + } + + TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) { + if (ixgbe_flow_mem_ptr->flow == pmd_flow) { + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + rte_free(ixgbe_flow_mem_ptr); + } + } + rte_free(flow); + + return ret; +} + +/* Destroy all flow rules associated with a port on ixgbe. 
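/*
 * The matching teardown calls for the destroy and flush entry points below,
 * as an application might issue them (sketch; the error message pointer can
 * be NULL, so guard it before printing).
 */
#include <stdio.h>
#include <rte_flow.h>

static void
example_remove_rules(uint8_t port_id, struct rte_flow *flow)
{
	struct rte_flow_error err;

	if (flow != NULL && rte_flow_destroy(port_id, flow, &err) != 0)
		printf("destroy failed: %s\n",
		       err.message ? err.message : "(no message)");

	/* Alternatively, drop every rule still installed on the port. */
	if (rte_flow_flush(port_id, &err) != 0)
		printf("flush failed: %s\n",
		       err.message ? err.message : "(no message)");
}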
*/ +static int +ixgbe_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + int ret = 0; + + ixgbe_clear_all_ntuple_filter(dev); + ixgbe_clear_all_ethertype_filter(dev); + ixgbe_clear_syn_filter(dev); + + ret = ixgbe_clear_all_fdir_filter(dev); + if (ret < 0) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to flush rule"); + return ret; + } + + ret = ixgbe_clear_all_l2_tn_filter(dev); + if (ret < 0) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to flush rule"); + return ret; + } + + ixgbe_filterlist_flush(); + + return 0; +} diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c b/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c index 56393ff2..4715045f 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,6 +51,7 @@ #include "base/ixgbe_common.h" #include "ixgbe_ethdev.h" +#include "rte_pmd_ixgbe.h" #define IXGBE_MAX_VFTA (128) #define IXGBE_VF_MSG_SIZE_DEFAULT 1 @@ -60,7 +61,9 @@ static inline uint16_t dev_num_vf(struct rte_eth_dev *eth_dev) { - return eth_dev->pci_dev->max_vfs; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + + return pci_dev->max_vfs; } static inline @@ -175,6 +178,7 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev) IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private); uint16_t vf_num; int i; + struct ixgbe_ethertype_filter ethertype_filter; if (!hw->mac.ops.set_ethertype_anti_spoofing) { RTE_LOG(INFO, PMD, "ether type anti-spoofing is not" @@ -182,16 +186,23 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev) return; } - /* occupy an entity of ether type filter */ - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (!(filter_info->ethertype_mask & (1 << i))) { - filter_info->ethertype_mask |= 1 << i; - filter_info->ethertype_filters[i] = - IXGBE_ETHERTYPE_FLOW_CTRL; - break; - } + i = ixgbe_ethertype_filter_lookup(filter_info, + IXGBE_ETHERTYPE_FLOW_CTRL); + if (i >= 0) { + RTE_LOG(ERR, PMD, "A ether type filter" + " entity for flow control already exists!\n"); + return; } - if (i == IXGBE_MAX_ETQF_FILTERS) { + + ethertype_filter.ethertype = IXGBE_ETHERTYPE_FLOW_CTRL; + ethertype_filter.etqf = IXGBE_ETQF_FILTER_EN | + IXGBE_ETQF_TX_ANTISPOOF | + IXGBE_ETHERTYPE_FLOW_CTRL; + ethertype_filter.etqs = 0; + ethertype_filter.conf = TRUE; + i = ixgbe_ethertype_filter_insert(filter_info, + ðertype_filter); + if (i < 0) { RTE_LOG(ERR, PMD, "Cannot find an unused ether type filter" " entity for flow control.\n"); return; @@ -660,6 +671,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); + struct rte_pmd_ixgbe_mb_event_param cb_param; retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf); if (retval) { @@ -674,27 +686,54 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) /* flush the ack before we write any messages back */ IXGBE_WRITE_FLUSH(hw); + /** + * initialise structure to send to user application + * will return response from user in retval field + */ + cb_param.retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED; + cb_param.vfid = vf; + cb_param.msg_type = msgbuf[0] 
& 0xFFFF; + cb_param.msg = (void *)msgbuf; + /* perform VF reset */ if (msgbuf[0] == IXGBE_VF_RESET) { int ret = ixgbe_vf_reset(dev, vf, msgbuf); vfinfo[vf].clear_to_send = true; + + /* notify application about VF reset */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); return ret; } + /** + * ask user application if we allowed to perform those functions + * if we get cb_param.retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED + * then business as usual, + * if 0, do nothing and send ACK to VF + * if cb_param.retval > 1, do nothing and send NAK to VF + */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); + + retval = cb_param.retval; + /* check & process VF to PF mailbox message */ switch ((msgbuf[0] & 0xFFFF)) { case IXGBE_VF_SET_MAC_ADDR: - retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf); break; case IXGBE_VF_SET_MULTICAST: - retval = ixgbe_vf_set_multicast(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_multicast(dev, vf, msgbuf); break; case IXGBE_VF_SET_LPE: - retval = ixgbe_set_vf_lpe(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_set_vf_lpe(dev, vf, msgbuf); break; case IXGBE_VF_SET_VLAN: - retval = ixgbe_vf_set_vlan(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_vlan(dev, vf, msgbuf); break; case IXGBE_VF_API_NEGOTIATE: retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf); @@ -704,7 +743,8 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE; break; case IXGBE_VF_UPDATE_XCAST_MODE: - retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf); break; default: PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h b/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h index c7457a6f..2aa48201 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h @@ -41,7 +41,7 @@ struct reg_info { uint32_t count; uint32_t stride; const char *name; -} reg_info; +}; static const struct reg_info ixgbe_regs_general[] = { {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"}, @@ -56,10 +56,10 @@ static const struct reg_info ixgbe_regs_general[] = { }; static const struct reg_info ixgbevf_regs_general[] = { - {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"}, - {IXGBE_STATUS, 1, 1, "IXGBE_STATUS"}, + {IXGBE_VFCTRL, 1, 1, "IXGBE_VFCTRL"}, + {IXGBE_VFSTATUS, 1, 1, "IXGBE_VFSTATUS"}, {IXGBE_VFLINKS, 1, 1, "IXGBE_VFLINKS"}, - {IXGBE_FRTIMER, 1, 1, "IXGBE_FRTIMER"}, + {IXGBE_VFFRTIMER, 1, 1, "IXGBE_VFFRTIMER"}, {IXGBE_VFMAILBOX, 1, 1, "IXGBE_VFMAILBOX"}, {IXGBE_VFMBMEM, 16, 4, "IXGBE_VFMBMEM"}, {IXGBE_VFRXMEMWRAP, 1, 1, "IXGBE_VFRXMEMWRAP"}, @@ -145,17 +145,17 @@ static const struct reg_info ixgbe_regs_rxdma[] = { }; static const struct reg_info ixgbevf_regs_rxdma[] = { - {IXGBE_RDBAL(0), 8, 0x40, "IXGBE_RDBAL"}, - {IXGBE_RDBAH(0), 8, 0x40, "IXGBE_RDBAH"}, - {IXGBE_RDLEN(0), 8, 0x40, "IXGBE_RDLEN"}, - {IXGBE_RDH(0), 8, 0x40, "IXGBE_RDH"}, - {IXGBE_RDT(0), 8, 0x40, "IXGBE_RDT"}, - {IXGBE_RXDCTL(0), 8, 0x40, "IXGBE_RXDCTL"}, - {IXGBE_SRRCTL(0), 8, 0x40, "IXGBE_SRRCTL"}, + {IXGBE_VFRDBAL(0), 8, 0x40, "IXGBE_VFRDBAL"}, + {IXGBE_VFRDBAH(0), 8, 0x40, "IXGBE_VFRDBAH"}, + {IXGBE_VFRDLEN(0), 8, 0x40, "IXGBE_VFRDLEN"}, + {IXGBE_VFRDH(0), 8, 0x40, "IXGBE_VFRDH"}, + {IXGBE_VFRDT(0), 
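/*
 * The VF mailbox path above now hands each request to an application
 * callback registered for RTE_ETH_EVENT_VF_MBOX before acting on it.  The
 * sketch below only shows the policy decision on the event parameter
 * declared in rte_pmd_ixgbe.h; how the parameter pointer reaches the
 * callback is left out, and the message ids are assumed to be visible from
 * base/ixgbe_mbx.h.
 */
#include "rte_pmd_ixgbe.h"
#include "base/ixgbe_mbx.h"

static void
example_vf_mbox_policy(struct rte_pmd_ixgbe_mb_event_param *p)
{
	switch (p->msg_type) {
	case IXGBE_VF_SET_MAC_ADDR:
	case IXGBE_VF_SET_VLAN:
		/* Let the PMD program the request as usual. */
		p->retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED;
		break;
	case IXGBE_VF_SET_MULTICAST:
		/* Ignore the request but still ACK the VF. */
		p->retval = RTE_PMD_IXGBE_MB_EVENT_NOOP_ACK;
		break;
	default:
		/* Reject anything else with a NACK. */
		p->retval = RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK;
		break;
	}
}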
8, 0x40, "IXGBE_VFRDT"}, + {IXGBE_VFRXDCTL(0), 8, 0x40, "IXGBE_VFRXDCTL"}, + {IXGBE_VFSRRCTL(0), 8, 0x40, "IXGBE_VFSRRCTL"}, {IXGBE_VFPSRTYPE, 1, 1, "IXGBE_VFPSRTYPE"}, {IXGBE_VFRSCCTL(0), 8, 0x40, "IXGBE_VFRSCCTL"}, - {IXGBE_PVFDCA_RXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_RXCTRL"}, - {IXGBE_PVFDCA_TXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_TXCTRL"}, + {IXGBE_VFDCA_RXCTRL(0), 8, 0x40, "IXGBE_VFDCA_RXCTRL"}, + {IXGBE_VFDCA_TXCTRL(0), 8, 0x40, "IXGBE_VFDCA_TXCTRL"}, {0, 0, 0, ""} }; @@ -193,14 +193,14 @@ static struct reg_info ixgbe_regs_tx[] = { }; static const struct reg_info ixgbevf_regs_tx[] = { - {IXGBE_TDBAL(0), 4, 0x40, "IXGBE_TDBAL"}, - {IXGBE_TDBAH(0), 4, 0x40, "IXGBE_TDBAH"}, - {IXGBE_TDLEN(0), 4, 0x40, "IXGBE_TDLEN"}, - {IXGBE_TDH(0), 4, 0x40, "IXGBE_TDH"}, - {IXGBE_TDT(0), 4, 0x40, "IXGBE_TDT"}, - {IXGBE_TXDCTL(0), 4, 0x40, "IXGBE_TXDCTL"}, - {IXGBE_TDWBAL(0), 4, 0x40, "IXGBE_TDWBAL"}, - {IXGBE_TDWBAH(0), 4, 0x40, "IXGBE_TDWBAH"}, + {IXGBE_VFTDBAL(0), 4, 0x40, "IXGBE_VFTDBAL"}, + {IXGBE_VFTDBAH(0), 4, 0x40, "IXGBE_VFTDBAH"}, + {IXGBE_VFTDLEN(0), 4, 0x40, "IXGBE_VFTDLEN"}, + {IXGBE_VFTDH(0), 4, 0x40, "IXGBE_VFTDH"}, + {IXGBE_VFTDT(0), 4, 0x40, "IXGBE_VFTDT"}, + {IXGBE_VFTXDCTL(0), 4, 0x40, "IXGBE_VFTXDCTL"}, + {IXGBE_VFTDWBAL(0), 4, 0x40, "IXGBE_VFTDWBAL"}, + {IXGBE_VFTDWBAH(0), 4, 0x40, "IXGBE_VFTDWBAH"}, {0, 0, 0, ""} }; diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c index a018e926..36f1c020 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. * @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -86,8 +86,12 @@ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG | \ + PKT_TX_MACSEC | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -322,7 +326,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, /* update tail pointer */ rte_wmb(); - IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail); + IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail); return nb_pkts; } @@ -520,6 +524,8 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags) cmdtype |= IXGBE_ADVTXD_DCMD_TSE; if (ol_flags & PKT_TX_OUTER_IP_CKSUM) cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT); + if (ol_flags & PKT_TX_MACSEC) + cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC; return cmdtype; } @@ -898,12 +904,63 @@ end_of_tx: PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); txq->tx_tail = tx_id; return nb_tx; } +/********************************************************************* + * + * TX prep functions + * + **********************************************************************/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** 
+ * Check if packet meets requirements for number of segments + * + * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and + * non-TSO + */ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + /********************************************************************* * * RX functions @@ -1345,7 +1402,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status) * Bit 30: L4I, L4I integrity error */ static uint64_t error_to_pkt_flags_map[4] = { - 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD, PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD }; pkt_flags = error_to_pkt_flags_map[(rx_status >> @@ -1580,7 +1639,8 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, /* update tail pointer */ rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, + cur_free_trigger); } if (rxq->rx_tail >= rxq->nb_rx_desc) @@ -1984,8 +2044,8 @@ next_desc: if (!ixgbe_rx_alloc_bufs(rxq, false)) { rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, - next_rdt); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, + next_rdt); nb_hold -= rxq->rx_free_thresh; } else { PMD_RX_LOG(DEBUG, "RX bulk alloc failed " @@ -2156,7 +2216,7 @@ next_desc: rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx); rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id); nb_hold = 0; } @@ -2281,6 +2341,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_prepare = NULL; #ifdef RTE_IXGBE_INC_VECTOR if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ && (rte_eal_process_type() != RTE_PROC_PRIMARY || @@ -2301,6 +2362,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) (unsigned long)txq->tx_rs_thresh, (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST); dev->tx_pkt_burst = ixgbe_xmit_pkts; + dev->tx_pkt_prepare = ixgbe_prep_pkts; } } @@ -2584,7 +2646,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq) * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST * rxq->rx_free_thresh < rxq->nb_rx_desc * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0 - * rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST) * Scattered packets are not supported. This should be checked * outside of this function. 
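/*
 * With dev->tx_pkt_prepare now pointing at ixgbe_prep_pkts for the
 * full-featured path, a transmit loop is expected to run the burst through
 * rte_eth_tx_prepare() before rte_eth_tx_burst(); a sketch:
 */
#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_errno.h>

static uint16_t
example_tx_burst(uint8_t port_id, uint16_t queue_id,
		 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep;

	/* Checks segment counts and offload flags, fixes up checksums. */
	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	if (nb_prep < nb_pkts)
		/* pkts[nb_prep] was rejected; rte_errno says why. */
		printf("tx_prepare stopped at %u (rte_errno %d)\n",
		       nb_prep, rte_errno);

	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}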
*/ @@ -2606,15 +2667,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq) "rxq->rx_free_thresh=%d", rxq->nb_rx_desc, rxq->rx_free_thresh); ret = -EINVAL; - } else if (!(rxq->nb_rx_desc < - (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) { - PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: " - "rxq->nb_rx_desc=%d, " - "IXGBE_MAX_RING_DESC=%d, " - "RTE_PMD_IXGBE_RX_MAX_BURST=%d", - rxq->nb_rx_desc, IXGBE_MAX_RING_DESC, - RTE_PMD_IXGBE_RX_MAX_BURST); - ret = -EINVAL; } return ret; @@ -2631,12 +2683,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq) /* * By default, the Rx queue setup function allocates enough memory for * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires - * extra memory at the end of the descriptor ring to be zero'd out. A - * pre-condition for using the Rx burst bulk alloc function is that the - * number of descriptors is less than or equal to - * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the - * constraints here to see if we need to zero out memory after the end - * of the H/W descriptor ring. + * extra memory at the end of the descriptor ring to be zero'd out. */ if (adapter->rx_bulk_alloc_allowed) /* zero out extra memory */ @@ -3312,15 +3359,16 @@ ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev) /** * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters - * @hw: pointer to hardware structure + * @dev: pointer to eth_dev structure * @dcb_config: pointer to ixgbe_dcb_config structure */ static void -ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw, +ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev, struct ixgbe_dcb_config *dcb_config) { uint32_t reg; uint32_t q; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); PMD_INIT_FUNC_TRACE(); if (hw->mac.type != ixgbe_mac_82598EB) { @@ -3339,10 +3387,17 @@ ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw, reg |= IXGBE_MTQC_VT_ENA; IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg); - /* Disable drop for all queues */ - for (q = 0; q < 128; q++) - IXGBE_WRITE_REG(hw, IXGBE_QDE, - (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT))); + if (RTE_ETH_DEV_SRIOV(dev).active == 0) { + /* Disable drop for all queues in VMDQ mode*/ + for (q = 0; q < 128; q++) + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT))); + } else { + /* Enable drop for all queues in SRIOV mode */ + for (q = 0; q < 128; q++) + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); + } /* Enable the Tx desc arbiter */ reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS); @@ -3377,7 +3432,7 @@ ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev, vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 
0xFFFF : 0xFFFFFFFF); /*Configure general DCB TX parameters*/ - ixgbe_dcb_tx_hw_config(hw, dcb_config); + ixgbe_dcb_tx_hw_config(dev, dcb_config); } static void @@ -3660,7 +3715,7 @@ ixgbe_dcb_hw_configure(struct rte_eth_dev *dev, /*get DCB TX configuration parameters from rte_eth_conf*/ ixgbe_dcb_tx_config(dev, dcb_config); /*Configure general DCB TX parameters*/ - ixgbe_dcb_tx_hw_config(hw, dcb_config); + ixgbe_dcb_tx_hw_config(dev, dcb_config); break; default: PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration"); @@ -3809,7 +3864,7 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev) (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS)) return; - if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES) + if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES) return; /** Configure DCB hardware **/ @@ -4081,12 +4136,13 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev) case ETH_MQ_RX_VMDQ_RSS: ixgbe_config_vf_rss(dev); break; - - /* FIXME if support DCB/RSS together with VMDq & SRIOV */ case ETH_MQ_RX_VMDQ_DCB: + ixgbe_vmdq_dcb_configure(dev); + break; + /* FIXME if support DCB/RSS together with VMDq & SRIOV */ case ETH_MQ_RX_VMDQ_DCB_RSS: PMD_INIT_LOG(ERR, - "Could not support DCB with VMDq & SRIOV"); + "Could not support DCB/RSS with VMDq & SRIOV"); return -1; default: ixgbe_config_vf_default(dev); @@ -4913,8 +4969,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE)); if (!poll_ms) - // TREX_PATCH - changed log level from ERR to DEBUG - PMD_INIT_LOG(DEBUG, "Could not disable Rx Queue %d", + PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id); rte_delay_us(RTE_IXGBE_WAIT_100_US); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h index 2608b364..739fd198 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h @@ -67,7 +67,7 @@ #define RTE_IXGBE_MAX_RX_BURST RTE_IXGBE_RXQ_REARM_THRESH #endif -#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \ +#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * \ sizeof(union ixgbe_adv_rx_desc)) #ifdef RTE_PMD_PACKET_PREFETCH @@ -80,6 +80,8 @@ #define RTE_IXGBE_WAIT_100_US 100 #define RTE_IXGBE_VMTXSW_REGISTER_COUNT 2 +#define IXGBE_TX_MAX_SEG 40 + #define IXGBE_PACKET_TYPE_MASK_82599 0X7F #define IXGBE_PACKET_TYPE_MASK_X550 0X10FF #define IXGBE_PACKET_TYPE_MASK_TUNNEL 0XFF diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h index 62b82013..a3473b98 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h @@ -204,8 +204,20 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq) return; /* free all mbufs that are valid in the ring */ - for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + if (rxq->rxrearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; + i = (i + 1) & mask) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } + rxq->rxrearm_nb = rxq->nb_rx_desc; /* set all entries to NULL */ @@ -309,12 +321,8 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) if (fconf->mode != 
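/*
 * Since the scalar flag map above and the vectorized paths below now report
 * good as well as bad checksum status in ol_flags, a receiver can react per
 * packet; a small sketch:
 */
#include <rte_mbuf.h>

static int
example_csum_ok(const struct rte_mbuf *m)
{
	/* IP or L4 checksum flagged bad by the hardware. */
	if (m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
		return 0;
	return 1;
}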
RTE_FDIR_MODE_NONE) return -1; - /* - * - no csum error report support - * - no header split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + /* no header split support */ + if (rxmode->header_split == 1) return -1; return 0; diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c index 64a329ea..f96cc85c 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c @@ -556,5 +556,11 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq) int __attribute__((cold)) ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) { + struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + + /* no csum error report support */ + if (rxmode->hw_ip_checksum == 1) + return -1; + return ixgbe_rx_vec_dev_conf_condition_check_default(dev); } diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c index 1c4fd7c1..abbf2841 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c @@ -145,7 +145,7 @@ static inline void desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, struct rte_mbuf **rx_pkts) { - __m128i ptype0, ptype1, vtag0, vtag1; + __m128i ptype0, ptype1, vtag0, vtag1, csum; union { uint16_t e[4]; uint64_t dword; @@ -156,24 +156,45 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, 0x0000, 0x0000, 0x0000, 0x0000, 0x000F, 0x000F, 0x000F, 0x000F); + /* mask the lower byte of ol_flags */ + const __m128i ol_flags_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x00FF, 0x00FF, 0x00FF, 0x00FF); + /* map rss type to rss hash flag */ const __m128i rss_flags = _mm_set_epi8(PKT_RX_FDIR, 0, 0, 0, 0, 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0); - /* mask everything except vlan present bit */ - const __m128i vlan_msk = _mm_set_epi16( - 0x0000, 0x0000, - 0x0000, 0x0000, - IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, - IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP); - /* map vlan present (0x8) to ol_flags */ - const __m128i vlan_map = _mm_set_epi8( + /* mask everything except vlan present and l4/ip csum error */ + const __m128i vlan_csum_msk = _mm_set_epi16( + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, + IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP); + /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */ + const __m128i vlan_csum_map_lo = _mm_set_epi8( 0, 0, 0, 0, - 0, 0, 0, vlan_flags, + vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_GOOD, 0, 0, 0, 0, - 0, 0, 0, 0); + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD); + + const __m128i vlan_csum_map_hi = _mm_set_epi8( + 0, 0, 0, 0, + 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), + 0, 0, 0, 0, + 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t)); ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]); ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); @@ -185,8 +206,26 @@ desc_to_olflags_v(__m128i descs[4], uint8_t 
vlan_flags, ptype0 = _mm_shuffle_epi8(rss_flags, ptype0); vtag1 = _mm_unpacklo_epi32(vtag0, vtag1); - vtag1 = _mm_and_si128(vtag1, vlan_msk); - vtag1 = _mm_shuffle_epi8(vlan_map, vtag1); + vtag1 = _mm_and_si128(vtag1, vlan_csum_msk); + + /* csum bits are in the most significant, to use shuffle we need to + * shift them. Change mask to 0xc000 to 0x0003. + */ + csum = _mm_srli_epi16(vtag1, 14); + + /* now or the most significant 64 bits containing the checksum + * flags with the vlan present flags. + */ + csum = _mm_srli_si128(csum, 8); + vtag1 = _mm_or_si128(csum, vtag1); + + /* convert VP, IPE, L4E to ol_flags */ + vtag0 = _mm_shuffle_epi8(vlan_csum_map_hi, vtag1); + vtag0 = _mm_slli_epi16(vtag0, sizeof(uint8_t)); + + vtag1 = _mm_shuffle_epi8(vlan_csum_map_lo, vtag1); + vtag1 = _mm_and_si128(vtag1, ol_flags_msk); + vtag1 = _mm_or_si128(vtag0, vtag1); vtag1 = _mm_or_si128(ptype0, vtag1); vol.dword = _mm_cvtsi128_si64(vtag1); @@ -210,7 +249,6 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two - * - don't support ol_flags for rss and csum err */ static inline uint16_t _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, @@ -243,7 +281,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, */ rxdp = rxq->rx_ring + rxq->rx_tail; - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); + rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act @@ -305,6 +343,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); @@ -313,8 +352,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ @@ -425,7 +466,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two - * - don't support ol_flags for rss and csum err */ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -438,7 +478,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, * vPMD receive routine that reassembles scattered packets * * Notice: - * - don't support ol_flags for rss and csum err * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit diff --git a/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h b/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h new file mode 100644 index 00000000..4d7b507d --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h @@ -0,0 +1,412 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file rte_pmd_ixgbe.h + * ixgbe PMD specific functions. + * + **/ + +#ifndef _PMD_IXGBE_H_ +#define _PMD_IXGBE_H_ + +#include + +/** + * Set the VF MAC address. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param mac_addr + * VF MAC address. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* or *mac_addr* is invalid. + */ +int rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf, + struct ether_addr *mac_addr); + +/** + * Enable/Disable VF VLAN anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF on which to set VLAN anti spoofing. + * @param on + * 1 - Enable VFs VLAN anti spoofing. + * 0 - Disable VFs VLAN anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable VF MAC anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF on which to set MAC anti spoofing. + * @param on + * 1 - Enable VFs MAC anti spoofing. + * 0 - Disable VFs MAC anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan insert + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param vlan_id + * 0 - Disable VF's vlan insert. + * n - Enable; n is inserted as the vlan id. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, + uint16_t vlan_id); + +/** + * Enable/Disable tx loopback + * + * @param port + * The port identifier of the Ethernet device. 
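/*
 * Example use of the per-VF helpers declared above, as a host application
 * might call them; the VLAN id is an arbitrary example value.
 */
#include "rte_pmd_ixgbe.h"

static int
example_vf_hardening(uint8_t port_id, uint16_t vf)
{
	int ret;

	/* Drop VF frames whose source MAC does not match the assigned one. */
	ret = rte_pmd_ixgbe_set_vf_mac_anti_spoof(port_id, vf, 1);
	if (ret != 0)
		return ret;
	/* Likewise for frames tagged with a VLAN the VF does not own. */
	ret = rte_pmd_ixgbe_set_vf_vlan_anti_spoof(port_id, vf, 1);
	if (ret != 0)
		return ret;
	/* Force VLAN 100 onto everything the VF transmits. */
	return rte_pmd_ixgbe_set_vf_vlan_insert(port_id, vf, 100);
}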
+ * @param on + * 1 - Enable tx loopback. + * 0 - Disable tx loopback. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on); + +/** + * set all queues drop enable bit + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - set the queue drop enable bit for all pools. + * 0 - reset the queue drop enable bit for all pools. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on); + +/** + * set drop enable bit in the VF split rx control register + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - set the drop enable bit in the split rx control register. + * 0 - reset the drop enable bit in the split rx control register. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ + +int rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan strip for all queues in a pool + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - Enable VF's vlan strip on RX queues. + * 0 - Disable VF's vlan strip on RX queues. + * + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int +rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable MACsec offload. + * + * @param port + * The port identifier of the Ethernet device. + * @param en + * 1 - Enable encryption (encrypt and add integrity signature). + * 0 - Disable encryption (only add integrity signature). + * @param rp + * 1 - Enable replay protection. + * 0 - Disable replay protection. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp); + +/** + * Disable MACsec offload. + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_disable(uint8_t port); + +/** + * Configure Tx SC (Secure Connection). + * + * @param port + * The port identifier of the Ethernet device. + * @param mac + * The MAC address on the local side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac); + +/** + * Configure Rx SC (Secure Connection). + * + * @param port + * The port identifier of the Ethernet device. + * @param mac + * The MAC address on the remote side. + * @param pi + * The PI (port identifier) on the remote side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi); + +/** + * Enable Tx SA (Secure Association). + * + * @param port + * The port identifier of the Ethernet device. + * @param idx + * The SA to be enabled (0 or 1). 
+ * @param an + * The association number on the local side. + * @param pn + * The packet number on the local side. + * @param key + * The key on the local side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key); + +/** + * Enable Rx SA (Secure Association). + * + * @param port + * The port identifier of the Ethernet device. + * @param idx + * The SA to be enabled (0 or 1) + * @param an + * The association number on the remote side. + * @param pn + * The packet number on the remote side. + * @param key + * The key on the remote side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key); + +/** +* Set RX L2 Filtering mode of a VF of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param rx_mask +* The RX mode mask, which is one or more of accepting Untagged Packets, +* packets that match the PFUTA table, Broadcast and Multicast Promiscuous. +* ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, +* ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used +* in rx_mode. +* @param on +* 1 - Enable a VF RX mode. +* 0 - Disable a VF RX mode. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on); + +/** +* Enable or disable a VF traffic receive of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic receive. +* 0 - Disable a VF traffic receive. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on); + +/** +* Enable or disable a VF traffic transmit of the Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic transmit. +* 0 - Disable a VF traffic transmit. +* @return +* - (0) if successful. +* - (-ENODEV) if *port_id* invalid. +* - (-ENOTSUP) if hardware doesn't support. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on); + +/** +* Enable/Disable hardware VF VLAN filtering by an Ethernet device of +* received VLAN packets tagged with a given VLAN Tag Identifier. +* +* @param port +* The port identifier of the Ethernet device. +* @param vlan +* The VLAN Tag Identifier whose filtering must be enabled or disabled. +* @param vf_mask +* Bitmap listing which VFs participate in the VLAN filtering. +* @param vlan_on +* 1 - Enable VFs VLAN filtering. +* 0 - Disable VFs VLAN filtering. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, uint64_t vf_mask, uint8_t vlan_on); + +/** + * Set the rate limitation for a vf on an Ethernet device. 
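A possible MACsec bring-up order using the calls documented above: enable the offload, configure the Tx/Rx secure channels, then install one SA in each direction. MAC addresses, the remote PI, packet numbers and the key are placeholders:

#include <rte_pmd_ixgbe.h>

static int
macsec_bringup(uint8_t port)
{
        uint8_t local_mac[6]  = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
        uint8_t remote_mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
        uint8_t key[16] = { 0 }; /* replace with the real SAK */
        int ret;

        /* Encrypt + integrity-protect, with replay protection. */
        ret = rte_pmd_ixgbe_macsec_enable(port, 1, 1);
        if (ret == 0)
                ret = rte_pmd_ixgbe_macsec_config_txsc(port, local_mac);
        if (ret == 0)
                ret = rte_pmd_ixgbe_macsec_config_rxsc(port, remote_mac, 1);
        if (ret == 0)
                ret = rte_pmd_ixgbe_macsec_select_txsa(port, 0, 0, 0, key);
        if (ret == 0)
                ret = rte_pmd_ixgbe_macsec_select_rxsa(port, 0, 0, 0, key);
        return ret;
}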
+ * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param tx_rate + * The tx rate allocated from the total link speed for this VF id. + * @param q_msk + * The queue mask which need to set the rate. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, uint16_t tx_rate, uint64_t q_msk); + +/** + * Response sent back to ixgbe driver from user app after callback + */ +enum rte_pmd_ixgbe_mb_event_rsp { + RTE_PMD_IXGBE_MB_EVENT_NOOP_ACK, /**< skip mbox request and ACK */ + RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */ + RTE_PMD_IXGBE_MB_EVENT_PROCEED, /**< proceed with mbox request */ + RTE_PMD_IXGBE_MB_EVENT_MAX /**< max value of this enum */ +}; + +/** + * Data sent to the user application when the callback is executed. + */ +struct rte_pmd_ixgbe_mb_event_param { + uint16_t vfid; /**< Virtual Function number */ + uint16_t msg_type; /**< VF to PF message type, defined in ixgbe_mbx.h */ + uint16_t retval; /**< return value */ + void *msg; /**< pointer to message */ +}; +#endif /* _PMD_IXGBE_H_ */ diff --git a/src/dpdk/drivers/net/mlx4/mlx4.c b/src/dpdk/drivers/net/mlx4/mlx4.c index 304c8461..79efaaa3 100644 --- a/src/dpdk/drivers/net/mlx4/mlx4.c +++ b/src/dpdk/drivers/net/mlx4/mlx4.c @@ -61,16 +61,16 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include #include @@ -87,7 +87,7 @@ #include #include #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* Generated configuration header. 
*/ @@ -2961,19 +2961,25 @@ rxq_cq_to_pkt_type(uint32_t flags) if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | + IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, RTE_PTYPE_L3_IPV6) | + IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_INNER_L3_IPV4) | + IBV_EXP_CQ_RX_IPV4_PACKET, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_INNER_L3_IPV6); + IBV_EXP_CQ_RX_IPV6_PACKET, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); else pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | + IBV_EXP_CQ_RX_IPV4_PACKET, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_L3_IPV6); + IBV_EXP_CQ_RX_IPV6_PACKET, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN); return pkt_type; } @@ -2995,25 +3001,20 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags) if (rxq->csum) ol_flags |= - TRANSPOSE(~flags, + TRANSPOSE(flags, IBV_EXP_CQ_RX_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~flags, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); - /* - * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place - * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional - * (its value is 0). - */ + PKT_RX_L4_CKSUM_GOOD); if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - TRANSPOSE(~flags, + TRANSPOSE(flags, IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~flags, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); + PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -4426,6 +4427,8 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) unsigned int max; char ifname[IF_NAMESIZE]; + info->pci_dev = RTE_DEV_TO_PCI(dev->device); + if (priv == NULL) return; priv_lock(priv); @@ -4826,7 +4829,7 @@ end: } /** - * DPDK callback to retrieve physical link information (unlocked version). + * DPDK callback to retrieve physical link information. * * @param dev * Pointer to Ethernet device structure. @@ -4834,9 +4837,9 @@ end: * Wait for request completion (ignored). */ static int -mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx4_get_priv(dev); + const struct priv *priv = mlx4_get_priv(dev); struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET }; @@ -4844,6 +4847,8 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) struct rte_eth_link dev_link; int link_speed = 0; + /* priv_lock() is not taken to allow concurrent calls. */ + if (priv == NULL) return -EINVAL; (void)wait_to_complete; @@ -4878,28 +4883,6 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) return -1; } -/** - * DPDK callback to retrieve physical link information. - * - * @param dev - * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). - */ -static int -mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete) -{ - struct priv *priv = mlx4_get_priv(dev); - int ret; - - if (priv == NULL) - return -EINVAL; - priv_lock(priv); - ret = mlx4_link_update_unlocked(dev, wait_to_complete); - priv_unlock(priv); - return ret; -} - /** * DPDK callback to change the MTU. 
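With this change the mlx4 receive path reports checksums using the GOOD/BAD/UNKNOWN scheme instead of only raising the *_BAD flags. On the application side the mask macros from rte_mbuf.h are the portable way to test the verdict; a minimal sketch, assuming m is a freshly received mbuf:

#include <rte_mbuf.h>

static inline int
rx_checksums_ok(const struct rte_mbuf *m)
{
        uint64_t ip = m->ol_flags & PKT_RX_IP_CKSUM_MASK;
        uint64_t l4 = m->ol_flags & PKT_RX_L4_CKSUM_MASK;

        /* UNKNOWN means the PMD gave no verdict; accept it here. */
        return (ip != PKT_RX_IP_CKSUM_BAD) && (l4 != PKT_RX_L4_CKSUM_BAD);
}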
* @@ -5416,7 +5399,7 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) struct rte_eth_link *link = &dev->data->dev_link; priv->pending_alarm = 0; - mlx4_link_update_unlocked(dev, 0); + mlx4_link_update(dev, 0); if (((link->link_speed == 0) && link->link_status) || ((link->link_speed != 0) && !link->link_status)) { /* Inconsistent status, check again later. */ @@ -5448,7 +5431,7 @@ mlx4_dev_link_status_handler(void *arg) ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -5471,7 +5454,7 @@ mlx4_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -5544,7 +5527,7 @@ static struct eth_driver mlx4_driver; * 0 on success, negative errno value on failure. */ static int -mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { struct ibv_device **list; struct ibv_device *ibv_dev; @@ -5803,7 +5786,7 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) snprintf(name, sizeof(name), "%s port %u", ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + eth_dev = rte_eth_dev_allocate(name); } if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); @@ -5839,11 +5822,9 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup; } else { eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; eth_dev->data->mac_addrs = priv->mac; } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); @@ -5851,7 +5832,6 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->dev = eth_dev; eth_dev->dev_ops = &mlx4_dev_ops; - TAILQ_INIT(ð_dev->link_intr_cbs); /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); @@ -5911,9 +5891,11 @@ static const struct rte_pci_id mlx4_pci_id_map[] = { static struct eth_driver mlx4_driver = { .pci_drv = { - .name = MLX4_DRIVER_NAME, + .driver = { + .name = MLX4_DRIVER_NAME + }, .id_table = mlx4_pci_id_map, - .devinit = mlx4_pci_devinit, + .probe = mlx4_pci_probe, .drv_flags = RTE_PCI_DRV_INTR_LSC, }, .dev_private_size = sizeof(struct priv) @@ -5922,12 +5904,10 @@ static struct eth_driver mlx4_driver = { /** * Driver initialization routine. 
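The link-status events forwarded here with _rte_eth_dev_callback_process() are consumed by applications through the standard ethdev callback mechanism. A minimal sketch of the receiving side, assuming the DPDK 17.02 callback signature:

#include <stdio.h>
#include <rte_ethdev.h>

static void
lsc_event_cb(uint8_t port_id, enum rte_eth_event_type type, void *arg)
{
        struct rte_eth_link link;

        (void)type;
        (void)arg;
        rte_eth_link_get_nowait(port_id, &link);
        printf("port %u link is %s\n", port_id,
               link.link_status ? "up" : "down");
}

/* Registered once per port, e.g. right after rte_eth_dev_configure():
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *                               lsc_event_cb, NULL);
 */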
*/ -static int -rte_mlx4_pmd_init(const char *name, const char *args) +RTE_INIT(rte_mlx4_pmd_init); +static void +rte_mlx4_pmd_init(void) { - (void)name; - (void)args; - RTE_BUILD_BUG_ON(sizeof(wr_id_t) != sizeof(uint64_t)); /* * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use @@ -5938,13 +5918,9 @@ rte_mlx4_pmd_init(const char *name, const char *args) setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); ibv_fork_init(); rte_eal_pci_register(&mlx4_driver.pci_drv); - return 0; } -static struct rte_driver rte_mlx4_driver = { - .type = PMD_PDEV, - .init = rte_mlx4_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_mlx4_driver, mlx4); -DRIVER_REGISTER_PCI_TABLE(mlx4, mlx4_pci_id_map); +RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); +RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, + "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); diff --git a/src/dpdk/drivers/net/mlx4/mlx4.h b/src/dpdk/drivers/net/mlx4/mlx4.h index d0c7bc29..4c7505e2 100644 --- a/src/dpdk/drivers/net/mlx4/mlx4.h +++ b/src/dpdk/drivers/net/mlx4/mlx4.h @@ -96,7 +96,7 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007, }; -#define MLX4_DRIVER_NAME "librte_pmd_mlx4" +#define MLX4_DRIVER_NAME "net_mlx4" /* Bit-field manipulation. */ #define BITFIELD_DECLARE(bf, type, size) \ diff --git a/src/dpdk/drivers/net/mlx5/mlx5.c b/src/dpdk/drivers/net/mlx5/mlx5.c index 7c072391..d4bd4696 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.c +++ b/src/dpdk/drivers/net/mlx5/mlx5.c @@ -181,9 +181,6 @@ mlx5_dev_close(struct rte_eth_dev *dev) } if (priv->reta_idx != NULL) rte_free(priv->reta_idx); - - mlx5_stats_free(dev); - priv_unlock(priv); memset(priv, 0, sizeof(*priv)); } @@ -202,6 +199,9 @@ static const struct eth_dev_ops mlx5_dev_ops = { .link_update = mlx5_link_update, .stats_get = mlx5_stats_get, .stats_reset = mlx5_stats_reset, + .xstats_get = mlx5_xstats_get, + .xstats_reset = mlx5_xstats_reset, + .xstats_get_names = mlx5_xstats_get_names, .dev_infos_get = mlx5_dev_infos_get, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, @@ -257,7 +257,6 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) return ret; } - /** * Verify and store value for device argument. * @@ -290,7 +289,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { priv->txqs_inline = tmp; } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { - priv->mps = !!tmp; + priv->mps &= !!tmp; /* Enable MPW only if HW supports */ } else { WARN("%s: unknown parameter", key); return -EINVAL; @@ -298,8 +297,6 @@ mlx5_args_check(const char *key, const char *val, void *opaque) return 0; } - - /** * Parse device parameters. * @@ -336,16 +333,16 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs) if (rte_kvargs_count(kvlist, params[i])) { ret = rte_kvargs_process(kvlist, params[i], mlx5_args_check, priv); - if (ret != 0) + if (ret != 0) { + rte_kvargs_free(kvlist); return ret; + } } } rte_kvargs_free(kvlist); return 0; } - - static struct eth_driver mlx5_driver; /** @@ -363,7 +360,7 @@ static struct eth_driver mlx5_driver; * 0 on success, negative errno value on failure. 
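The mlx5_args()/mlx5_args_check() pair above follows the usual rte_kvargs pattern, now also releasing the kvlist on the error path. A standalone sketch of that pattern, with an illustrative key and handler that are not part of the PMD:

#include <stdlib.h>
#include <rte_kvargs.h>

static int
handle_txq_inline(const char *key, const char *val, void *opaque)
{
        unsigned long *out = opaque;

        (void)key;
        *out = strtoul(val, NULL, 0);
        return 0;
}

static int
parse_devargs(const char *args, unsigned long *txq_inline)
{
        const char *keys[] = { "txq_inline", NULL };
        struct rte_kvargs *kvlist = rte_kvargs_parse(args, keys);
        int ret = 0;

        if (kvlist == NULL)
                return 0;
        if (rte_kvargs_count(kvlist, "txq_inline"))
                ret = rte_kvargs_process(kvlist, "txq_inline",
                                         handle_txq_inline, txq_inline);
        rte_kvargs_free(kvlist); /* freed on success and on error */
        return ret;
}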
*/ static int -mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { struct ibv_device **list; struct ibv_device *ibv_dev; @@ -374,13 +371,6 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) unsigned int mps; int idx; int i; - static int ibv_was_init=0; - - if (ibv_was_init==0) { - ibv_fork_init(); - ibv_was_init=1; - } - (void)pci_drv; assert(pci_drv == &mlx5_driver.pci_drv); @@ -423,10 +413,26 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) sriov = ((pci_dev->id.device_id == PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); - /* Multi-packet send is only supported by ConnectX-4 Lx PF. */ - mps = (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)); + /* + * Multi-packet send is supported by ConnectX-4 Lx PF as well + * as all ConnectX-5 devices. + */ + switch (pci_dev->id.device_id) { + case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: + mps = 1; + break; + default: + mps = 0; + } INFO("PCI information matches, using device \"%s\"" " (SR-IOV: %s, MPS: %s)", list[i]->name, @@ -526,16 +532,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->mtu = ETHER_MTU; priv->mps = mps; /* Enable MPW by default if supported. */ priv->cqe_comp = 1; /* Enable compression by default. */ - - - err = mlx5_args(priv, pci_dev->devargs); - - /* TREX PATCH */ - /* set for maximum performance default */ - priv->txq_inline =64; - priv->txqs_inline =4; - - + err = mlx5_args(priv, pci_dev->device.devargs); if (err) { ERROR("failed to process device arguments: %s", strerror(err)); @@ -562,8 +559,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; /* Remove this check once DPDK supports larger/variable * indirection tables. 
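The hard-coded TREX defaults removed here (txq_inline=64, txqs_inline=4) can still be requested at run time through the mlx5 device arguments, assuming the standard key names; a minimal sketch with a placeholder PCI address:

#include <rte_eal.h>

int
main(void)
{
        char *argv[] = {
                "app", "-w",
                "0000:03:00.0,txq_inline=64,txqs_min_inline=4,txq_mpw_en=1",
                NULL,
        };

        return (rte_eal_init(3, argv) < 0) ? 1 : 0;
}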
*/ - if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) - priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; + if (priv->ind_table_max_size > + (unsigned int)ETH_RSS_RETA_SIZE_512) + priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; DEBUG("maximum RX indirection table size is %u", priv->ind_table_max_size); priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & @@ -641,7 +639,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) snprintf(name, sizeof(name), "%s port %u", ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + eth_dev = rte_eth_dev_allocate(name); } if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); @@ -676,22 +674,19 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; } else { eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; eth_dev->data->mac_addrs = priv->mac; } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); eth_dev->driver = &mlx5_driver; priv->dev = eth_dev; eth_dev->dev_ops = &mlx5_dev_ops; - TAILQ_INIT(ð_dev->link_intr_cbs); - /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); priv_set_flags(priv, ~IFF_UP, IFF_UP); + mlx5_link_update(priv->dev, 1); continue; port_error: @@ -745,521 +740,21 @@ static const struct rte_pci_id mlx5_pci_id_map[] = { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) }, - { - .vendor_id = 0 - } -}; - -static struct eth_driver mlx5_driver = { - .pci_drv = { - .name = MLX5_DRIVER_NAME, - .id_table = mlx5_pci_id_map, - .devinit = mlx5_pci_devinit, - .drv_flags = RTE_PCI_DRV_INTR_LSC, - }, - .dev_private_size = sizeof(struct priv) -}; - -/** - * Driver initialization routine. - */ -static int -rte_mlx5_pmd_init(const char *name, const char *args) -{ - (void)name; - (void)args; - /* - * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use - * huge pages. Calling ibv_fork_init() during init allows - * applications to use fork() safely for purposes other than - * using this PMD, which is not supported in forked processes. - */ - setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); - rte_eal_pci_register(&mlx5_driver.pci_drv); - return 0; -} - -static struct rte_driver rte_mlx5_driver = { - .type = PMD_PDEV, - .init = rte_mlx5_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5); -DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map); - - - - - - -#if 0 -/** - * Verify and store value for device argument. - * - * @param[in] key - * Key argument to verify. - * @param[in] val - * Value associated with key. - * @param opaque - * User data. - * - * @return - * 0 on success, negative errno value on failure. 
- */ -static int -mlx5_args_check(const char *key, const char *val, void *opaque) -{ - struct priv *priv = opaque; - unsigned long tmp; - - errno = 0; - tmp = strtoul(val, NULL, 0); - if (errno) { - WARN("%s: \"%s\" is not a valid integer", key, val); - return errno; - } - if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { - priv->cqe_comp = !!tmp; - } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { - priv->txq_inline = tmp; - } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { - priv->txqs_inline = tmp; - } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { - priv->mps = !!tmp; - } else { - WARN("%s: unknown parameter", key); - return -EINVAL; - } - return 0; -} - -/** - * Parse device parameters. - * - * @param priv - * Pointer to private structure. - * @param devargs - * Device arguments structure. - * - * @return - * 0 on success, errno value on failure. - */ -static int -mlx5_args(struct priv *priv, struct rte_devargs *devargs) -{ - const char **params = (const char *[]){ - MLX5_RXQ_CQE_COMP_EN, - MLX5_TXQ_INLINE, - MLX5_TXQS_MIN_INLINE, - MLX5_TXQ_MPW_EN, - NULL, - }; - struct rte_kvargs *kvlist; - int ret = 0; - int i; - - if (devargs == NULL) - return 0; - /* Following UGLY cast is done to pass checkpatch. */ - kvlist = rte_kvargs_parse(devargs->args, params); - if (kvlist == NULL) - return 0; - /* Process parameters. */ - for (i = 0; (params[i] != NULL); ++i) { - if (rte_kvargs_count(kvlist, params[i])) { - ret = rte_kvargs_process(kvlist, params[i], - mlx5_args_check, priv); - if (ret != 0) - return ret; - } - } - rte_kvargs_free(kvlist); - return 0; -} - -static struct eth_driver mlx5_driver; - -/** - * DPDK callback to register a PCI device. - * - * This function creates an Ethernet device for each port of a given - * PCI device. - * - * @param[in] pci_drv - * PCI driver structure (mlx5_driver). - * @param[in] pci_dev - * PCI device information. - * - * @return - * 0 on success, negative errno value on failure. - */ -static int -mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) -{ - struct ibv_device **list; - struct ibv_device *ibv_dev; - int err = 0; - struct ibv_context *attr_ctx = NULL; - struct ibv_device_attr device_attr; - unsigned int sriov; - unsigned int mps; - int idx; - int i; - - (void)pci_drv; - assert(pci_drv == &mlx5_driver.pci_drv); - /* Get mlx5_dev[] index. */ - idx = mlx5_dev_idx(&pci_dev->addr); - if (idx == -1) { - ERROR("this driver cannot support any more adapters"); - return -ENOMEM; - } - DEBUG("using driver device index %d", idx); - - /* Save PCI address. */ - mlx5_dev[idx].pci_addr = pci_dev->addr; - list = ibv_get_device_list(&i); - if (list == NULL) { - assert(errno); - if (errno == ENOSYS) { - WARN("cannot list devices, is ib_uverbs loaded?"); - return 0; - } - return -errno; - } - assert(i >= 0); - /* - * For each listed device, check related sysfs entry against - * the provided PCI ID. - */ - while (i != 0) { - struct rte_pci_addr pci_addr; - - --i; - DEBUG("checking device \"%s\"", list[i]->name); - if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) - continue; - if ((pci_dev->addr.domain != pci_addr.domain) || - (pci_dev->addr.bus != pci_addr.bus) || - (pci_dev->addr.devid != pci_addr.devid) || - (pci_dev->addr.function != pci_addr.function)) - continue; - sriov = ((pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || - (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); - /* Multi-packet send is only supported by ConnectX-4 Lx PF. 
*/ - mps = (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); - INFO("PCI information matches, using device \"%s\"" - " (SR-IOV: %s, MPS: %s)", - list[i]->name, - sriov ? "true" : "false", - mps ? "true" : "false"); - attr_ctx = ibv_open_device(list[i]); - err = errno; - break; - } - if (attr_ctx == NULL) { - ibv_free_device_list(list); - switch (err) { - case 0: - WARN("cannot access device, is mlx5_ib loaded?"); - return 0; - case EINVAL: - WARN("cannot use device, are drivers up to date?"); - return 0; - } - assert(err > 0); - return -err; - } - ibv_dev = list[i]; - - DEBUG("device opened"); - if (ibv_query_device(attr_ctx, &device_attr)) - goto error; - INFO("%u port(s) detected", device_attr.phys_port_cnt); - - for (i = 0; i < device_attr.phys_port_cnt; i++) { - uint32_t port = i + 1; /* ports are indexed from one */ - uint32_t test = (1 << i); - struct ibv_context *ctx = NULL; - struct ibv_port_attr port_attr; - struct ibv_pd *pd = NULL; - struct priv *priv = NULL; - struct rte_eth_dev *eth_dev; - struct ibv_exp_device_attr exp_device_attr; - struct ether_addr mac; - uint16_t num_vfs = 0; - - exp_device_attr.comp_mask = - IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | - IBV_EXP_DEVICE_ATTR_RX_HASH | - IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS | - IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN | - 0; - - DEBUG("using port %u (%08" PRIx32 ")", port, test); - - ctx = ibv_open_device(ibv_dev); - if (ctx == NULL) - goto port_error; - - /* Check port status. */ - err = ibv_query_port(ctx, port, &port_attr); - if (err) { - ERROR("port query failed: %s", strerror(err)); - goto port_error; - } - - if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { - ERROR("port %d is not configured in Ethernet mode", - port); - goto port_error; - } - - if (port_attr.state != IBV_PORT_ACTIVE) - DEBUG("port %d is not active: \"%s\" (%d)", - port, ibv_port_state_str(port_attr.state), - port_attr.state); - - /* Allocate protection domain. */ - pd = ibv_alloc_pd(ctx); - if (pd == NULL) { - ERROR("PD allocation failure"); - err = ENOMEM; - goto port_error; - } - - mlx5_dev[idx].ports |= test; - - /* from rte_ethdev.c */ - priv = rte_zmalloc("ethdev private structure", - sizeof(*priv), - RTE_CACHE_LINE_SIZE); - if (priv == NULL) { - ERROR("priv allocation failure"); - err = ENOMEM; - goto port_error; - } - - priv->ctx = ctx; - priv->device_attr = device_attr; - priv->port = port; - priv->pd = pd; - priv->mtu = ETHER_MTU; - priv->mps = mps; /* Enable MPW by default if supported. */ - priv->cqe_comp = 1; /* Enable compression by default. */ - err = mlx5_args(priv, pci_dev->device.devargs); - if (err) { - ERROR("failed to process device arguments: %s", - strerror(err)); - goto port_error; - } - if (ibv_exp_query_device(ctx, &exp_device_attr)) { - ERROR("ibv_exp_query_device() failed"); - goto port_error; - } - - priv->hw_csum = - ((exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && - (exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); - DEBUG("checksum offloading is %ssupported", - (priv->hw_csum ? "" : "not ")); - - priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_VXLAN_SUPPORT); - DEBUG("L2 tunnel checksum offloads are %ssupported", - (priv->hw_csum_l2tun ? "" : "not ")); - - priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; - /* Remove this check once DPDK supports larger/variable - * indirection tables. 
*/ - if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) - priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; - DEBUG("maximum RX indirection table size is %u", - priv->ind_table_max_size); - priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & - IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); - DEBUG("VLAN stripping is %ssupported", - (priv->hw_vlan_strip ? "" : "not ")); - - priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_SCATTER_FCS); - DEBUG("FCS stripping configuration is %ssupported", - (priv->hw_fcs_strip ? "" : "not ")); - - priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align; - DEBUG("hardware RX end alignment padding is %ssupported", - (priv->hw_padding ? "" : "not ")); - - priv_get_num_vfs(priv, &num_vfs); - priv->sriov = (num_vfs || sriov); - if (priv->mps && !mps) { - ERROR("multi-packet send not supported on this device" - " (" MLX5_TXQ_MPW_EN ")"); - err = ENOTSUP; - goto port_error; - } - /* Allocate and register default RSS hash keys. */ - priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, - sizeof((*priv->rss_conf)[0]), 0); - if (priv->rss_conf == NULL) { - err = ENOMEM; - goto port_error; - } - err = rss_hash_rss_conf_new_key(priv, - rss_hash_default_key, - rss_hash_default_key_len, - ETH_RSS_PROTO_MASK); - if (err) - goto port_error; - /* Configure the first MAC address by default. */ - if (priv_get_mac(priv, &mac.addr_bytes)) { - ERROR("cannot get MAC address, is mlx5_en loaded?" - " (errno: %s)", strerror(errno)); - goto port_error; - } - INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", - priv->port, - mac.addr_bytes[0], mac.addr_bytes[1], - mac.addr_bytes[2], mac.addr_bytes[3], - mac.addr_bytes[4], mac.addr_bytes[5]); - /* Register MAC address. */ - claim_zero(priv_mac_addr_add(priv, 0, - (const uint8_t (*)[ETHER_ADDR_LEN]) - mac.addr_bytes)); - /* Initialize FD filters list. */ - err = fdir_init_filters_list(priv); - if (err) - goto port_error; -#ifndef NDEBUG - { - char ifname[IF_NAMESIZE]; - - if (priv_get_ifname(priv, &ifname) == 0) - DEBUG("port %u ifname is \"%s\"", - priv->port, ifname); - else - DEBUG("port %u ifname is unknown", priv->port); - } -#endif - /* Get actual MTU if possible. */ - priv_get_mtu(priv, &priv->mtu); - DEBUG("port %u MTU is %u", priv->port, priv->mtu); - - /* from rte_ethdev.c */ - { - char name[RTE_ETH_NAME_MAX_LEN]; - - snprintf(name, sizeof(name), "%s port %u", - ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name); - } - if (eth_dev == NULL) { - ERROR("can not allocate rte ethdev"); - err = ENOMEM; - goto port_error; - } - - /* Secondary processes have to use local storage for their - * private data as well as a copy of eth_dev->data, but this - * pointer must not be modified before burst functions are - * actually called. 
*/ - if (mlx5_is_secondary()) { - struct mlx5_secondary_data *sd = - &mlx5_secondary_data[eth_dev->data->port_id]; - sd->primary_priv = eth_dev->data->dev_private; - if (sd->primary_priv == NULL) { - ERROR("no private data for port %u", - eth_dev->data->port_id); - err = EINVAL; - goto port_error; - } - sd->shared_dev_data = eth_dev->data; - rte_spinlock_init(&sd->lock); - memcpy(sd->data.name, sd->shared_dev_data->name, - sizeof(sd->data.name)); - sd->data.dev_private = priv; - sd->data.rx_mbuf_alloc_failed = 0; - sd->data.mtu = ETHER_MTU; - sd->data.port_id = sd->shared_dev_data->port_id; - sd->data.mac_addrs = priv->mac; - eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup; - eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; - } else { - eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; - eth_dev->data->mac_addrs = priv->mac; - } - - eth_dev->pci_dev = pci_dev; - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->driver = &mlx5_driver; - priv->dev = eth_dev; - eth_dev->dev_ops = &mlx5_dev_ops; - - TAILQ_INIT(ð_dev->link_intr_cbs); - - /* Bring Ethernet device up. */ - DEBUG("forcing Ethernet interface up"); - priv_set_flags(priv, ~IFF_UP, IFF_UP); - mlx5_link_update_unlocked(priv->dev, 1); - continue; - -port_error: - if (priv) { - rte_free(priv->rss_conf); - rte_free(priv); - } - if (pd) - claim_zero(ibv_dealloc_pd(pd)); - if (ctx) - claim_zero(ibv_close_device(ctx)); - break; - } - - /* - * XXX if something went wrong in the loop above, there is a resource - * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as - * long as the dpdk does not provide a way to deallocate a ethdev and a - * way to enumerate the registered ethdevs to free the previous ones. - */ - - /* no port found, complain */ - if (!mlx5_dev[idx].ports) { - err = ENODEV; - goto error; - } - -error: - if (attr_ctx) - claim_zero(ibv_close_device(attr_ctx)); - if (list) - ibv_free_device_list(list); - assert(err >= 0); - return -err; -} - -static const struct rte_pci_id mlx5_pci_id_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4) + PCI_DEVICE_ID_MELLANOX_CONNECTX5) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) + PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) }, { .vendor_id = 0 @@ -1292,9 +787,10 @@ rte_mlx5_pmd_init(void) * using this PMD, which is not supported in forked processes. 
*/ setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); + ibv_fork_init(); rte_eal_pci_register(&mlx5_driver.pci_drv); } RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); -#endif +RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib"); diff --git a/src/dpdk/drivers/net/mlx5/mlx5.h b/src/dpdk/drivers/net/mlx5/mlx5.h index 83b29e18..879da5ef 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.h +++ b/src/dpdk/drivers/net/mlx5/mlx5.h @@ -59,6 +59,7 @@ #include #include #include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -82,36 +83,20 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014, PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015, PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016, + PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017, + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018, + PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019, + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a, }; -struct mlx5_stats_priv { - - struct rte_eth_stats m_shadow; - uint32_t n_stats; /* number of counters */ - - void * et_stats ;/* point to ethtool counter struct ethtool_stats*/ - - /* index into ethtool */ - uint16_t inx_rx_vport_unicast_bytes; - uint16_t inx_rx_vport_multicast_bytes; - uint16_t inx_rx_vport_broadcast_bytes; - uint16_t inx_rx_vport_unicast_packets; - uint16_t inx_rx_vport_multicast_packets; - uint16_t inx_rx_vport_broadcast_packets; - uint16_t inx_tx_vport_unicast_bytes; - uint16_t inx_tx_vport_multicast_bytes; - uint16_t inx_tx_vport_broadcast_bytes; - uint16_t inx_tx_vport_unicast_packets; - uint16_t inx_tx_vport_multicast_packets; - uint16_t inx_tx_vport_broadcast_packets; - uint16_t inx_rx_wqe_err; - uint16_t inx_rx_crc_errors_phy; - uint16_t inx_rx_in_range_len_errors_phy; - uint16_t inx_rx_symbol_err_phy; - uint16_t inx_tx_errors_phy; +struct mlx5_xstats_ctrl { + /* Number of device stats. */ + uint16_t stats_n; + /* Index in the device counters table. */ + uint16_t dev_table_idx[MLX5_MAX_XSTATS]; + uint64_t base[MLX5_MAX_XSTATS]; }; - struct priv { struct rte_eth_dev *dev; /* Ethernet device. */ struct ibv_context *ctx; /* Verbs context. */ @@ -163,9 +148,10 @@ struct priv { unsigned int reta_idx_n; /* RETA index size. */ struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */ struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */ + LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */ uint32_t link_speed_capa; /* Link speed capabilities. */ + struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ rte_spinlock_t lock; /* Lock for control functions. */ - struct mlx5_stats_priv m_stats; }; /* Local storage for secondary process data. 
*/ @@ -217,7 +203,6 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int); int mlx5_dev_configure(struct rte_eth_dev *); void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); -int mlx5_link_update_unlocked(struct rte_eth_dev *, int); int mlx5_link_update(struct rte_eth_dev *, int); int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); @@ -273,9 +258,14 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *); /* mlx5_stats.c */ +void priv_xstats_init(struct priv *); void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); void mlx5_stats_reset(struct rte_eth_dev *); -void mlx5_stats_free(struct rte_eth_dev *dev); +int mlx5_xstats_get(struct rte_eth_dev *, + struct rte_eth_xstat *, unsigned int); +void mlx5_xstats_reset(struct rte_eth_dev *); +int mlx5_xstats_get_names(struct rte_eth_dev *, + struct rte_eth_xstat_name *, unsigned int); /* mlx5_vlan.c */ @@ -298,4 +288,21 @@ void priv_fdir_enable(struct priv *); int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type, enum rte_filter_op, void *); +/* mlx5_flow.c */ + +int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); +struct rte_flow *mlx5_flow_create(struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); +int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *, + struct rte_flow_error *); +int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *); +int priv_flow_start(struct priv *); +void priv_flow_stop(struct priv *); + #endif /* RTE_PMD_MLX5_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_defs.h b/src/dpdk/drivers/net/mlx5/mlx5_defs.h index 30adfebb..e91d2454 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_defs.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_defs.h @@ -54,9 +54,6 @@ */ #define MLX5_TX_COMP_THRESH 32 -/* RSS Indirection table size. */ -#define RSS_INDIRECTION_TABLE_SIZE 256 - /* * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP * from which buffers are to be transmitted will have to be mapped by this @@ -79,41 +76,7 @@ /* Alarm timeout. */ #define MLX5_ALARM_TIMEOUT_US 100000 - -//#ifdef TREX_PATCH_DPDK PATH for DPDK16.11 should be removed - -/** - * Mask of bits used to determine the status of RX IP checksum. - * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet - * data, but the integrity of the IP header is verified. - */ -#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) - -#define PKT_RX_IP_CKSUM_UNKNOWN 0 -#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) -#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) -#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) - -/** - * Mask of bits used to determine the status of RX L4 checksum. - * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum - * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong - * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid - * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet - * data, but the integrity of the L4 data is verified. 
- */ -#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) - -#define PKT_RX_L4_CKSUM_UNKNOWN 0 -#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) -#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) -#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) - - -//#endif - +/* Maximum number of extended statistics counters. */ +#define MLX5_MAX_XSTATS 32 #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c index 85b81360..2145965f 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c @@ -43,9 +43,11 @@ #include #include #include +#include #include #include #include +#include #include /* DPDK headers don't like -pedantic. */ @@ -67,6 +69,57 @@ #include "mlx5_rxtx.h" #include "mlx5_utils.h" +/* Add defines in case the running kernel is not the same as user headers. */ +#ifndef ETHTOOL_GLINKSETTINGS +struct ethtool_link_settings { + uint32_t cmd; + uint32_t speed; + uint8_t duplex; + uint8_t port; + uint8_t phy_address; + uint8_t autoneg; + uint8_t mdio_support; + uint8_t eth_to_mdix; + uint8_t eth_tp_mdix_ctrl; + int8_t link_mode_masks_nwords; + uint32_t reserved[8]; + uint32_t link_mode_masks[]; +}; + +#define ETHTOOL_GLINKSETTINGS 0x0000004c +#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 +#define ETHTOOL_LINK_MODE_Autoneg_BIT 6 +#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 +#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 +#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 +#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 +#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 +#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 +#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 +#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 +#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 +#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 +#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 +#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 +#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 +#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_25G +#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 +#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 +#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_50G +#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 +#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_100G +#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 +#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 +#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 +#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 +#endif + /** * Return private structure associated with an Ethernet device. * @@ -562,6 +615,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) unsigned int max; char ifname[IF_NAMESIZE]; + info->pci_dev = RTE_DEV_TO_PCI(dev->device); + priv_lock(priv); /* FIXME: we should ask the device for these values. */ info->min_rx_bufsize = 32; @@ -626,7 +681,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) } /** - * Retrieve physical link information (unlocked version using legacy ioctl). + * DPDK callback to retrieve physical link information. * * @param dev * Pointer to Ethernet device structure. @@ -644,6 +699,8 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) struct rte_eth_link dev_link; int link_speed = 0; + /* priv_lock() is not taken to allow concurrent calls. 
*/ + (void)wait_to_complete; if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); @@ -690,8 +747,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) } /** - * Retrieve physical link information (unlocked version using new ioctl from - * Linux 4.5). + * Retrieve physical link information (unlocked version using new ioctl). * * @param dev * Pointer to Ethernet device structure. @@ -701,7 +757,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) static int mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) { -#ifdef ETHTOOL_GLINKSETTINGS struct priv *priv = mlx5_get_priv(dev); struct ethtool_link_settings edata = { .cmd = ETHTOOL_GLINKSETTINGS, @@ -728,7 +783,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) sc = edata.link_mode_masks[0] | ((uint64_t)edata.link_mode_masks[1] << 32); priv->link_speed_capa = 0; - /* Link speeds available in kernel v4.5. */ if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | @@ -751,25 +805,18 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_56G; - /* Link speeds available in kernel v4.6. */ -#ifdef HAVE_ETHTOOL_LINK_MODE_25G if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_25G; -#endif -#ifdef HAVE_ETHTOOL_LINK_MODE_50G if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_50G; -#endif -#ifdef HAVE_ETHTOOL_LINK_MODE_100G if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_100G; -#endif dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & @@ -779,33 +826,10 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) dev->data->dev_link = dev_link; return 0; } -#else - (void)dev; - (void)wait_to_complete; -#endif /* Link status is still the same. */ return -1; } -/** - * DPDK callback to retrieve physical link information (unlocked version). - * - * @param dev - * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). - */ -int -mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) -{ - int ret; - - ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); - if (ret < 0) - ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); - return ret; -} - /** * DPDK callback to retrieve physical link information. 
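The speed capabilities accumulated in priv->link_speed_capa end up in the speed_capa field reported by dev_infos_get, so applications can query them through the usual API; for example:

#include <rte_ethdev.h>

static int
port_supports_100g(uint8_t port_id)
{
        struct rte_eth_dev_info info;

        rte_eth_dev_info_get(port_id, &info);
        return (info.speed_capa & ETH_LINK_SPEED_100G) != 0;
}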
* @@ -817,13 +841,15 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx5_get_priv(dev); - int ret; - - priv_lock(priv); - ret = mlx5_link_update_unlocked(dev, wait_to_complete); - priv_unlock(priv); - return ret; + struct utsname utsname; + int ver[3]; + + if (uname(&utsname) == -1 || + sscanf(utsname.release, "%d.%d.%d", + &ver[0], &ver[1], &ver[2]) != 3 || + KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) + return mlx5_link_update_unlocked_gset(dev, wait_to_complete); + return mlx5_link_update_unlocked_gs(dev, wait_to_complete); } /** @@ -1141,7 +1167,7 @@ static int priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) { struct ibv_async_event event; - int port_change = 0; + struct rte_eth_link *link = &dev->data->dev_link; int ret = 0; /* Read all message and acknowledge them. */ @@ -1149,29 +1175,24 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) if (ibv_get_async_event(priv->ctx, &event)) break; - if (event.event_type == IBV_EVENT_PORT_ACTIVE || - event.event_type == IBV_EVENT_PORT_ERR) - port_change = 1; - else + if (event.event_type != IBV_EVENT_PORT_ACTIVE && + event.event_type != IBV_EVENT_PORT_ERR) DEBUG("event type %d on port %d not handled", event.event_type, event.element.port_num); ibv_ack_async_event(&event); } - - if (port_change ^ priv->pending_alarm) { - struct rte_eth_link *link = &dev->data->dev_link; - - priv->pending_alarm = 0; - mlx5_link_update_unlocked(dev, 0); - if (((link->link_speed == 0) && link->link_status) || - ((link->link_speed != 0) && !link->link_status)) { + mlx5_link_update(dev, 0); + if (((link->link_speed == 0) && link->link_status) || + ((link->link_speed != 0) && !link->link_status)) { + if (!priv->pending_alarm) { /* Inconsistent status, check again later. */ priv->pending_alarm = 1; rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, mlx5_dev_link_status_handler, dev); - } else - ret = 1; + } + } else { + ret = 1; } return ret; } @@ -1191,10 +1212,11 @@ mlx5_dev_link_status_handler(void *arg) priv_lock(priv); assert(priv->pending_alarm == 1); + priv->pending_alarm = 0; ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - //if (ret) - // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + if (ret) + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1216,8 +1238,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) priv_lock(priv); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - //if (ret) - // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + if (ret) + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1515,14 +1537,11 @@ void priv_select_tx_function(struct priv *priv) { priv->dev->tx_pkt_burst = mlx5_tx_burst; - /* Display warning for unsupported configurations. */ - if (priv->sriov && priv->mps) - WARN("multi-packet send WQE cannot be used on a SR-IOV setup"); /* Select appropriate TX function. 
*/ - if ((priv->sriov == 0) && priv->mps && priv->txq_inline) { + if (priv->mps && priv->txq_inline) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; DEBUG("selected MPW inline TX function"); - } else if ((priv->sriov == 0) && priv->mps) { + } else if (priv->mps) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; DEBUG("selected MPW TX function"); } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c index f03e95ef..f80c58b4 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c @@ -37,14 +37,12 @@ #include #include -#define TREX_PATCH - /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif -#include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -57,6 +55,8 @@ #include #include #include +#include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -69,9 +69,6 @@ struct fdir_flow_desc { uint16_t src_port; uint32_t src_ip[4]; uint32_t dst_ip[4]; - uint8_t tos; - uint8_t ip_id; - uint8_t proto; uint8_t mac[6]; uint16_t vlan_tag; enum hash_rxq_type type; @@ -107,7 +104,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, /* Set VLAN ID. */ desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci; -#ifndef TREX_PATCH /* Set MAC address. */ if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { rte_memcpy(desc->mac, @@ -117,14 +113,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, desc->type = HASH_RXQ_ETH; return; } -#else - if (fdir_filter->input.flow.ip4_flow.ip_id == 2) { - desc->type = HASH_RXQ_ETH; - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - return; - } -#endif - /* Set mode */ switch (fdir_filter->input.flow_type) { @@ -159,9 +147,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip; desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip; - desc->tos = fdir_filter->input.flow.ip4_flow.ttl; /* TTL is mapped to TOS TREX_PATCH */ - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - desc->proto = fdir_filter->input.flow.ip4_flow.proto; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: @@ -175,9 +160,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, rte_memcpy(desc->dst_ip, fdir_filter->input.flow.ipv6_flow.dst_ip, sizeof(desc->dst_ip)); - desc->tos = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits; /* TTL is mapped to TOS - TREX_PATCH */ - desc->ip_id = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label; - desc->proto = fdir_filter->input.flow.ipv6_flow.proto; break; default: break; @@ -218,11 +200,6 @@ priv_fdir_overlap(const struct priv *priv, ((desc1->dst_port & mask->dst_port_mask) != (desc2->dst_port & mask->dst_port_mask))) return 0; - if ( (desc1->tos != desc2->tos) || - (desc1->ip_id != desc2->ip_id) || - (desc1->proto != desc2->proto) ) - return 0; - switch (desc1->type) { case HASH_RXQ_IPV4: case HASH_RXQ_UDPV4: @@ -277,8 +254,8 @@ priv_fdir_flow_add(struct priv *priv, struct ibv_exp_flow_attr *attr = &data->attr; uintptr_t spec_offset = (uintptr_t)&data->spec; struct ibv_exp_flow_spec_eth *spec_eth; - struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4; - struct ibv_exp_flow_spec_ipv6_ext *spec_ipv6; + struct ibv_exp_flow_spec_ipv4 *spec_ipv4; + struct ibv_exp_flow_spec_ipv6 *spec_ipv6; struct 
ibv_exp_flow_spec_tcp_udp *spec_tcp_udp; struct mlx5_fdir_filter *iter_fdir_filter; unsigned int i; @@ -290,10 +267,8 @@ priv_fdir_flow_add(struct priv *priv, (iter_fdir_filter->flow != NULL) && (priv_fdir_overlap(priv, &mlx5_fdir_filter->desc, - &iter_fdir_filter->desc))){ - ERROR("overlap rules, please check your rules"); - return EEXIST; - } + &iter_fdir_filter->desc))) + return EEXIST; /* * No padding must be inserted by the compiler between attr and spec. @@ -316,7 +291,6 @@ priv_fdir_flow_add(struct priv *priv, /* Update priority */ attr->priority = 2; -#ifndef TREX_PATCH if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { /* MAC Address */ for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) { @@ -326,14 +300,6 @@ priv_fdir_flow_add(struct priv *priv, } goto create_flow; } -#else - // empty mask means "match everything". This rule will match all packets, no matter what is the ether type - if (desc->ip_id == 2) { - spec_eth->val.ether_type = 0x0806; - spec_eth->mask.ether_type = 0x0000; - goto create_flow; - } -#endif switch (desc->type) { case HASH_RXQ_IPV4: @@ -342,10 +308,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset; + spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT); + assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4); assert(spec_ipv4->size == sizeof(*spec_ipv4)); spec_ipv4->val.src_ip = @@ -355,21 +321,6 @@ priv_fdir_flow_add(struct priv *priv, spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip; spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip; - /* PROTO */ - spec_ipv4->val.proto = desc->proto & mask->ipv4_mask.proto; - spec_ipv4->mask.proto = mask->ipv4_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv4->mask.tos = 0x1; - spec_ipv4->val.tos = 0x1; - } else { - spec_ipv4->mask.tos = 0x0; - spec_ipv4->val.tos = 0x0; - } -#endif - /* Update priority */ attr->priority = 1; @@ -384,10 +335,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset; + spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT); + assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6); assert(spec_ipv6->size == sizeof(*spec_ipv6)); for (i = 0; i != RTE_DIM(desc->src_ip); ++i) { @@ -403,20 +354,6 @@ priv_fdir_flow_add(struct priv *priv, mask->ipv6_mask.dst_ip, sizeof(spec_ipv6->mask.dst_ip)); - spec_ipv6->val.next_hdr = desc->proto & mask->ipv6_mask.proto; - spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv6->mask.traffic_class = 0x1; - spec_ipv6->val.traffic_class = 0x1; - } else { - spec_ipv6->mask.traffic_class = 0; - spec_ipv6->val.traffic_class = 0; - } -#endif - /* Update priority */ attr->priority = 1; @@ -894,10 +831,8 @@ priv_fdir_filter_add(struct priv *priv, /* Duplicate filters are currently unsupported. */ mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter); if (mlx5_fdir_filter != NULL) { -#ifndef TREX_PATCH ERROR("filter already exists"); -#endif - return EEXIST; + return EINVAL; } /* Create new flow director filter. 
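The flow director entry points above are reached through the legacy filter API. A minimal application-side sketch that adds a perfect-match IPv4/UDP rule; addresses, ports and the target queue are placeholders, and the fields are the generic rte_eth_fdir_filter ones rather than anything mlx5-specific:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_byteorder.h>

static int
add_udp4_fdir_rule(uint8_t port_id, uint16_t rx_queue)
{
        struct rte_eth_fdir_filter f;

        memset(&f, 0, sizeof(f));
        f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
        f.input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1));
        f.input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 2));
        f.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(1024);
        f.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(53);
        f.action.behavior = RTE_ETH_FDIR_ACCEPT;
        f.action.rx_queue = rx_queue;
        return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                       RTE_ETH_FILTER_ADD, &f);
}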
*/ @@ -1022,11 +957,9 @@ priv_fdir_filter_delete(struct priv *priv, return 0; } -#ifndef TREX_PATCH ERROR("%p: flow director delete failed, cannot find filter", (void *)priv); -#endif - return ENOENT; + return EINVAL; } /** @@ -1111,6 +1044,14 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg) return ret; } +static const struct rte_flow_ops mlx5_flow_ops = { + .validate = mlx5_flow_validate, + .create = mlx5_flow_create, + .destroy = mlx5_flow_destroy, + .flush = mlx5_flow_flush, + .query = NULL, +}; + /** * Manage filter operations. * @@ -1136,6 +1077,11 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, struct priv *priv = dev->data->dev_private; switch (filter_type) { + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &mlx5_flow_ops; + return 0; case RTE_ETH_FILTER_FDIR: priv_lock(priv); ret = priv_fdir_ctrl_func(priv, filter_op, arg); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_flow.c b/src/dpdk/drivers/net/mlx5/mlx5_flow.c new file mode 100644 index 00000000..23c1b5ef --- /dev/null +++ b/src/dpdk/drivers/net/mlx5/mlx5_flow.c @@ -0,0 +1,1247 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. 
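Answering RTE_ETH_FILTER_GENERIC with mlx5_flow_ops is what lets applications program rules through the generic rte_flow API instead of the flow director structures. A minimal sketch that steers IPv4 packets with a given destination address to RX queue 1 (address and queue index are placeholders):

#include <rte_flow.h>
#include <rte_ip.h>
#include <rte_byteorder.h>

static struct rte_flow *
steer_dst_ip_to_queue(uint8_t port_id, struct rte_flow_error *err)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item_ipv4 ip_spec = {
                .hdr.dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
        };
        struct rte_flow_item_ipv4 ip_mask = {
                .hdr.dst_addr = 0xffffffff, /* full match, byte-order neutral */
        };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4,
                  .spec = &ip_spec, .mask = &ip_mask },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = 1 };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        if (rte_flow_validate(port_id, &attr, pattern, actions, err) != 0)
                return NULL;
        return rte_flow_create(port_id, &attr, pattern, actions, err);
}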
*/ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#include +#include +#include +#include + +#include "mlx5.h" +#include "mlx5_prm.h" + +static int +mlx5_flow_create_eth(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_vlan(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_ipv4(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_ipv6(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_udp(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_tcp(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_vxlan(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +struct rte_flow { + LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */ + struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */ + struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */ + struct ibv_qp *qp; /**< Verbs queue pair. */ + struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */ + struct ibv_exp_wq *wq; /**< Verbs work queue. */ + struct ibv_cq *cq; /**< Verbs completion queue. */ + struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */ + uint32_t mark:1; /**< Set if the flow is marked. */ +}; + +/** Static initializer for items. */ +#define ITEMS(...) \ + (const enum rte_flow_item_type []){ \ + __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \ + } + +/** Structure to generate a simple graph of layers supported by the NIC. */ +struct mlx5_flow_items { + /** List of possible actions for these items. */ + const enum rte_flow_action_type *const actions; + /** Bit-masks corresponding to the possibilities for the item. */ + const void *mask; + /** + * Default bit-masks to use when item->mask is not provided. When + * \default_mask is also NULL, the full supported bit-mask (\mask) is + * used instead. + */ + const void *default_mask; + /** Bit-masks size in bytes. */ + const unsigned int mask_sz; + /** + * Conversion function from rte_flow to NIC specific flow. + * + * @param item + * rte_flow item to convert. + * @param default_mask + * Default bit-masks to use when item->mask is not provided. + * @param data + * Internal structure to store the conversion. + * + * @return + * 0 on success, negative value otherwise. + */ + int (*convert)(const struct rte_flow_item *item, + const void *default_mask, + void *data); + /** Size in bytes of the destination structure. */ + const unsigned int dst_sz; + /** List of possible following items. */ + const enum rte_flow_item_type *const items; +}; + +/** Valid action for this PMD. */ +static const enum rte_flow_action_type valid_actions[] = { + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_END, +}; + +/** Graph of supported items and associated actions. 
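As an illustration of the graph defined just below: it accepts a pattern chain such as ETH / IPV4 / UDP / VXLAN / ETH / IPV4 / TCP (inner headers are matched once the VXLAN item has been seen), while only ETH may directly follow VXLAN.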
*/ +static const struct mlx5_flow_items mlx5_flow_items[] = { + [RTE_FLOW_ITEM_TYPE_END] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VXLAN), + }, + [RTE_FLOW_ITEM_TYPE_ETH] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_eth){ + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + }, + .default_mask = &rte_flow_item_eth_mask, + .mask_sz = sizeof(struct rte_flow_item_eth), + .convert = mlx5_flow_create_eth, + .dst_sz = sizeof(struct ibv_exp_flow_spec_eth), + }, + [RTE_FLOW_ITEM_TYPE_VLAN] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_vlan){ + .tci = -1, + }, + .default_mask = &rte_flow_item_vlan_mask, + .mask_sz = sizeof(struct rte_flow_item_vlan), + .convert = mlx5_flow_create_vlan, + .dst_sz = 0, + }, + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_TCP), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_ipv4){ + .hdr = { + .src_addr = -1, + .dst_addr = -1, + .type_of_service = -1, + .next_proto_id = -1, + }, + }, + .default_mask = &rte_flow_item_ipv4_mask, + .mask_sz = sizeof(struct rte_flow_item_ipv4), + .convert = mlx5_flow_create_ipv4, + .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext), + }, + [RTE_FLOW_ITEM_TYPE_IPV6] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_TCP), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_ipv6){ + .hdr = { + .src_addr = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + }, + .dst_addr = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + }, + }, + }, + .default_mask = &rte_flow_item_ipv6_mask, + .mask_sz = sizeof(struct rte_flow_item_ipv6), + .convert = mlx5_flow_create_ipv6, + .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6), + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_udp){ + .hdr = { + .src_port = -1, + .dst_port = -1, + }, + }, + .default_mask = &rte_flow_item_udp_mask, + .mask_sz = sizeof(struct rte_flow_item_udp), + .convert = mlx5_flow_create_udp, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp), + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .actions = valid_actions, + .mask = &(const struct rte_flow_item_tcp){ + .hdr = { + .src_port = -1, + .dst_port = -1, + }, + }, + .default_mask = &rte_flow_item_tcp_mask, + .mask_sz = sizeof(struct rte_flow_item_tcp), + .convert = mlx5_flow_create_tcp, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp), + }, + [RTE_FLOW_ITEM_TYPE_VXLAN] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_vxlan){ + .vni = "\xff\xff\xff", + }, + .default_mask = &rte_flow_item_vxlan_mask, + .mask_sz = sizeof(struct rte_flow_item_vxlan), + .convert = mlx5_flow_create_vxlan, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel), + }, +}; + +/** Structure to pass to the conversion function. */ +struct mlx5_flow { + struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */ + unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */ + uint32_t inner; /**< Set once VXLAN is encountered. */ +}; + +struct mlx5_flow_action { + uint32_t queue:1; /**< Target is a receive queue. 
*/ + uint32_t drop:1; /**< Target is a drop queue. */ + uint32_t mark:1; /**< Mark is present in the flow. */ + uint32_t queue_id; /**< Identifier of the queue. */ + uint32_t mark_id; /**< Mark identifier. */ +}; + +/** + * Check support for a given item. + * + * @param item[in] + * Item specification. + * @param mask[in] + * Bit-masks covering supported fields to compare with spec, last and mask in + * \item. + * @param size + * Bit-Mask size in bytes. + * + * @return + * 0 on success. + */ +static int +mlx5_flow_item_validate(const struct rte_flow_item *item, + const uint8_t *mask, unsigned int size) +{ + int ret = 0; + + if (!item->spec && (item->mask || item->last)) + return -1; + if (item->spec && !item->mask) { + unsigned int i; + const uint8_t *spec = item->spec; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->last && !item->mask) { + unsigned int i; + const uint8_t *spec = item->last; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->mask) { + unsigned int i; + const uint8_t *spec = item->mask; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->spec && item->last) { + uint8_t spec[size]; + uint8_t last[size]; + const uint8_t *apply = mask; + unsigned int i; + + if (item->mask) + apply = item->mask; + for (i = 0; i < size; ++i) { + spec[i] = ((const uint8_t *)item->spec)[i] & apply[i]; + last[i] = ((const uint8_t *)item->last)[i] & apply[i]; + } + ret = memcmp(spec, last, size); + } + return ret; +} + +/** + * Validate a flow supported by the NIC. + * + * @param priv + * Pointer to private structure. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. + * @param[in, out] flow + * Flow structure to update. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
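To make the subset check in mlx5_flow_item_validate() above concrete: the supported ETH mask in mlx5_flow_items[] above covers only the MAC addresses, so an item mask that also sets the EtherType fails the (spec[i] | mask[i]) != mask[i] test and the item is rejected. A hypothetical mask that would be refused:

#include <rte_flow.h>

/* Too wide for the mlx5 ETH item: the EtherType is not maskable here. */
static const struct rte_flow_item_eth too_wide_eth_mask = {
	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	.type = 0xffff,
};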
+ */ +static int +priv_flow_validate(struct priv *priv, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + struct mlx5_flow *flow) +{ + const struct mlx5_flow_items *cur_item = mlx5_flow_items; + struct mlx5_flow_action action = { + .queue = 0, + .drop = 0, + .mark = 0, + }; + + (void)priv; + if (attr->group) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "groups are not supported"); + return -rte_errno; + } + if (attr->priority) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + NULL, + "priorities are not supported"); + return -rte_errno; + } + if (attr->egress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, + "egress is not supported"); + return -rte_errno; + } + if (!attr->ingress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "only ingress is supported"); + return -rte_errno; + } + for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { + const struct mlx5_flow_items *token = NULL; + unsigned int i; + int err; + + if (items->type == RTE_FLOW_ITEM_TYPE_VOID) + continue; + for (i = 0; + cur_item->items && + cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END; + ++i) { + if (cur_item->items[i] == items->type) { + token = &mlx5_flow_items[items->type]; + break; + } + } + if (!token) + goto exit_item_not_supported; + cur_item = token; + err = mlx5_flow_item_validate(items, + (const uint8_t *)cur_item->mask, + cur_item->mask_sz); + if (err) + goto exit_item_not_supported; + if (flow->ibv_attr && cur_item->convert) { + err = cur_item->convert(items, + (cur_item->default_mask ? + cur_item->default_mask : + cur_item->mask), + flow); + if (err) + goto exit_item_not_supported; + } + flow->offset += cur_item->dst_sz; + } + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { + continue; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { + action.drop = 1; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + const struct rte_flow_action_queue *queue = + (const struct rte_flow_action_queue *) + actions->conf; + + if (!queue || (queue->index > (priv->rxqs_n - 1))) + goto exit_action_not_supported; + action.queue = 1; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) + actions->conf; + + if (!mark) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "mark must be defined"); + return -rte_errno; + } else if (mark->id >= MLX5_FLOW_MARK_MAX) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "mark must be between 0" + " and 16777199"); + return -rte_errno; + } + action.mark = 1; + } else { + goto exit_action_not_supported; + } + } + if (action.mark && !flow->ibv_attr && !action.drop) + flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag); + if (!action.queue && !action.drop) { + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "no valid action"); + return -rte_errno; + } + return 0; +exit_item_not_supported: + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + items, "item not supported"); + return -rte_errno; +exit_action_not_supported: + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + actions, "action not supported"); + return -rte_errno; +} + +/** + * Validate a flow supported by the NIC. 
+ * + * @see rte_flow_validate() + * @see rte_flow_ops + */ +int +mlx5_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + int ret; + struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) }; + + priv_lock(priv); + ret = priv_flow_validate(priv, attr, items, actions, error, &flow); + priv_unlock(priv); + return ret; +} + +/** + * Convert Ethernet item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_eth(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_eth *spec = item->spec; + const struct rte_flow_item_eth *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_eth *eth; + const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth); + unsigned int i; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 2; + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *eth = (struct ibv_exp_flow_spec_eth) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH, + .size = eth_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); + /* Remove unwanted bits from values. */ + for (i = 0; i < ETHER_ADDR_LEN; ++i) { + eth->val.dst_mac[i] &= eth->mask.dst_mac[i]; + eth->val.src_mac[i] &= eth->mask.src_mac[i]; + } + return 0; +} + +/** + * Convert VLAN item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_vlan(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_vlan *spec = item->spec; + const struct rte_flow_item_vlan *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_eth *eth; + const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth); + + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size); + if (!spec) + return 0; + if (!mask) + mask = default_mask; + eth->val.vlan_tag = spec->tci; + eth->mask.vlan_tag = mask->tci; + eth->val.vlan_tag &= eth->mask.vlan_tag; + return 0; +} + +/** + * Convert IPv4 item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. 
+ */ +static int +mlx5_flow_create_ipv4(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_ipv4_ext *ipv4; + unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 1; + ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT, + .size = ipv4_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){ + .src_ip = spec->hdr.src_addr, + .dst_ip = spec->hdr.dst_addr, + .proto = spec->hdr.next_proto_id, + .tos = spec->hdr.type_of_service, + }; + ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){ + .src_ip = mask->hdr.src_addr, + .dst_ip = mask->hdr.dst_addr, + .proto = mask->hdr.next_proto_id, + .tos = mask->hdr.type_of_service, + }; + /* Remove unwanted bits from values. */ + ipv4->val.src_ip &= ipv4->mask.src_ip; + ipv4->val.dst_ip &= ipv4->mask.dst_ip; + ipv4->val.proto &= ipv4->mask.proto; + ipv4->val.tos &= ipv4->mask.tos; + return 0; +} + +/** + * Convert IPv6 item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_ipv6(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_ipv6 *ipv6; + unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6); + unsigned int i; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 1; + ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *ipv6 = (struct ibv_exp_flow_spec_ipv6) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6, + .size = ipv6_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(ipv6->val.src_ip, spec->hdr.src_addr, + RTE_DIM(ipv6->val.src_ip)); + memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr, + RTE_DIM(ipv6->val.dst_ip)); + memcpy(ipv6->mask.src_ip, mask->hdr.src_addr, + RTE_DIM(ipv6->mask.src_ip)); + memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr, + RTE_DIM(ipv6->mask.dst_ip)); + /* Remove unwanted bits from values. */ + for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) { + ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i]; + ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i]; + } + return 0; +} + +/** + * Convert UDP item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. 
+ */ +static int +mlx5_flow_create_udp(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tcp_udp *udp; + unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *udp = (struct ibv_exp_flow_spec_tcp_udp) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP, + .size = udp_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + udp->val.dst_port = spec->hdr.dst_port; + udp->val.src_port = spec->hdr.src_port; + udp->mask.dst_port = mask->hdr.dst_port; + udp->mask.src_port = mask->hdr.src_port; + /* Remove unwanted bits from values. */ + udp->val.src_port &= udp->mask.src_port; + udp->val.dst_port &= udp->mask.dst_port; + return 0; +} + +/** + * Convert TCP item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_tcp(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tcp_udp *tcp; + unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *tcp = (struct ibv_exp_flow_spec_tcp_udp) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP, + .size = tcp_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + tcp->val.dst_port = spec->hdr.dst_port; + tcp->val.src_port = spec->hdr.src_port; + tcp->mask.dst_port = mask->hdr.dst_port; + tcp->mask.src_port = mask->hdr.src_port; + /* Remove unwanted bits from values. */ + tcp->val.src_port &= tcp->mask.src_port; + tcp->val.dst_port &= tcp->mask.dst_port; + return 0; +} + +/** + * Convert VXLAN item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_vxlan(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_vxlan *spec = item->spec; + const struct rte_flow_item_vxlan *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tunnel *vxlan; + unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel); + union vni { + uint32_t vlan_id; + uint8_t vni[4]; + } id; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + id.vni[0] = 0; + vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *vxlan = (struct ibv_exp_flow_spec_tunnel) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL, + .size = size, + }; + flow->inner = IBV_EXP_FLOW_SPEC_INNER; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(&id.vni[1], spec->vni, 3); + vxlan->val.tunnel_id = id.vlan_id; + memcpy(&id.vni[1], mask->vni, 3); + vxlan->mask.tunnel_id = id.vlan_id; + /* Remove unwanted bits from values. 
*/ + vxlan->val.tunnel_id &= vxlan->mask.tunnel_id; + return 0; +} + +/** + * Convert mark/flag action to Verbs specification. + * + * @param flow + * Pointer to MLX5 flow structure. + * @param mark_id + * Mark identifier. + */ +static int +mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id) +{ + struct ibv_exp_flow_spec_action_tag *tag; + unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag); + + tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *tag = (struct ibv_exp_flow_spec_action_tag){ + .type = IBV_EXP_FLOW_SPEC_ACTION_TAG, + .size = size, + .tag_id = mlx5_flow_mark_set(mark_id), + }; + ++flow->ibv_attr->num_of_specs; + return 0; +} + +/** + * Complete flow rule creation. + * + * @param priv + * Pointer to private structure. + * @param ibv_attr + * Verbs flow attributes. + * @param action + * Target action structure. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A flow if the rule could be created. + */ +static struct rte_flow * +priv_flow_create_action_queue(struct priv *priv, + struct ibv_exp_flow_attr *ibv_attr, + struct mlx5_flow_action *action, + struct rte_flow_error *error) +{ + struct rxq_ctrl *rxq; + struct rte_flow *rte_flow; + + assert(priv->pd); + assert(priv->ctx); + rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0); + if (!rte_flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate flow memory"); + return NULL; + } + if (action->drop) { + rte_flow->cq = + ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0, + &(struct ibv_exp_cq_init_attr){ + .comp_mask = 0, + }); + if (!rte_flow->cq) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate CQ"); + goto error; + } + rte_flow->wq = ibv_exp_create_wq(priv->ctx, + &(struct ibv_exp_wq_init_attr){ + .wq_type = IBV_EXP_WQT_RQ, + .max_recv_wr = 1, + .max_recv_sge = 1, + .pd = priv->pd, + .cq = rte_flow->cq, + }); + } else { + rxq = container_of((*priv->rxqs)[action->queue_id], + struct rxq_ctrl, rxq); + rte_flow->rxq = &rxq->rxq; + rxq->rxq.mark |= action->mark; + rte_flow->wq = rxq->wq; + } + rte_flow->mark = action->mark; + rte_flow->ibv_attr = ibv_attr; + rte_flow->ind_table = ibv_exp_create_rwq_ind_table( + priv->ctx, + &(struct ibv_exp_rwq_ind_table_init_attr){ + .pd = priv->pd, + .log_ind_tbl_size = 0, + .ind_tbl = &rte_flow->wq, + .comp_mask = 0, + }); + if (!rte_flow->ind_table) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate indirection table"); + goto error; + } + rte_flow->qp = ibv_exp_create_qp( + priv->ctx, + &(struct ibv_exp_qp_init_attr){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_PORT | + IBV_EXP_QP_INIT_ATTR_RX_HASH, + .pd = priv->pd, + .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){ + .rx_hash_function = + IBV_EXP_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = rss_hash_default_key_len, + .rx_hash_key = rss_hash_default_key, + .rx_hash_fields_mask = 0, + .rwq_ind_tbl = rte_flow->ind_table, + }, + .port_num = priv->port, + }); + if (!rte_flow->qp) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate QP"); + goto error; + } + if (!priv->started) + return rte_flow; + rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp, + rte_flow->ibv_attr); + if (!rte_flow->ibv_flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "flow rule creation failure"); + goto error; + } + return rte_flow; +error: + 
assert(rte_flow); + if (rte_flow->qp) + ibv_destroy_qp(rte_flow->qp); + if (rte_flow->ind_table) + ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table); + if (!rte_flow->rxq && rte_flow->wq) + ibv_exp_destroy_wq(rte_flow->wq); + if (!rte_flow->rxq && rte_flow->cq) + ibv_destroy_cq(rte_flow->cq); + rte_free(rte_flow->ibv_attr); + rte_free(rte_flow); + return NULL; +} + +/** + * Convert a flow. + * + * @param priv + * Pointer to private structure. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A flow on success, NULL otherwise. + */ +static struct rte_flow * +priv_flow_create(struct priv *priv, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow *rte_flow; + struct mlx5_flow_action action; + struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), }; + int err; + + err = priv_flow_validate(priv, attr, items, actions, error, &flow); + if (err) + goto exit; + flow.ibv_attr = rte_malloc(__func__, flow.offset, 0); + flow.offset = sizeof(struct ibv_exp_flow_attr); + if (!flow.ibv_attr) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate ibv_attr memory"); + goto exit; + } + *flow.ibv_attr = (struct ibv_exp_flow_attr){ + .type = IBV_EXP_FLOW_ATTR_NORMAL, + .size = sizeof(struct ibv_exp_flow_attr), + .priority = attr->priority, + .num_of_specs = 0, + .port = 0, + .flags = 0, + .reserved = 0, + }; + flow.inner = 0; + claim_zero(priv_flow_validate(priv, attr, items, actions, + error, &flow)); + action = (struct mlx5_flow_action){ + .queue = 0, + .drop = 0, + .mark = 0, + .mark_id = MLX5_FLOW_MARK_DEFAULT, + }; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { + continue; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + action.queue = 1; + action.queue_id = + ((const struct rte_flow_action_queue *) + actions->conf)->index; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { + action.drop = 1; + action.mark = 0; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) + actions->conf; + + if (mark) + action.mark_id = mark->id; + action.mark = !action.drop; + } else { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, "unsupported action"); + goto exit; + } + } + if (action.mark) { + mlx5_flow_create_flag_mark(&flow, action.mark_id); + flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag); + } + rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr, + &action, error); + return rte_flow; +exit: + rte_free(flow.ibv_attr); + return NULL; +} + +/** + * Create a flow. 
+ * + * @see rte_flow_create() + * @see rte_flow_ops + */ +struct rte_flow * +mlx5_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + struct rte_flow *flow; + + priv_lock(priv); + flow = priv_flow_create(priv, attr, items, actions, error); + if (flow) { + LIST_INSERT_HEAD(&priv->flows, flow, next); + DEBUG("Flow created %p", (void *)flow); + } + priv_unlock(priv); + return flow; +} + +/** + * Destroy a flow. + * + * @param priv + * Pointer to private structure. + * @param[in] flow + * Flow to destroy. + */ +static void +priv_flow_destroy(struct priv *priv, + struct rte_flow *flow) +{ + (void)priv; + LIST_REMOVE(flow, next); + if (flow->ibv_flow) + claim_zero(ibv_exp_destroy_flow(flow->ibv_flow)); + if (flow->qp) + claim_zero(ibv_destroy_qp(flow->qp)); + if (flow->ind_table) + claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table)); + if (!flow->rxq && flow->wq) + claim_zero(ibv_exp_destroy_wq(flow->wq)); + if (!flow->rxq && flow->cq) + claim_zero(ibv_destroy_cq(flow->cq)); + if (flow->mark) { + struct rte_flow *tmp; + uint32_t mark_n = 0; + + for (tmp = LIST_FIRST(&priv->flows); + tmp; + tmp = LIST_NEXT(tmp, next)) { + if ((flow->rxq == tmp->rxq) && tmp->mark) + ++mark_n; + } + flow->rxq->mark = !!mark_n; + } + rte_free(flow->ibv_attr); + DEBUG("Flow destroyed %p", (void *)flow); + rte_free(flow); +} + +/** + * Destroy a flow. + * + * @see rte_flow_destroy() + * @see rte_flow_ops + */ +int +mlx5_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + + (void)error; + priv_lock(priv); + priv_flow_destroy(priv, flow); + priv_unlock(priv); + return 0; +} + +/** + * Destroy all flows. + * + * @param priv + * Pointer to private structure. + */ +static void +priv_flow_flush(struct priv *priv) +{ + while (!LIST_EMPTY(&priv->flows)) { + struct rte_flow *flow; + + flow = LIST_FIRST(&priv->flows); + priv_flow_destroy(priv, flow); + } +} + +/** + * Destroy all flows. + * + * @see rte_flow_flush() + * @see rte_flow_ops + */ +int +mlx5_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + + (void)error; + priv_lock(priv); + priv_flow_flush(priv); + priv_unlock(priv); + return 0; +} + +/** + * Remove all flows. + * + * Called by dev_stop() to remove all flows. + * + * @param priv + * Pointer to private structure. + */ +void +priv_flow_stop(struct priv *priv) +{ + struct rte_flow *flow; + + for (flow = LIST_FIRST(&priv->flows); + flow; + flow = LIST_NEXT(flow, next)) { + claim_zero(ibv_exp_destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->mark) + flow->rxq->mark = 0; + DEBUG("Flow %p removed", (void *)flow); + } +} + +/** + * Add all flows. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, a errno value otherwise and rte_errno is set. 
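Taken together, mlx5_flow_validate(), mlx5_flow_create(), mlx5_flow_destroy() and mlx5_flow_flush() above are reached through the generic rte_flow API. A minimal application-side sketch, assuming the DPDK 17.02 rte_flow API; the helper name and the queue/mark values are made up and error handling is trimmed:

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Steer all TCP-over-IPv4 traffic to one RX queue and mark it. */
static struct rte_flow *
mark_tcp_to_queue(uint8_t port_id, uint16_t queue, uint32_t mark_id,
		  struct rte_flow_error *err)
{
	struct rte_flow_attr attr = { .ingress = 1 }; /* mlx5: ingress only */
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue q = { .index = queue };
	struct rte_flow_action_mark mark = { .id = mark_id };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &q },
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	if (rte_flow_validate(port_id, &attr, pattern, actions, err))
		return NULL;
	return rte_flow_create(port_id, &attr, pattern, actions, err);
}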
+ */ +int +priv_flow_start(struct priv *priv) +{ + struct rte_flow *flow; + + for (flow = LIST_FIRST(&priv->flows); + flow; + flow = LIST_NEXT(flow, next)) { + flow->ibv_flow = ibv_exp_create_flow(flow->qp, + flow->ibv_attr); + if (!flow->ibv_flow) { + DEBUG("Flow %p cannot be applied", (void *)flow); + rte_errno = EINVAL; + return rte_errno; + } + DEBUG("Flow %p applied", (void *)flow); + if (flow->rxq) + flow->rxq->mark |= flow->mark; + } + return 0; +} diff --git a/src/dpdk/drivers/net/mlx5/mlx5_prm.h b/src/dpdk/drivers/net/mlx5/mlx5_prm.h index 8426adb3..755b5d77 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_prm.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_prm.h @@ -34,6 +34,8 @@ #ifndef RTE_PMD_MLX5_PRM_H_ #define RTE_PMD_MLX5_PRM_H_ +#include + /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC @@ -44,6 +46,7 @@ #pragma GCC diagnostic error "-Wpedantic" #endif +#include #include "mlx5_autoconf.h" /* Get CQE owner bit. */ @@ -61,9 +64,6 @@ /* Invalidate a CQE. */ #define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4) -/* CQE value to inform that VLAN is stripped. */ -#define MLX5_CQE_VLAN_STRIPPED 0x1 - /* Maximum number of packets a multi-packet WQE can handle. */ #define MLX5_MPW_DSEG_MAX 5 @@ -80,30 +80,54 @@ /* Room for inline data in multi-packet WQE. */ #define MLX5_MWQE64_INL_DATA 28 -//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO -//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ -//#endif +#ifndef HAVE_VERBS_MLX5_OPCODE_TSO +#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ +#endif + +/* CQE value to inform that VLAN is stripped. */ +#define MLX5_CQE_VLAN_STRIPPED (1u << 0) -/* IPv4 packet. */ -#define MLX5_CQE_RX_IPV4_PACKET (1u << 2) +/* IPv4 options. */ +#define MLX5_CQE_RX_IP_EXT_OPTS_PACKET (1u << 1) /* IPv6 packet. */ -#define MLX5_CQE_RX_IPV6_PACKET (1u << 3) +#define MLX5_CQE_RX_IPV6_PACKET (1u << 2) + +/* IPv4 packet. */ +#define MLX5_CQE_RX_IPV4_PACKET (1u << 3) + +/* TCP packet. */ +#define MLX5_CQE_RX_TCP_PACKET (1u << 4) + +/* UDP packet. */ +#define MLX5_CQE_RX_UDP_PACKET (1u << 5) + +/* IP is fragmented. */ +#define MLX5_CQE_RX_IP_FRAG_PACKET (1u << 7) -/* Outer IPv4 packet. */ -#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7) +/* L2 header is valid. */ +#define MLX5_CQE_RX_L2_HDR_VALID (1u << 8) -/* Outer IPv6 packet. */ -#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8) +/* L3 header is valid. */ +#define MLX5_CQE_RX_L3_HDR_VALID (1u << 9) + +/* L4 header is valid. */ +#define MLX5_CQE_RX_L4_HDR_VALID (1u << 10) + +/* Outer packet, 0 IPv4, 1 IPv6. */ +#define MLX5_CQE_RX_OUTER_PACKET (1u << 1) /* Tunnel packet bit in the CQE. */ -#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4) +#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 0) -/* Outer IP checksum OK. */ -#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5) +/* INVALID is used by packets matching no flow rules. */ +#define MLX5_FLOW_MARK_INVALID 0 -/* Outer UDP header and checksum OK. */ -#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6) +/* Maximum allowed value to mark a packet. */ +#define MLX5_FLOW_MARK_MAX 0xfffff0 + +/* Default mark value used when none is provided. */ +#define MLX5_FLOW_MARK_DEFAULT 0xffffff /* Subset of struct mlx5_wqe_eth_seg. 
*/ struct mlx5_wqe_eth_seg_small { @@ -114,12 +138,19 @@ struct mlx5_wqe_eth_seg_small { uint32_t rsvd2; uint16_t inline_hdr_sz; uint8_t inline_hdr[2]; -}; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); struct mlx5_wqe_inl_small { uint32_t byte_cnt; uint8_t raw; -}; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); + +struct mlx5_wqe_ctrl { + uint32_t ctrl0; + uint32_t ctrl1; + uint32_t ctrl2; + uint32_t ctrl3; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); /* Small common part of the WQE. */ struct mlx5_wqe { @@ -127,11 +158,17 @@ struct mlx5_wqe { struct mlx5_wqe_eth_seg_small eseg; }; +/* Vectorize WQE header. */ +struct mlx5_wqe_v { + rte_v128u32_t ctrl; + rte_v128u32_t eseg; +}; + /* WQE. */ struct mlx5_wqe64 { struct mlx5_wqe hdr; uint8_t raw[32]; -} __rte_aligned(64); +} __rte_aligned(MLX5_WQE_SIZE); /* MPW session status. */ enum mlx5_mpw_state { @@ -163,16 +200,72 @@ struct mlx5_cqe { uint32_t rx_hash_res; uint8_t rx_hash_type; uint8_t rsvd1[11]; - uint8_t hds_ip_ext; - uint8_t l4_hdr_type_etc; + uint16_t hdr_type_etc; uint16_t vlan_info; uint8_t rsvd2[12]; uint32_t byte_cnt; uint64_t timestamp; - uint8_t rsvd3[4]; + uint32_t sop_drop_qpn; uint16_t wqe_counter; uint8_t rsvd4; uint8_t op_own; }; +/** + * Convert a user mark to flow mark. + * + * @param val + * Mark value to convert. + * + * @return + * Converted mark value. + */ +static inline uint32_t +mlx5_flow_mark_set(uint32_t val) +{ + uint32_t ret; + + /* + * Add one to the user value to differentiate un-marked flows from + * marked flows. + */ + ++val; +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + /* + * Mark is 24 bits (minus reserved values) but is stored on a 32 bit + * word, byte-swapped by the kernel on little-endian systems. In this + * case, left-shifting the resulting big-endian value ensures the + * least significant 24 bits are retained when converting it back. + */ + ret = rte_cpu_to_be_32(val) >> 8; +#else + ret = val; +#endif + assert(ret <= MLX5_FLOW_MARK_MAX); + return ret; +} + +/** + * Convert a mark to user mark. + * + * @param val + * Mark value to convert. + * + * @return + * Converted mark value. + */ +static inline uint32_t +mlx5_flow_mark_get(uint32_t val) +{ + /* + * Subtract one from the retrieved value. It was added by + * mlx5_flow_mark_set() to distinguish unmarked flows. 
+ */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + return (val >> 8) - 1; +#else + return val - 1; +#endif +} + #endif /* RTE_PMD_MLX5_PRM_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c index c5746fa0..28e93d3e 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c @@ -102,7 +102,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV4), .flow_priority = 1, .flow_spec.ipv4 = { - .type = IBV_EXP_FLOW_SPEC_IPV4_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV4, .size = sizeof(hash_rxq_init[0].flow_spec.ipv4), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], @@ -140,7 +140,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV6), .flow_priority = 1, .flow_spec.ipv6 = { - .type = IBV_EXP_FLOW_SPEC_IPV6_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV6, .size = sizeof(hash_rxq_init[0].flow_spec.ipv6), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c index b56c0a11..88b03544 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c @@ -69,7 +69,34 @@ #include "mlx5_defs.h" #include "mlx5_prm.h" -//#define MLX5_OPCODE_TSO 0xe +static inline int +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) + __attribute__((always_inline)); + +static inline void +txq_complete(struct txq *txq) __attribute__((always_inline)); + +static inline uint32_t +txq_mp2mr(struct txq *txq, struct rte_mempool *mp) + __attribute__((always_inline)); + +static inline void +mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) + __attribute__((always_inline)); + +static inline uint32_t +rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) + __attribute__((always_inline)); + +static inline int +mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, + uint16_t cqe_cnt, uint32_t *rss_hash) + __attribute__((always_inline)); + +static inline uint32_t +rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) + __attribute__((always_inline)); #ifndef NDEBUG @@ -86,7 +113,7 @@ static inline int check_cqe_seen(volatile struct mlx5_cqe *cqe) { static const uint8_t magic[] = "seen"; - volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3; + volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0; int ret = 1; unsigned int i; @@ -100,11 +127,6 @@ check_cqe_seen(volatile struct mlx5_cqe *cqe) #endif /* NDEBUG */ -static inline int -check_cqe(volatile struct mlx5_cqe *cqe, - unsigned int cqes_n, const uint16_t ci) - __attribute__((always_inline)); - /** * Check whether CQE is valid. * @@ -154,8 +176,23 @@ check_cqe(volatile struct mlx5_cqe *cqe, return 0; } -static inline void -txq_complete(struct txq *txq) __attribute__((always_inline)); +/** + * Return the address of the WQE. + * + * @param txq + * Pointer to TX queue structure. + * @param wqe_ci + * WQE consumer index. + * + * @return + * WQE address. + */ +static inline uintptr_t * +tx_mlx5_wqe(struct txq *txq, uint16_t ci) +{ + ci &= ((1 << txq->wqe_n) - 1); + return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE); +} /** * Manage TX completions. 
@@ -175,7 +212,7 @@ txq_complete(struct txq *txq) uint16_t elts_tail; uint16_t cq_ci = txq->cq_ci; volatile struct mlx5_cqe *cqe = NULL; - volatile struct mlx5_wqe *wqe; + volatile struct mlx5_wqe_ctrl *ctrl; do { volatile struct mlx5_cqe *tmp; @@ -201,9 +238,10 @@ txq_complete(struct txq *txq) } while (1); if (unlikely(cqe == NULL)) return; - wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & - ((1 << txq->wqe_n) - 1)].hdr; - elts_tail = wqe->ctrl[3]; + txq->wqe_pi = ntohs(cqe->wqe_counter); + ctrl = (volatile struct mlx5_wqe_ctrl *) + tx_mlx5_wqe(txq, txq->wqe_pi); + elts_tail = ctrl->ctrl3; assert(elts_tail < (1 << txq->wqe_n)); /* Free buffers. */ while (elts_free != elts_tail) { @@ -248,10 +286,6 @@ txq_mb2mp(struct rte_mbuf *buf) return buf->pool; } -static inline uint32_t -txq_mp2mr(struct txq *txq, struct rte_mempool *mp) - __attribute__((always_inline)); - /** * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[]. * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full, @@ -294,57 +328,20 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp) * * @param txq * Pointer to TX queue structure. + * @param wqe + * Pointer to the last WQE posted in the NIC. */ static inline void -mlx5_tx_dbrec(struct txq *txq) +mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) { - uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset); - uint32_t data[4] = { - htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), - htonl(txq->qp_num_8s), - 0, - 0, - }; + uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); + volatile uint64_t *src = ((volatile uint64_t *)wqe); + rte_wmb(); *txq->qp_db = htonl(txq->wqe_ci); /* Ensure ordering between DB record and BF copy. */ rte_wmb(); - memcpy(dst, (uint8_t *)data, 16); - txq->bf_offset ^= (1 << txq->bf_buf_size); -} - -/** - * Prefetch a CQE. - * - * @param txq - * Pointer to TX queue structure. - * @param cqe_ci - * CQE consumer index. - */ -static inline void -tx_prefetch_cqe(struct txq *txq, uint16_t ci) -{ - volatile struct mlx5_cqe *cqe; - - cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)]; - rte_prefetch0(cqe); -} - -/** - * Prefetch a WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe_ci - * WQE consumer index. - */ -static inline void -tx_prefetch_wqe(struct txq *txq, uint16_t ci) -{ - volatile struct mlx5_wqe64 *wqe; - - wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)]; - rte_prefetch0(wqe); + *dst = *src; } /** @@ -369,8 +366,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; - volatile struct mlx5_wqe *wqe = NULL; + volatile struct mlx5_wqe_v *wqe = NULL; unsigned int segs_n = 0; struct rte_mbuf *buf = NULL; uint8_t *raw; @@ -378,19 +376,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_cqe(txq, txq->cq_ci + 1); rte_prefetch0(*pkts); /* Start processing. 
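One note on the bound added just below (and repeated in the MPW burst functions further down): txq->wqe_ci and txq->wqe_pi are free-running 16-bit counters, wqe_pi being refreshed above from cqe->wqe_counter, so their unsigned difference counts WQEs posted but not yet completed, and max_wqe is the ring space still available for this burst; the function bails out early when none is left.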
*/ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { - volatile struct mlx5_wqe_data_seg *dseg = NULL; + volatile rte_v128u32_t *dseg = NULL; uint32_t length; unsigned int ds = 0; uintptr_t addr; + uint64_t naddr; + uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; + uint16_t ehdr; + uint8_t cs_flags = 0; #ifdef MLX5_PMD_SOFT_COUNTERS uint32_t total_length = 0; #endif @@ -409,13 +412,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) --segs_n; if (!segs_n) --pkts_n; - wqe = &(*txq->wqes)[txq->wqe_ci & - ((1 << txq->wqe_n) - 1)].hdr; - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + if (unlikely(--max_wqe == 0)) + break; + wqe = (volatile struct mlx5_wqe_v *) + tx_mlx5_wqe(txq, txq->wqe_ci); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); if (pkts_n > 1) rte_prefetch0(*pkts); addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); + ehdr = (((uint8_t *)addr)[1] << 8) | + ((uint8_t *)addr)[0]; #ifdef MLX5_PMD_SOFT_COUNTERS total_length = length; #endif @@ -433,78 +440,88 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->eseg.cs_flags = - MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } else { - wqe->eseg.cs_flags = 0; + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; } - raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0]; - /* Start the know and common part of the WQE structure. */ - wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->ctrl[2] = 0; - wqe->ctrl[3] = 0; - wqe->eseg.rsvd0 = 0; - wqe->eseg.rsvd1 = 0; - wqe->eseg.mss = 0; - wqe->eseg.rsvd2 = 0; - /* Start by copying the Ethernet Header. */ - memcpy((uint8_t *)raw, ((uint8_t *)addr), 16); - length -= MLX5_WQE_DWORD_SIZE; - addr += MLX5_WQE_DWORD_SIZE; + raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; /* Replace the Ethernet type by the VLAN if necessary. */ if (buf->ol_flags & PKT_TX_VLAN_PKT) { uint32_t vlan = htonl(0x81000000 | buf->vlan_tci); - - memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - - sizeof(vlan)), - &vlan, sizeof(vlan)); - addr -= sizeof(vlan); - length += sizeof(vlan); + unsigned int len = 2 * ETHER_ADDR_LEN - 2; + + addr += 2; + length -= 2; + /* Copy Destination and source mac address. */ + memcpy((uint8_t *)raw, ((uint8_t *)addr), len); + /* Copy VLAN. */ + memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan)); + /* Copy missing two bytes to end the DSeg. */ + memcpy((uint8_t *)raw + len + sizeof(vlan), + ((uint8_t *)addr) + len, 2); + addr += len + 2; + length -= (len + 2); + } else { + memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, + MLX5_WQE_DWORD_SIZE); + length -= pkt_inline_sz; + addr += pkt_inline_sz; } /* Inline if enough room. */ - if (txq->max_inline != 0) { - uintptr_t end = - (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]; - uint16_t max_inline = - txq->max_inline * RTE_CACHE_LINE_SIZE; - uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE; - uint16_t room; + if (txq->max_inline) { + uintptr_t end = (uintptr_t) + (((uintptr_t)txq->wqes) + + (1 << txq->wqe_n) * MLX5_WQE_SIZE); + unsigned int max_inline = txq->max_inline * + RTE_CACHE_LINE_SIZE - + MLX5_WQE_DWORD_SIZE; + uintptr_t addr_end = (addr + max_inline) & + ~(RTE_CACHE_LINE_SIZE - 1); + unsigned int copy_b = (addr_end > addr) ? 
+ RTE_MIN((addr_end - addr), length) : + 0; raw += MLX5_WQE_DWORD_SIZE; - room = end - (uintptr_t)raw; - if (room > max_inline) { - uintptr_t addr_end = (addr + max_inline) & - ~(RTE_CACHE_LINE_SIZE - 1); - uint16_t copy_b = ((addr_end - addr) > length) ? - length : - (addr_end - addr); + if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { + /* + * One Dseg remains in the current WQE. To + * keep the computation positive, it is + * removed after the bytes to Dseg conversion. + */ + uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; + if (unlikely(max_wqe < n)) + break; + max_wqe -= n; rte_memcpy((void *)raw, (void *)addr, copy_b); addr += copy_b; length -= copy_b; pkt_inline_sz += copy_b; - /* Sanity check. */ - assert(addr <= addr_end); } - /* Store the inlined packet size in the WQE. */ - wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz); /* - * 2 DWORDs consumed by the WQE header + 1 DSEG + + * 2 DWORDs consumed by the WQE header + ETH segment + * the size of the inline part of the packet. */ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); if (length > 0) { - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)wqe + - (ds * MLX5_WQE_DWORD_SIZE)); - if ((uintptr_t)dseg >= end) - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)&(*txq->wqes)[0]); + if (ds % (MLX5_WQE_SIZE / + MLX5_WQE_DWORD_SIZE) == 0) { + if (unlikely(--max_wqe == 0)) + break; + dseg = (volatile rte_v128u32_t *) + tx_mlx5_wqe(txq, txq->wqe_ci + + ds / 4); + } else { + dseg = (volatile rte_v128u32_t *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + } goto use_dseg; } else if (!segs_n) { goto next_pkt; } else { + /* dseg will be advance as part of next_seg */ + dseg = (volatile rte_v128u32_t *) + ((uintptr_t)wqe + + ((ds - 1) * MLX5_WQE_DWORD_SIZE)); goto next_seg; } } else { @@ -512,16 +529,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * No inline has been done in the packet, only the * Ethernet Header as been stored. */ - wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE); - dseg = (struct mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - *dseg = (struct mlx5_wqe_data_seg) { - .addr = htonll(addr), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + naddr = htonll(addr); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + naddr, + naddr >> 32, }; ++ds; if (!segs_n) @@ -538,17 +556,13 @@ next_seg: */ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { - unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & - ((1 << txq->wqe_n) - 1); - - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)&(*txq->wqes)[n]); - tx_prefetch_wqe(txq, n + 1); - } else if (!dseg) { - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)wqe + - (ds * MLX5_WQE_DWORD_SIZE)); - } else { + if (unlikely(--max_wqe == 0)) + break; + dseg = (volatile rte_v128u32_t *) + tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); + rte_prefetch0(tx_mlx5_wqe(txq, + txq->wqe_ci + ds / 4 + 1)); + } else { ++dseg; } ++ds; @@ -559,10 +573,12 @@ next_seg: total_length += length; #endif /* Store segment information. 
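A little further down in this hunk, completions are requested only about once every MLX5_TX_COMP_THRESH packets: ctrl2 of that last WQE asks for a CQE and elts_head is stashed in its ctrl3 word, which is exactly the value txq_complete() above reads back as elts_tail when the completion arrives.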
*/ - *dseg = (struct mlx5_wqe_data_seg) { - .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + naddr, + naddr >> 32, }; (*txq->elts)[elts_head] = buf; elts_head = (elts_head + 1) & (elts_n - 1); @@ -574,7 +590,19 @@ next_seg: --pkts_n; next_pkt: ++i; - wqe->ctrl[1] = htonl(txq->qp_num_8s | ds); + /* Initialize known and common part of the WQE structure. */ + wqe->ctrl = (rte_v128u32_t){ + htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), + htonl(txq->qp_num_8s | ds), + 0, + 0, + }; + wqe->eseg = (rte_v128u32_t){ + 0, + cs_flags, + 0, + (ehdr << 16) | htons(pkt_inline_sz), + }; txq->wqe_ci += (ds + 3) / 4; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ @@ -587,10 +615,13 @@ next_pkt: /* Check whether completion threshold has been reached. */ comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { + volatile struct mlx5_wqe_ctrl *w = + (volatile struct mlx5_wqe_ctrl *)wqe; + /* Request completion on last WQE. */ - wqe->ctrl[2] = htonl(8); + w->ctrl2 = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->ctrl[3] = elts_head; + w->ctrl3 = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -600,7 +631,7 @@ next_pkt: txq->stats.opackets += i; #endif /* Ring QP doorbell. */ - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe); txq->elts_head = elts_head; return i; } @@ -621,13 +652,13 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)]; + tx_mlx5_wqe(txq, idx + 1); mpw->state = MLX5_MPW_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); mpw->wqe->eseg.mss = htons(length); mpw->wqe->eseg.inline_hdr_sz = 0; mpw->wqe->eseg.rsvd0 = 0; @@ -669,8 +700,8 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) ++txq->wqe_ci; else txq->wqe_ci += 2; - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); } /** @@ -695,6 +726,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, @@ -703,14 +735,16 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. 
*/ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { struct rte_mbuf *buf = *(pkts++); unsigned int elts_head_next; @@ -744,6 +778,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use such + * resources. + */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } @@ -808,7 +850,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Ring QP doorbell. */ if (mpw.state == MLX5_MPW_STATE_OPENED) mlx5_mpw_close(txq, &mpw); - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, mpw.wqe); txq->elts_head = elts_head; return i; } @@ -833,7 +875,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); @@ -899,18 +941,30 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; + /* + * Compute the maximum number of WQE which can be consumed by inline + * code. + * - 2 DSEG for: + * - 1 control segment, + * - 1 Ethernet segment, + * - N Dseg from the inline request. + */ + const unsigned int wqe_inl_n = + ((2 * MLX5_WQE_DWORD_SIZE + + txq->max_inline * RTE_CACHE_LINE_SIZE) + + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. */ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); @@ -936,6 +990,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, break; max -= segs_n; --pkts_n; + /* + * Compute max_wqe in case less WQE were consumed in previous + * iteration. + */ + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) @@ -961,9 +1020,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (mpw.state == MLX5_MPW_STATE_CLOSED) { if ((segs_n != 1) || (length > inline_room)) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use + * such resources. 
+ */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } else { + if (unlikely(max_wqe < wqe_inl_n)) + break; + max_wqe -= wqe_inl_n; mlx5_mpw_inline_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } @@ -1011,14 +1081,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, addr = rte_pktmbuf_mtod(buf, uintptr_t); (*txq->elts)[elts_head] = buf; /* Maximum number of bytes before wrapping. */ - max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] - + max = ((((uintptr_t)(txq->wqes)) + + (1 << txq->wqe_n) * + MLX5_WQE_SIZE) - (uintptr_t)mpw.data.raw); if (length > max) { rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)addr, max); - mpw.data.raw = - (volatile void *)&(*txq->wqes)[0]; + mpw.data.raw = (volatile void *)txq->wqes; rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)(addr + max), length - max); @@ -1027,13 +1098,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)addr, length); - mpw.data.raw += length; + + if (length == max) + mpw.data.raw = + (volatile void *)txq->wqes; + else + mpw.data.raw += length; } - if ((uintptr_t)mpw.data.raw == - (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]) - mpw.data.raw = - (volatile void *)&(*txq->wqes)[0]; ++mpw.pkts_n; + mpw.total_len += length; ++j; if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { mlx5_mpw_inline_close(txq, &mpw); @@ -1043,7 +1116,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, inline_room -= length; } } - mpw.total_len += length; elts_head = elts_head_next; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ @@ -1077,7 +1149,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, mlx5_mpw_inline_close(txq, &mpw); else if (mpw.state == MLX5_MPW_STATE_OPENED) mlx5_mpw_close(txq, &mpw); - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, mpw.wqe); txq->elts_head = elts_head; return i; } @@ -1097,30 +1169,28 @@ static inline uint32_t rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) { uint32_t pkt_type; - uint8_t flags = cqe->l4_hdr_type_etc; + uint16_t flags = ntohs(cqe->hdr_type_etc); - if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) + if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) { pkt_type = - TRANSPOSE(flags, - MLX5_CQE_RX_OUTER_IPV4_PACKET, - RTE_PTYPE_L3_IPV4) | - TRANSPOSE(flags, - MLX5_CQE_RX_OUTER_IPV6_PACKET, - RTE_PTYPE_L3_IPV6) | TRANSPOSE(flags, MLX5_CQE_RX_IPV4_PACKET, - RTE_PTYPE_INNER_L3_IPV4) | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, MLX5_CQE_RX_IPV6_PACKET, - RTE_PTYPE_INNER_L3_IPV6); - else + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); + pkt_type |= ((cqe->pkt_info & MLX5_CQE_RX_OUTER_PACKET) ? + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); + } else { pkt_type = TRANSPOSE(flags, MLX5_CQE_L3_HDR_TYPE_IPV6, - RTE_PTYPE_L3_IPV6) | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | TRANSPOSE(flags, MLX5_CQE_L3_HDR_TYPE_IPV4, - RTE_PTYPE_L3_IPV4); + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); + } return pkt_type; } @@ -1147,6 +1217,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; int len = 0; + uint16_t idx, end; /* Process compressed data in the CQE and mini arrays. 
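In the compressed-CQE path that follows, invalidation becomes incremental: every time a group of eight mini-CQEs has been consumed the corresponding CQE slots are overwritten with MLX5_CQE_INVALIDATE, the remainder is invalidated when the compression session ends, and the slots of a newly opened session are prefetched up front.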
*/ if (zip->ai) { @@ -1157,6 +1228,14 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, len = ntohl((*mc)[zip->ai & 7].byte_cnt); *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); if ((++zip->ai & 7) == 0) { + /* Invalidate consumed CQEs */ + idx = zip->ca; + end = zip->na; + while (idx != end) { + (*rxq->cqes)[idx & cqe_cnt].op_own = + MLX5_CQE_INVALIDATE; + ++idx; + } /* * Increment consumer index to skip the number of * CQEs consumed. Hardware leaves holes in the CQ @@ -1166,8 +1245,9 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, zip->na += 8; } if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { - uint16_t idx = rxq->cq_ci; - uint16_t end = zip->cq_ci; + /* Invalidate the rest */ + idx = zip->ca; + end = zip->cq_ci; while (idx != end) { (*rxq->cqes)[idx & cqe_cnt].op_own = @@ -1203,7 +1283,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, * special case the second one is located 7 CQEs after * the initial CQE instead of 8 for subsequent ones. */ - zip->ca = rxq->cq_ci & cqe_cnt; + zip->ca = rxq->cq_ci; zip->na = zip->ca + 7; /* Compute the next non compressed CQE. */ --rxq->cq_ci; @@ -1212,6 +1292,13 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, len = ntohl((*mc)[0].byte_cnt); *rss_hash = ntohl((*mc)[0].rx_hash_result); zip->ai = 1; + /* Prefetch all the entries to be invalidated */ + idx = zip->ca; + end = zip->cq_ci; + while (idx != end) { + rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); + ++idx; + } } else { len = ntohl(cqe->byte_cnt); *rss_hash = ntohl(cqe->rx_hash_res); @@ -1238,28 +1325,22 @@ static inline uint32_t rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) { uint32_t ol_flags = 0; - uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK; - uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK; - - if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) || - (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6)) - ol_flags |= TRANSPOSE(cqe->hds_ip_ext, - MLX5_CQE_L3_OK, - PKT_RX_IP_CKSUM_GOOD); - if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP)) - ol_flags |= TRANSPOSE(cqe->hds_ip_ext, - MLX5_CQE_L4_OK, - PKT_RX_L4_CKSUM_GOOD); + uint16_t flags = ntohs(cqe->hdr_type_etc); + + ol_flags = + TRANSPOSE(flags, + MLX5_CQE_RX_L3_HDR_VALID, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, + MLX5_CQE_RX_L4_HDR_VALID, + PKT_RX_L4_CKSUM_GOOD); if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - TRANSPOSE(cqe->l4_hdr_type_etc, - MLX5_CQE_RX_OUTER_IP_CSUM_OK, + TRANSPOSE(flags, + MLX5_CQE_RX_L3_HDR_VALID, PKT_RX_IP_CKSUM_GOOD) | - TRANSPOSE(cqe->l4_hdr_type_etc, - MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK, + TRANSPOSE(flags, + MLX5_CQE_RX_L4_HDR_VALID, PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -1316,10 +1397,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } while (pkt != seg) { assert(pkt != (*rxq->elts)[idx]); - seg = NEXT(pkt); + rep = NEXT(pkt); rte_mbuf_refcnt_set(pkt, 0); __rte_mbuf_raw_free(pkt); - pkt = seg; + pkt = rep; } break; } @@ -1344,10 +1425,20 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update packet information. 
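The invalidation and prefetch loops added above walk a free-running index range and rely on masking to stay inside the completion ring, so a single loop also covers the wrap-around case. A minimal sketch of that pattern; the ring size and the marker value are made up for illustration and do not correspond to MLX5_CQE_INVALIDATE.

#include <stdint.h>

#define RING_LOG2  6
#define RING_SIZE  (1u << RING_LOG2)
#define RING_MASK  (RING_SIZE - 1)
#define OP_INVALID 0xffu	/* stand-in marker, not the real MLX5 value */

static uint8_t op_own[RING_SIZE];

/*
 * Mark every entry in [start, end) as consumed. 'start' and 'end' are
 * free-running indices, so the masking also handles the wrap across the
 * end of the ring.
 */
static void
invalidate_range(uint16_t start, uint16_t end)
{
	uint16_t idx;

	for (idx = start; idx != end; ++idx)
		op_own[idx & RING_MASK] = OP_INVALID;
}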
*/ pkt->packet_type = 0; pkt->ol_flags = 0; - if (rxq->rss_hash) { + if (rss_hash_res && rxq->rss_hash) { pkt->hash.rss = rss_hash_res; pkt->ol_flags = PKT_RX_RSS_HASH; } + if (rxq->mark && + ((cqe->sop_drop_qpn != + htonl(MLX5_FLOW_MARK_INVALID)) || + (cqe->sop_drop_qpn != + htonl(MLX5_FLOW_MARK_DEFAULT)))) { + pkt->hash.fdir.hi = + mlx5_flow_mark_get(cqe->sop_drop_qpn); + pkt->ol_flags &= ~PKT_RX_RSS_HASH; + pkt->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | rxq->crc_present) { if (rxq->csum) { @@ -1356,7 +1447,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); } - if (cqe->l4_hdr_type_etc & + if (cqe->hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) { pkt->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h index f45e3f51..41a34d7f 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h @@ -114,7 +114,8 @@ struct rxq { unsigned int elts_n:4; /* Log 2 of Mbufs. */ unsigned int port_id:8; unsigned int rss_hash:1; /* RSS hash result is enabled. */ - unsigned int :9; /* Remaining bits. */ + unsigned int mark:1; /* Marked flow available on the queue. */ + unsigned int :8; /* Remaining bits. */ volatile uint32_t *rq_db; volatile uint32_t *cq_db; uint16_t rq_ci; @@ -178,8 +179,8 @@ struct hash_rxq_init { uint16_t size; } hdr; struct ibv_exp_flow_spec_tcp_udp tcp_udp; - struct ibv_exp_flow_spec_ipv4_ext ipv4; - struct ibv_exp_flow_spec_ipv6_ext ipv6; + struct ibv_exp_flow_spec_ipv4 ipv4; + struct ibv_exp_flow_spec_ipv6 ipv6; struct ibv_exp_flow_spec_eth eth; } flow_spec; /* Flow specification template. */ const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */ @@ -240,13 +241,6 @@ struct hash_rxq { [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS]; }; -/** C extension macro for environments lacking C11 features. */ -#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L -#define RTE_STD_C11 __extension__ -#else -#define RTE_STD_C11 -#endif - /* TX queue descriptor. */ RTE_STD_C11 struct txq { @@ -255,15 +249,14 @@ struct txq { uint16_t elts_comp; /* Counter since last completion request. */ uint16_t cq_ci; /* Consumer index for completion queue. */ uint16_t wqe_ci; /* Consumer index for work queue. */ + uint16_t wqe_pi; /* Producer index for work queue. */ uint16_t elts_n:4; /* (*elts)[] length (in log2). */ uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */ - uint16_t bf_buf_size:4; /* Log2 Blueflame size. */ - uint16_t bf_offset; /* Blueflame offset. */ uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ - volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */ + volatile void *wqes; /* Work queue (use volatile to write into). */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ volatile void *bf_reg; /* Blueflame register. */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_stats.c b/src/dpdk/drivers/net/mlx5/mlx5_stats.c index c6087d4e..20c957e8 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_stats.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_stats.c @@ -31,11 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include + /* DPDK headers don't like -pedantic. 
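The txq change above drops the typed mlx5_wqe64 array in favour of an untyped ring base, so a WQE address is now computed by offsetting from that base, presumably what the tx_mlx5_wqe() helper used earlier in this patch does. A sketch under the assumption of a 64-byte MLX5_WQE_SIZE; the helper name wqe_at is made up.

#include <stdint.h>

#define WQE_SIZE 64u	/* assumed MLX5_WQE_SIZE */

/*
 * Address of WQE 'ci' in a ring of 2^wqe_n fixed-size entries whose base
 * is kept as an untyped pointer, as in the reworked struct txq above.
 */
static inline volatile void *
wqe_at(volatile void *wqes, unsigned int wqe_n, uint16_t ci)
{
	uint16_t mask = (1u << wqe_n) - 1;

	return (volatile uint8_t *)wqes + (ci & mask) * WQE_SIZE;
}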
*/ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif #include +#include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -44,243 +49,314 @@ #include "mlx5_rxtx.h" #include "mlx5_defs.h" -#include -#include - -static void -mlx5_stats_read_hw(struct rte_eth_dev *dev, - struct rte_eth_stats *stats){ - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - unsigned int i; - - struct rte_eth_stats tmp = {0}; - struct ethtool_stats *et_stats = (struct ethtool_stats *)lps->et_stats; - struct ifreq ifr; - - et_stats->cmd = ETHTOOL_GSTATS; - et_stats->n_stats = lps->n_stats; - - ifr.ifr_data = (caddr_t) et_stats; - - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic values for mlnx5 "); - } - - tmp.ibytes += et_stats->data[lps->inx_rx_vport_unicast_bytes] + - et_stats->data[lps->inx_rx_vport_multicast_bytes] + - et_stats->data[lps->inx_rx_vport_broadcast_bytes]; - - tmp.ipackets += et_stats->data[lps->inx_rx_vport_unicast_packets] + - et_stats->data[lps->inx_rx_vport_multicast_packets] + - et_stats->data[lps->inx_rx_vport_broadcast_packets]; - - tmp.ierrors += (et_stats->data[lps->inx_rx_wqe_err] + - et_stats->data[lps->inx_rx_crc_errors_phy] + - et_stats->data[lps->inx_rx_in_range_len_errors_phy] + - et_stats->data[lps->inx_rx_symbol_err_phy]); +struct mlx5_counter_ctrl { + /* Name of the counter. */ + char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE]; + /* Name of the counter on the device table. */ + char ctr_name[RTE_ETH_XSTATS_NAME_SIZE]; +}; + +static const struct mlx5_counter_ctrl mlx5_counters_init[] = { + { + .dpdk_name = "rx_port_unicast_bytes", + .ctr_name = "rx_vport_unicast_bytes", + }, + { + .dpdk_name = "rx_port_multicast_bytes", + .ctr_name = "rx_vport_multicast_bytes", + }, + { + .dpdk_name = "rx_port_broadcast_bytes", + .ctr_name = "rx_vport_broadcast_bytes", + }, + { + .dpdk_name = "rx_port_unicast_packets", + .ctr_name = "rx_vport_unicast_packets", + }, + { + .dpdk_name = "rx_port_multicast_packets", + .ctr_name = "rx_vport_multicast_packets", + }, + { + .dpdk_name = "rx_port_broadcast_packets", + .ctr_name = "rx_vport_broadcast_packets", + }, + { + .dpdk_name = "tx_port_unicast_bytes", + .ctr_name = "tx_vport_unicast_bytes", + }, + { + .dpdk_name = "tx_port_multicast_bytes", + .ctr_name = "tx_vport_multicast_bytes", + }, + { + .dpdk_name = "tx_port_broadcast_bytes", + .ctr_name = "tx_vport_broadcast_bytes", + }, + { + .dpdk_name = "tx_port_unicast_packets", + .ctr_name = "tx_vport_unicast_packets", + }, + { + .dpdk_name = "tx_port_multicast_packets", + .ctr_name = "tx_vport_multicast_packets", + }, + { + .dpdk_name = "tx_port_broadcast_packets", + .ctr_name = "tx_vport_broadcast_packets", + }, + { + .dpdk_name = "rx_wqe_err", + .ctr_name = "rx_wqe_err", + }, + { + .dpdk_name = "rx_crc_errors_phy", + .ctr_name = "rx_crc_errors_phy", + }, + { + .dpdk_name = "rx_in_range_len_errors_phy", + .ctr_name = "rx_in_range_len_errors_phy", + }, + { + .dpdk_name = "rx_symbol_err_phy", + .ctr_name = "rx_symbol_err_phy", + }, + { + .dpdk_name = "tx_errors_phy", + .ctr_name = "tx_errors_phy", + }, +}; + +static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); - tmp.obytes += et_stats->data[lps->inx_tx_vport_unicast_bytes] + - et_stats->data[lps->inx_tx_vport_multicast_bytes] + - et_stats->data[lps->inx_tx_vport_broadcast_bytes]; - - tmp.opackets += (et_stats->data[lps->inx_tx_vport_unicast_packets] + - et_stats->data[lps->inx_tx_vport_multicast_packets] + - 
et_stats->data[lps->inx_tx_vport_broadcast_packets]); - - tmp.oerrors += et_stats->data[lps->inx_tx_errors_phy]; - - /* SW Rx */ - for (i = 0; (i != priv->rxqs_n); ++i) { - struct rxq *rxq = (*priv->rxqs)[i]; - if (rxq) { - tmp.imissed += rxq->stats.idropped; - tmp.rx_nombuf += rxq->stats.rx_nombuf; - } - } - - /*SW Tx */ - for (i = 0; (i != priv->txqs_n); ++i) { - struct txq *txq = (*priv->txqs)[i]; - if (txq) { - tmp.oerrors += txq->stats.odropped; - } - } - - *stats =tmp; +/** + * Read device counters table. + * + * @param priv + * Pointer to private structure. + * @param[out] stats + * Counters table output buffer. + * + * @return + * 0 on success and stats is filled, negative on error. + */ +static int +priv_read_dev_counters(struct priv *priv, uint64_t *stats) +{ + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + struct ifreq ifr; + unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) + + sizeof(struct ethtool_stats); + struct ethtool_stats et_stats[(stats_sz + ( + sizeof(struct ethtool_stats) - 1)) / + sizeof(struct ethtool_stats)]; + + et_stats->cmd = ETHTOOL_GSTATS; + et_stats->n_stats = xstats_ctrl->stats_n; + ifr.ifr_data = (caddr_t)et_stats; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to read statistic values from device"); + return -1; + } + for (i = 0; i != xstats_n; ++i) + stats[i] = (uint64_t) + et_stats->data[xstats_ctrl->dev_table_idx[i]]; + return 0; } +/** + * Init the structures to read device counters. + * + * @param priv + * Pointer to private structure. + */ void -mlx5_stats_free(struct rte_eth_dev *dev) +priv_xstats_init(struct priv *priv) { - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - - if ( lps->et_stats ){ - free(lps->et_stats); - lps->et_stats=0; - } + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int j; + char ifname[IF_NAMESIZE]; + struct ifreq ifr; + struct ethtool_drvinfo drvinfo; + struct ethtool_gstrings *strings = NULL; + unsigned int dev_stats_n; + unsigned int str_sz; + + if (priv_get_ifname(priv, &ifname)) { + WARN("unable to get interface name"); + return; + } + /* How many statistics are available. */ + drvinfo.cmd = ETHTOOL_GDRVINFO; + ifr.ifr_data = (caddr_t)&drvinfo; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to get driver info"); + return; + } + dev_stats_n = drvinfo.n_stats; + if (dev_stats_n < 1) { + WARN("no extended statistics available"); + return; + } + xstats_ctrl->stats_n = dev_stats_n; + /* Allocate memory to grab stat names and values. 
*/ + str_sz = dev_stats_n * ETH_GSTRING_LEN; + strings = (struct ethtool_gstrings *) + rte_malloc("xstats_strings", + str_sz + sizeof(struct ethtool_gstrings), 0); + if (!strings) { + WARN("unable to allocate memory for xstats"); + return; + } + strings->cmd = ETHTOOL_GSTRINGS; + strings->string_set = ETH_SS_STATS; + strings->len = dev_stats_n; + ifr.ifr_data = (caddr_t)strings; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to get statistic names"); + goto free; + } + for (j = 0; j != xstats_n; ++j) + xstats_ctrl->dev_table_idx[j] = dev_stats_n; + for (i = 0; i != dev_stats_n; ++i) { + const char *curr_string = (const char *) + &strings->data[i * ETH_GSTRING_LEN]; + + for (j = 0; j != xstats_n; ++j) { + if (!strcmp(mlx5_counters_init[j].ctr_name, + curr_string)) { + xstats_ctrl->dev_table_idx[j] = i; + break; + } + } + } + for (j = 0; j != xstats_n; ++j) { + if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) { + WARN("counter \"%s\" is not recognized", + mlx5_counters_init[j].dpdk_name); + goto free; + } + } + /* Copy to base at first time. */ + assert(xstats_n <= MLX5_MAX_XSTATS); + priv_read_dev_counters(priv, xstats_ctrl->base); +free: + rte_free(strings); } - -static void -mlx5_stats_init(struct rte_eth_dev *dev) +/** + * Get device extended statistics. + * + * @param priv + * Pointer to private structure. + * @param[out] stats + * Pointer to rte extended stats table. + * + * @return + * Number of extended stats on success and stats is filled, + * negative on error. + */ +static int +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) { - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - struct rte_eth_stats tmp = {0}; - - unsigned int i; - unsigned int idx; - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - - struct ethtool_stats *et_stats = NULL; - struct ethtool_drvinfo drvinfo; - struct ethtool_gstrings *strings = NULL; - unsigned int n_stats, sz_str, sz_stats; - - if (priv_get_ifname(priv, &ifname)) { - WARN("unable to get interface name"); - return; - } - /* How many statistics are available ? 
*/ - drvinfo.cmd = ETHTOOL_GDRVINFO; - ifr.ifr_data = (caddr_t) &drvinfo; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get driver info for %s", ifname); - return; - } - - n_stats = drvinfo.n_stats; - if (n_stats < 1) { - WARN("no statistics available for %s", ifname); - return; - } - lps->n_stats = n_stats; - - /* Allocate memory to grab stat names and values */ - sz_str = n_stats * ETH_GSTRING_LEN; - sz_stats = n_stats * sizeof(uint64_t); - strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings)); - if (!strings) { - WARN("unable to allocate memory for strings"); - return; - } - - et_stats = calloc(1, sz_stats + sizeof(struct ethtool_stats)); - if (!et_stats) { - free(strings); - WARN("unable to allocate memory for stats"); - } - - strings->cmd = ETHTOOL_GSTRINGS; - strings->string_set = ETH_SS_STATS; - strings->len = n_stats; - ifr.ifr_data = (caddr_t) strings; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic names for %s", ifname); - free(strings); - free(et_stats); - return; - } - - for (i = 0; (i != n_stats); ++i) { - - const char * curr_string = (const char*) &(strings->data[i * ETH_GSTRING_LEN]); - - if (!strcmp("rx_vport_unicast_bytes", curr_string)) lps->inx_rx_vport_unicast_bytes = i; - if (!strcmp("rx_vport_multicast_bytes", curr_string)) lps->inx_rx_vport_multicast_bytes = i; - if (!strcmp("rx_vport_broadcast_bytes", curr_string)) lps->inx_rx_vport_broadcast_bytes = i; - - if (!strcmp("rx_vport_unicast_packets", curr_string)) lps->inx_rx_vport_unicast_packets = i; - if (!strcmp("rx_vport_multicast_packets", curr_string)) lps->inx_rx_vport_multicast_packets = i; - if (!strcmp("rx_vport_broadcast_packets", curr_string)) lps->inx_rx_vport_broadcast_packets = i; - - if (!strcmp("tx_vport_unicast_bytes", curr_string)) lps->inx_tx_vport_unicast_bytes = i; - if (!strcmp("tx_vport_multicast_bytes", curr_string)) lps->inx_tx_vport_multicast_bytes = i; - if (!strcmp("tx_vport_broadcast_bytes", curr_string)) lps->inx_tx_vport_broadcast_bytes = i; - - if (!strcmp("tx_vport_unicast_packets", curr_string)) lps->inx_tx_vport_unicast_packets = i; - if (!strcmp("tx_vport_multicast_packets", curr_string)) lps->inx_tx_vport_multicast_packets = i; - if (!strcmp("tx_vport_broadcast_packets", curr_string)) lps->inx_tx_vport_broadcast_packets = i; - - if (!strcmp("rx_wqe_err", curr_string)) lps->inx_rx_wqe_err = i; - if (!strcmp("rx_crc_errors_phy", curr_string)) lps->inx_rx_crc_errors_phy = i; - if (!strcmp("rx_in_range_len_errors_phy", curr_string)) lps->inx_rx_in_range_len_errors_phy = i; - if (!strcmp("rx_symbol_err_phy", curr_string)) lps->inx_rx_symbol_err_phy = i; - - if (!strcmp("tx_errors_phy", curr_string)) lps->inx_tx_errors_phy = i; - } - - lps->et_stats =(void *)et_stats; - - if (!lps->inx_rx_vport_unicast_bytes || - !lps->inx_rx_vport_multicast_bytes || - !lps->inx_rx_vport_broadcast_bytes || - !lps->inx_rx_vport_unicast_packets || - !lps->inx_rx_vport_multicast_packets || - !lps->inx_rx_vport_broadcast_packets || - !lps->inx_tx_vport_unicast_bytes || - !lps->inx_tx_vport_multicast_bytes || - !lps->inx_tx_vport_broadcast_bytes || - !lps->inx_tx_vport_unicast_packets || - !lps->inx_tx_vport_multicast_packets || - !lps->inx_tx_vport_broadcast_packets || - !lps->inx_rx_wqe_err || - !lps->inx_rx_crc_errors_phy || - !lps->inx_rx_in_range_len_errors_phy) { - WARN("Counters are not recognized %s", ifname); - return; - } - - mlx5_stats_read_hw(dev,&tmp); - - /* copy yo shadow at first time */ - lps->m_shadow = tmp; - - free(strings); 
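Both the removed mlx5_stats_init() and the new priv_xstats_init()/priv_read_dev_counters() talk to the kernel through the same SIOCETHTOOL sequence: query the counter count with ETHTOOL_GDRVINFO, fetch the name table with ETHTOOL_GSTRINGS, then read the values with ETHTOOL_GSTATS. A standalone sketch of that sequence outside DPDK; the interface name is a placeholder and error handling is trimmed.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	const char *ifname = "eth0";	/* placeholder interface name */
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct ifreq ifr;
	struct ethtool_drvinfo drvinfo = { .cmd = ETHTOOL_GDRVINFO };
	struct ethtool_gstrings *strings;
	struct ethtool_stats *stats;
	unsigned int i, n;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

	/* 1. How many counters does the device expose? */
	ifr.ifr_data = (void *)&drvinfo;
	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) != 0)
		return 1;
	n = drvinfo.n_stats;

	/* 2. Fetch the counter names, ETH_GSTRING_LEN bytes each. */
	strings = calloc(1, sizeof(*strings) + n * ETH_GSTRING_LEN);
	strings->cmd = ETHTOOL_GSTRINGS;
	strings->string_set = ETH_SS_STATS;
	strings->len = n;
	ifr.ifr_data = (void *)strings;
	if (ioctl(fd, SIOCETHTOOL, &ifr) != 0)
		return 1;

	/* 3. Fetch the values and print them next to their names. */
	stats = calloc(1, sizeof(*stats) + n * sizeof(stats->data[0]));
	stats->cmd = ETHTOOL_GSTATS;
	stats->n_stats = n;
	ifr.ifr_data = (void *)stats;
	if (ioctl(fd, SIOCETHTOOL, &ifr) != 0)
		return 1;
	for (i = 0; i < n; i++)
		printf("%.*s = %llu\n", ETH_GSTRING_LEN,
		       (char *)&strings->data[i * ETH_GSTRING_LEN],
		       (unsigned long long)stats->data[i]);

	free(strings);
	free(stats);
	close(fd);
	return 0;
}

The driver itself only records the indices of the names listed in mlx5_counters_init[] rather than exposing every ethtool string, which is what the dev_table_idx[] lookup above implements.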
+ struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int n = xstats_n; + uint64_t counters[n]; + + if (priv_read_dev_counters(priv, counters) < 0) + return -1; + for (i = 0; i != xstats_n; ++i) { + stats[i].id = i; + stats[i].value = (counters[i] - xstats_ctrl->base[i]); + } + return n; } - +/** + * Reset device extended statistics. + * + * @param priv + * Pointer to private structure. + */ static void -mlx5_stats_diff(struct rte_eth_stats *a, - struct rte_eth_stats *b, - struct rte_eth_stats *c){ - #define MLX5_DIFF(cnt) { a->cnt = (b->cnt - c->cnt); } - - MLX5_DIFF(ipackets); - MLX5_DIFF(opackets); - MLX5_DIFF(ibytes); - MLX5_DIFF(obytes); - MLX5_DIFF(imissed); - - MLX5_DIFF(ierrors); - MLX5_DIFF(oerrors); - MLX5_DIFF(rx_nombuf); +priv_xstats_reset(struct priv *priv) +{ + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int n = xstats_n; + uint64_t counters[n]; + + if (priv_read_dev_counters(priv, counters) < 0) + return; + for (i = 0; i != n; ++i) + xstats_ctrl->base[i] = counters[i]; } - - +/** + * DPDK callback to get device statistics. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] stats + * Stats structure output buffer. + */ void mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { struct priv *priv = mlx5_get_priv(dev); - - struct mlx5_stats_priv * lps = &priv->m_stats; - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - - mlx5_stats_read_hw(dev,&tmp); - - mlx5_stats_diff(stats, - &tmp, - &lps->m_shadow); - + struct rte_eth_stats tmp = {0}; + unsigned int i; + unsigned int idx; + + priv_lock(priv); + /* Add software counters. */ + for (i = 0; (i != priv->rxqs_n); ++i) { + struct rxq *rxq = (*priv->rxqs)[i]; + + if (rxq == NULL) + continue; + idx = rxq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_ipackets[idx] += rxq->stats.ipackets; + tmp.q_ibytes[idx] += rxq->stats.ibytes; +#endif + tmp.q_errors[idx] += (rxq->stats.idropped + + rxq->stats.rx_nombuf); + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.ipackets += rxq->stats.ipackets; + tmp.ibytes += rxq->stats.ibytes; +#endif + tmp.ierrors += rxq->stats.idropped; + tmp.rx_nombuf += rxq->stats.rx_nombuf; + } + for (i = 0; (i != priv->txqs_n); ++i) { + struct txq *txq = (*priv->txqs)[i]; + + if (txq == NULL) + continue; + idx = txq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_opackets[idx] += txq->stats.opackets; + tmp.q_obytes[idx] += txq->stats.obytes; +#endif + tmp.q_errors[idx] += txq->stats.odropped; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.opackets += txq->stats.opackets; + tmp.obytes += txq->stats.obytes; +#endif + tmp.oerrors += txq->stats.odropped; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: retrieve and add hardware counters. 
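The rewritten mlx5_stats_get() above fills the per-queue arrays only for queue indices below RTE_ETHDEV_QUEUE_STAT_CNTRS, while the device totals always accumulate. A compact sketch of that clamping, with the structures reduced to a single counter and the usual default of 16 assumed for the limit.

#include <stdint.h>

#define QUEUE_STAT_CNTRS 16	/* assumed RTE_ETHDEV_QUEUE_STAT_CNTRS */

struct dev_stats {
	uint64_t ipackets;
	uint64_t q_ipackets[QUEUE_STAT_CNTRS];
};

/* Fold one queue's packet count into the device stats. */
static void
add_queue_packets(struct dev_stats *s, unsigned int idx, uint64_t pkts)
{
	if (idx < QUEUE_STAT_CNTRS)	/* a per-queue slot exists */
		s->q_ipackets[idx] += pkts;
	s->ipackets += pkts;		/* totals always accumulate */
}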
*/ +#endif + *stats = tmp; priv_unlock(priv); } @@ -294,20 +370,103 @@ void mlx5_stats_reset(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; - struct mlx5_stats_priv * lps = &priv->m_stats; - - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - + unsigned int i; + unsigned int idx; + + priv_lock(priv); + for (i = 0; (i != priv->rxqs_n); ++i) { + if ((*priv->rxqs)[i] == NULL) + continue; + idx = (*priv->rxqs)[i]->stats.idx; + (*priv->rxqs)[i]->stats = + (struct mlx5_rxq_stats){ .idx = idx }; + } + for (i = 0; (i != priv->txqs_n); ++i) { + if ((*priv->txqs)[i] == NULL) + continue; + idx = (*priv->txqs)[i]->stats.idx; + (*priv->txqs)[i]->stats = + (struct mlx5_txq_stats){ .idx = idx }; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: reset hardware counters. */ +#endif + priv_unlock(priv); +} - mlx5_stats_read_hw(dev,&tmp); +/** + * DPDK callback to get extended device statistics. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] stats + * Stats table output buffer. + * @param n + * The size of the stats table. + * + * @return + * Number of xstats on success, negative on failure. + */ +int +mlx5_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *stats, unsigned int n) +{ + struct priv *priv = mlx5_get_priv(dev); + int ret = xstats_n; + + if (n >= xstats_n && stats) { + priv_lock(priv); + ret = priv_xstats_get(priv, stats); + priv_unlock(priv); + } + return ret; +} - /* copy to shadow */ - lps->m_shadow = tmp; +/** + * DPDK callback to clear device extended statistics. + * + * @param dev + * Pointer to Ethernet device structure. + */ +void +mlx5_xstats_reset(struct rte_eth_dev *dev) +{ + struct priv *priv = mlx5_get_priv(dev); + priv_lock(priv); + priv_xstats_reset(priv); priv_unlock(priv); } + +/** + * DPDK callback to retrieve names of extended device statistics + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] xstats_names + * Buffer to insert names into. + * @param n + * Number of names. + * + * @return + * Number of xstats names. 
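priv_xstats_reset() and priv_xstats_get() above implement reset by re-basing: reset snapshots the raw device counters, and get reports raw minus base, so the hardware counters themselves are never cleared. A minimal sketch of the pattern with a fixed, made-up counter count.

#include <stdint.h>

#define N_CTRS 4

static uint64_t base[N_CTRS];	/* snapshot written by reset */

static void
xstats_reset(const uint64_t raw[N_CTRS])
{
	unsigned int i;

	for (i = 0; i != N_CTRS; ++i)
		base[i] = raw[i];
}

static void
xstats_get(const uint64_t raw[N_CTRS], uint64_t out[N_CTRS])
{
	unsigned int i;

	for (i = 0; i != N_CTRS; ++i)
		out[i] = raw[i] - base[i];
}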
+ */ +int +mlx5_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, unsigned int n) +{ + struct priv *priv = mlx5_get_priv(dev); + unsigned int i; + + if (n >= xstats_n && xstats_names) { + priv_lock(priv); + for (i = 0; i != xstats_n; ++i) { + strncpy(xstats_names[i].name, + mlx5_counters_init[i].dpdk_name, + RTE_ETH_XSTATS_NAME_SIZE); + xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; + } + priv_unlock(priv); + } + return xstats_n; +} diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c index d4dccd88..30addd2d 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c @@ -90,6 +90,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) priv_fdir_enable(priv); priv_dev_interrupt_handler_install(priv, dev); + err = priv_flow_start(priv); + priv_xstats_init(priv); priv_unlock(priv); return -err; } @@ -120,6 +122,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev) priv_mac_addrs_disable(priv); priv_destroy_hash_rxqs(priv); priv_fdir_disable(priv); + priv_flow_stop(priv); priv_dev_interrupt_handler_uninstall(priv, dev); priv->started = 0; priv_unlock(priv); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_txq.c b/src/dpdk/drivers/net/mlx5/mlx5_txq.c index 053665d5..949035bd 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_txq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_txq.c @@ -82,7 +82,9 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n) for (i = 0; (i != elts_n); ++i) (*txq_ctrl->txq.elts)[i] = NULL; for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) { - volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i]; + volatile struct mlx5_wqe64 *wqe = + (volatile struct mlx5_wqe64 *) + txq_ctrl->txq.wqes + i; memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe)); } @@ -214,14 +216,10 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl) } tmpl->txq.cqe_n = log2above(ibcq->cqe); tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8; - tmpl->txq.wqes = - (volatile struct mlx5_wqe64 (*)[]) - (uintptr_t)qp->gen_data.sqstart; + tmpl->txq.wqes = qp->gen_data.sqstart; tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt); tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR]; tmpl->txq.bf_reg = qp->gen_data.bf->reg; - tmpl->txq.bf_offset = qp->gen_data.bf->offset; - tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size); tmpl->txq.cq_db = cq->dbrec; tmpl->txq.cqes = (volatile struct mlx5_cqe (*)[]) @@ -412,7 +410,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl, .obj = tmpl.qp, /* Enable multi-packet send if supported. */ .family_flags = - ((priv->mps && !priv->sriov) ? + (priv->mps ? 
IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR : 0), }; diff --git a/src/dpdk/drivers/net/null/rte_eth_null.c b/src/dpdk/drivers/net/null/rte_eth_null.c index 7a248842..57203e2e 100644 --- a/src/dpdk/drivers/net/null/rte_eth_null.c +++ b/src/dpdk/drivers/net/null/rte_eth_null.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include @@ -88,7 +88,6 @@ struct pmd_internals { static struct ether_addr eth_addr = { .addr_bytes = {0} }; -static const char *drivername = "Null PMD"; static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -295,13 +294,11 @@ eth_dev_info(struct rte_eth_dev *dev, return; internals = dev->data->dev_private; - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues); dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues); dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; dev_info->reta_size = internals->reta_size; dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; } @@ -480,6 +477,8 @@ static const struct eth_dev_ops ops = { .rss_hash_conf_get = eth_rss_hash_conf_get }; +static struct rte_vdev_driver pmd_null_drv; + int eth_dev_null_create(const char *name, const unsigned numa_node, @@ -517,7 +516,7 @@ eth_dev_null_create(const char *name, goto error; /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) goto error; @@ -550,12 +549,10 @@ eth_dev_null_create(const char *name, eth_dev->data = data; eth_dev->dev_ops = &ops; - TAILQ_INIT(ð_dev->link_intr_cbs); - eth_dev->driver = NULL; data->dev_flags = RTE_ETH_DEV_DETACHABLE; data->kdrv = RTE_KDRV_NONE; - data->drv_name = drivername; + data->drv_name = pmd_null_drv.driver.name; data->numa_node = numa_node; /* finally assign rx and tx ops */ @@ -611,7 +608,7 @@ get_packet_copy_arg(const char *key __rte_unused, } static int -rte_pmd_null_devinit(const char *name, const char *params) +rte_pmd_null_probe(const char *name, const char *params) { unsigned numa_node; unsigned packet_size = default_packet_size; @@ -663,7 +660,7 @@ free_kvlist: } static int -rte_pmd_null_devuninit(const char *name) +rte_pmd_null_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; @@ -686,13 +683,13 @@ rte_pmd_null_devuninit(const char *name) return 0; } -static struct rte_driver pmd_null_drv = { - .type = PMD_VDEV, - .init = rte_pmd_null_devinit, - .uninit = rte_pmd_null_devuninit, +static struct rte_vdev_driver pmd_null_drv = { + .probe = rte_pmd_null_probe, + .remove = rte_pmd_null_remove, }; -PMD_REGISTER_DRIVER(pmd_null_drv, eth_null); -DRIVER_REGISTER_PARAM_STRING(eth_null, +RTE_PMD_REGISTER_VDEV(net_null, pmd_null_drv); +RTE_PMD_REGISTER_ALIAS(net_null, eth_null); +RTE_PMD_REGISTER_PARAM_STRING(net_null, "size= " "copy="); diff --git a/src/dpdk/drivers/net/ring/rte_eth_ring.c b/src/dpdk/drivers/net/ring/rte_eth_ring.c index a7048c77..6f9cc1a6 100644 --- a/src/dpdk/drivers/net/ring/rte_eth_ring.c +++ b/src/dpdk/drivers/net/ring/rte_eth_ring.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include @@ -75,7 +75,6 @@ struct pmd_internals { }; -static const char *drivername = "Rings PMD"; static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -173,13 +172,11 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info 
*dev_info) { struct pmd_internals *internals = dev->data->dev_private; - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; dev_info->max_rx_queues = (uint16_t)internals->max_rx_queues; dev_info->max_tx_queues = (uint16_t)internals->max_tx_queues; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -259,6 +256,8 @@ static const struct eth_dev_ops ops = { .mac_addr_add = eth_mac_addr_add, }; +static struct rte_vdev_driver pmd_ring_drv; + static int do_eth_dev_ring_create(const char *name, struct rte_ring * const rx_queues[], const unsigned nb_rx_queues, @@ -303,7 +302,7 @@ do_eth_dev_ring_create(const char *name, } /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { rte_errno = ENOSPC; goto error; @@ -343,11 +342,9 @@ do_eth_dev_ring_create(const char *name, eth_dev->dev_ops = &ops; data->dev_flags = RTE_ETH_DEV_DETACHABLE; data->kdrv = RTE_KDRV_NONE; - data->drv_name = drivername; + data->drv_name = pmd_ring_drv.driver.name; data->numa_node = numa_node; - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - /* finally assign rx and tx ops */ eth_dev->rx_pkt_burst = eth_ring_rx; eth_dev->tx_pkt_burst = eth_ring_tx; @@ -505,7 +502,7 @@ out: } static int -rte_pmd_ring_devinit(const char *name, const char *params) +rte_pmd_ring_probe(const char *name, const char *params) { struct rte_kvargs *kvlist = NULL; int ret = 0; @@ -557,7 +554,7 @@ rte_pmd_ring_devinit(const char *name, const char *params) goto out_free; for (info->count = 0; info->count < info->total; info->count++) { - ret = eth_dev_ring_create(name, + ret = eth_dev_ring_create(info->list[info->count].name, info->list[info->count].node, info->list[info->count].action); if ((ret == -1) && @@ -580,7 +577,7 @@ out_free: } static int -rte_pmd_ring_devuninit(const char *name) +rte_pmd_ring_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals = NULL; @@ -599,36 +596,34 @@ rte_pmd_ring_devuninit(const char *name) eth_dev_stop(eth_dev); - if (eth_dev->data) { - internals = eth_dev->data->dev_private; - if (internals->action == DEV_CREATE) { - /* - * it is only necessary to delete the rings in rx_queues because - * they are the same used in tx_queues - */ - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - r = eth_dev->data->rx_queues[i]; - rte_ring_free(r->rng); - } + internals = eth_dev->data->dev_private; + if (internals->action == DEV_CREATE) { + /* + * it is only necessary to delete the rings in rx_queues because + * they are the same used in tx_queues + */ + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + r = eth_dev->data->rx_queues[i]; + rte_ring_free(r->rng); } - - rte_free(eth_dev->data->rx_queues); - rte_free(eth_dev->data->tx_queues); - rte_free(eth_dev->data->dev_private); } + rte_free(eth_dev->data->rx_queues); + rte_free(eth_dev->data->tx_queues); + rte_free(eth_dev->data->dev_private); + rte_free(eth_dev->data); rte_eth_dev_release_port(eth_dev); return 0; } -static struct rte_driver pmd_ring_drv = { - .type = PMD_VDEV, - .init = rte_pmd_ring_devinit, - .uninit = rte_pmd_ring_devuninit, +static struct rte_vdev_driver pmd_ring_drv = { + .probe = rte_pmd_ring_probe, + .remove = rte_pmd_ring_remove, }; -PMD_REGISTER_DRIVER(pmd_ring_drv, eth_ring); -DRIVER_REGISTER_PARAM_STRING(eth_ring, - "nodeaction=[attach|detach]"); +RTE_PMD_REGISTER_VDEV(net_ring, pmd_ring_drv); +RTE_PMD_REGISTER_ALIAS(net_ring, eth_ring); 
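The null and ring PMD hunks above follow the same mechanical conversion from the old rte_driver/PMD_REGISTER_DRIVER scheme to an rte_vdev_driver with probe/remove callbacks. A minimal skeleton of the new-style registration; the driver name net_mypmd is made up and the sketch assumes the rte_vdev.h API of this DPDK release.

#include <rte_common.h>
#include <rte_log.h>
#include <rte_vdev.h>

static int
mypmd_probe(const char *name, const char *params __rte_unused)
{
	RTE_LOG(INFO, PMD, "probing vdev %s\n", name);
	/* rte_eth_dev_allocate(name), fill dev_ops and rx/tx burst here. */
	return 0;
}

static int
mypmd_remove(const char *name)
{
	RTE_LOG(INFO, PMD, "removing vdev %s\n", name);
	return 0;
}

static struct rte_vdev_driver mypmd_drv = {
	.probe = mypmd_probe,
	.remove = mypmd_remove,
};

RTE_PMD_REGISTER_VDEV(net_mypmd, mypmd_drv);
RTE_PMD_REGISTER_ALIAS(net_mypmd, eth_mypmd);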
+RTE_PMD_REGISTER_PARAM_STRING(net_ring, + ETH_RING_NUMA_NODE_ACTION_ARG "=name:node:action(ATTACH|CREATE)"); diff --git a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c index 483d7894..fe7a6b3b 100644 --- a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c +++ b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c @@ -62,7 +62,7 @@ */ #define RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED 8 -#define RTE_SZEDATA2_DRIVER_NAME rte_szedata2_pmd +#define RTE_SZEDATA2_DRIVER_NAME net_szedata2 #define RTE_SZEDATA2_PCI_DRIVER_NAME "rte_szedata2_pmd" #define SZEDATA2_DEV_PATH_FMT "/dev/szedataII%u" @@ -91,6 +91,7 @@ struct pmd_internals { uint16_t max_rx_queues; uint16_t max_tx_queues; char sze_dev[PATH_MAX]; + struct rte_mem_resource *pci_rsc; }; static struct ether_addr eth_addr = { @@ -1030,6 +1031,7 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->if_index = 0; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; @@ -1144,8 +1146,10 @@ eth_link_update(struct rte_eth_dev *dev, struct rte_eth_link link; struct rte_eth_link *link_ptr = &link; struct rte_eth_link *dev_link = &dev->data->dev_link; + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); switch (cgmii_link_speed(ibuf)) { @@ -1180,11 +1184,13 @@ eth_link_update(struct rte_eth_dev *dev, static int eth_dev_set_link_up(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_OBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF, volatile struct szedata2_cgmii_obuf *); cgmii_ibuf_enable(ibuf); @@ -1195,11 +1201,13 @@ eth_dev_set_link_up(struct rte_eth_dev *dev) static int eth_dev_set_link_down(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_OBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF, volatile struct szedata2_cgmii_obuf *); cgmii_ibuf_disable(ibuf); @@ -1281,8 +1289,10 @@ eth_mac_addr_set(struct rte_eth_dev *dev __rte_unused, static void eth_promiscuous_enable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_PROMISC); } @@ -1290,8 +1300,10 @@ eth_promiscuous_enable(struct rte_eth_dev *dev) static void eth_promiscuous_disable(struct rte_eth_dev *dev) { + struct pmd_internals 
*internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID); } @@ -1299,8 +1311,10 @@ eth_promiscuous_disable(struct rte_eth_dev *dev) static void eth_allmulticast_enable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ALL_MULTICAST); } @@ -1308,8 +1322,10 @@ eth_allmulticast_enable(struct rte_eth_dev *dev) static void eth_allmulticast_disable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID); } @@ -1349,7 +1365,7 @@ static const struct eth_dev_ops ops = { * -1 on error */ static int -get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) +get_szedata2_index(const struct rte_pci_addr *pcislot_addr, uint32_t *index) { DIR *dir; struct dirent *entry; @@ -1357,7 +1373,6 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) uint32_t tmp_index; FILE *fd; char pcislot_path[PATH_MAX]; - struct rte_pci_addr pcislot_addr = dev->pci_dev->addr; uint32_t domain; uint32_t bus; uint32_t devid; @@ -1392,10 +1407,10 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) if (ret != 4) continue; - if (pcislot_addr.domain == domain && - pcislot_addr.bus == bus && - pcislot_addr.devid == devid && - pcislot_addr.function == function) { + if (pcislot_addr->domain == domain && + pcislot_addr->bus == bus && + pcislot_addr->devid == devid && + pcislot_addr->function == function) { *index = tmp_index; closedir(dir); return 0; @@ -1415,9 +1430,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) struct szedata *szedata_temp; int ret; uint32_t szedata2_index; - struct rte_pci_addr *pci_addr = &dev->pci_dev->addr; - struct rte_pci_resource *pci_rsc = - &dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER]; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + struct rte_pci_addr *pci_addr = &pci_dev->addr; + struct rte_mem_resource *pci_rsc = + &pci_dev->mem_resource[PCI_RESOURCE_NUMBER]; char rsc_filename[PATH_MAX]; void *pci_resource_ptr = NULL; int fd; @@ -1427,7 +1443,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) pci_addr->function); /* Get index of szedata2 device file and create path to device file */ - ret = get_szedata2_index(dev, &szedata2_index); + ret = get_szedata2_index(pci_addr, &szedata2_index); if (ret != 0) { RTE_LOG(ERR, PMD, "Failed to get szedata2 device index!\n"); return -ENODEV; @@ -1471,10 +1487,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) /* Set function callbacks for Ethernet API */ dev->dev_ops = &ops; - rte_eth_copy_pci_info(dev, dev->pci_dev); + rte_eth_copy_pci_info(dev, pci_dev); - /* mmap pci resource0 file to rte_pci_resource structure */ - if (dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr == + /* mmap pci resource0 file 
to rte_mem_resource structure */ + if (pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr == 0) { RTE_LOG(ERR, PMD, "Missing resource%u file\n", PCI_RESOURCE_NUMBER); @@ -1491,7 +1507,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) } pci_resource_ptr = mmap(0, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (pci_resource_ptr == NULL) { @@ -1499,8 +1515,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) rsc_filename, fd); return -EINVAL; } - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr = - pci_resource_ptr; + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr = pci_resource_ptr; + internals->pci_rsc = pci_rsc; RTE_LOG(DEBUG, PMD, "resource%u phys_addr = 0x%llx len = %llu " "virt addr = %llx\n", PCI_RESOURCE_NUMBER, @@ -1516,8 +1532,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) RTE_CACHE_LINE_SIZE); if (data->mac_addrs == NULL) { RTE_LOG(ERR, PMD, "Could not alloc space for MAC address!\n"); - munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); + munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); return -EINVAL; } @@ -1537,12 +1553,13 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) static int rte_szedata2_eth_dev_uninit(struct rte_eth_dev *dev) { - struct rte_pci_addr *pci_addr = &dev->pci_dev->addr; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + struct rte_pci_addr *pci_addr = &pci_dev->addr; rte_free(dev->data->mac_addrs); dev->data->mac_addrs = NULL; - munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); + munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); RTE_LOG(INFO, PMD, "szedata2 device (" PCI_PRI_FMT ") successfully uninitialized\n", @@ -1572,33 +1589,16 @@ static const struct rte_pci_id rte_szedata2_pci_id_table[] = { static struct eth_driver szedata2_eth_driver = { .pci_drv = { - .name = RTE_SZEDATA2_PCI_DRIVER_NAME, .id_table = rte_szedata2_pci_id_table, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = rte_szedata2_eth_dev_init, .eth_dev_uninit = rte_szedata2_eth_dev_uninit, .dev_private_size = sizeof(struct pmd_internals), }; -static int -rte_szedata2_init(const char *name __rte_unused, - const char *args __rte_unused) -{ - rte_eth_driver_register(&szedata2_eth_driver); - return 0; -} - -static int -rte_szedata2_uninit(const char *name __rte_unused) -{ - return 0; -} - -static struct rte_driver rte_szedata2_driver = { - .type = PMD_PDEV, - .init = rte_szedata2_init, - .uninit = rte_szedata2_uninit, -}; - -PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME); -DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table); +RTE_PMD_REGISTER_PCI(RTE_SZEDATA2_DRIVER_NAME, szedata2_eth_driver.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table); +RTE_PMD_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME, + "* combo6core & combov3 & szedata2 & szedata2_cv3"); diff --git a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h index 522cf47f..afe8a383 100644 --- a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h +++ b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h @@ -117,94 +117,82 @@ struct szedata { * @return Byte from PCI resource 
at offset "offset". */ static inline uint8_t -pci_resource_read8(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read8(struct rte_mem_resource *rsc, uint32_t offset) { - return *((uint8_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)); + return *((uint8_t *)((uint8_t *)rsc->addr + offset)); } /* * @return Two bytes from PCI resource starting at offset "offset". */ static inline uint16_t -pci_resource_read16(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read16(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *)rsc->addr + + offset))); } /* * @return Four bytes from PCI resource starting at offset "offset". */ static inline uint32_t -pci_resource_read32(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read32(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *)rsc->addr + + offset))); } /* * @return Eight bytes from PCI resource starting at offset "offset". */ static inline uint64_t -pci_resource_read64(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read64(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *)rsc->addr + + offset))); } /* * Write one byte to PCI resource address space at offset "offset". */ static inline void -pci_resource_write8(struct rte_eth_dev *dev, uint32_t offset, uint8_t val) +pci_resource_write8(struct rte_mem_resource *rsc, uint32_t offset, uint8_t val) { - *((uint8_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = val; + *((uint8_t *)((uint8_t *)rsc->addr + offset)) = val; } /* * Write two bytes to PCI resource address space at offset "offset". */ static inline void -pci_resource_write16(struct rte_eth_dev *dev, uint32_t offset, uint16_t val) +pci_resource_write16(struct rte_mem_resource *rsc, uint32_t offset, + uint16_t val) { - *((uint16_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_16(val); + *((uint16_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_16(val); } /* * Write four bytes to PCI resource address space at offset "offset". */ static inline void -pci_resource_write32(struct rte_eth_dev *dev, uint32_t offset, uint32_t val) +pci_resource_write32(struct rte_mem_resource *rsc, uint32_t offset, + uint32_t val) { - *((uint32_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_32(val); + *((uint32_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_32(val); } /* * Write eight bytes to PCI resource address space at offset "offset". 
*/ static inline void -pci_resource_write64(struct rte_eth_dev *dev, uint32_t offset, uint64_t val) +pci_resource_write64(struct rte_mem_resource *rsc, uint32_t offset, + uint64_t val) { - *((uint64_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_64(val); + *((uint64_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_64(val); } -#define SZEDATA2_PCI_RESOURCE_PTR(dev, offset, type) \ - ((type)((uint8_t *) \ - ((dev)->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr) \ - + (offset))) +#define SZEDATA2_PCI_RESOURCE_PTR(rsc, offset, type) \ + ((type)(((uint8_t *)(rsc)->addr) + (offset))) enum szedata2_link_speed { SZEDATA2_LINK_SPEED_DEFAULT = 0, diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_ethdev.c index 35e67b90..1d572b5d 100644 --- a/src/dpdk/drivers/net/virtio/virtio_ethdev.c +++ b/src/dpdk/drivers/net/virtio/virtio_ethdev.c @@ -103,7 +103,8 @@ static int virtio_dev_queue_stats_mapping_set( * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_virtio_map[] = { - { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_DEVICEID_MIN) }, + { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) }, + { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -125,8 +126,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, - {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, - {"size_1518_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, + {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, + {"size_1519_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, }; /* [rt]x_qX_ is prepended to the name string here */ @@ -142,8 +143,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, - {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, - {"size_1518_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, + {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, + {"size_1519_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, }; #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ @@ -151,6 +152,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ sizeof(rte_virtio_txq_stat_strings[0])) +struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; + static int virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, int *dlen, int pkt_num) @@ -279,28 +282,65 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) return 0; } -void -virtio_dev_queue_release(struct virtqueue *vq) +static void +virtio_dev_queue_release(void *queue __rte_unused) { - struct virtio_hw *hw; + /* do nothing */ +} - if (vq) { - hw = vq->hw; - if (vq->configured) - hw->vtpci_ops->del_queue(hw, vq); +static int 
+virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx) +{ + if (vtpci_queue_idx == hw->max_queue_pairs * 2) + return VTNET_CQ; + else if (vtpci_queue_idx % 2 == 0) + return VTNET_RQ; + else + return VTNET_TQ; +} - rte_free(vq->sw_ring); - rte_free(vq); - } +static uint16_t +virtio_get_nr_vq(struct virtio_hw *hw) +{ + uint16_t nr_vq = hw->max_queue_pairs * 2; + + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) + nr_vq += 1; + + return nr_vq; +} + +static void +virtio_init_vring(struct virtqueue *vq) +{ + int size = vq->vq_nentries; + struct vring *vr = &vq->vq_ring; + uint8_t *ring_mem = vq->vq_ring_virt_mem; + + PMD_INIT_FUNC_TRACE(); + + /* + * Reinitialise since virtio port might have been stopped and restarted + */ + memset(ring_mem, 0, vq->vq_ring_size); + vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_avail_idx = 0; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); + vq->vq_free_cnt = vq->vq_nentries; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + vring_desc_init(vr->desc, size); + + /* + * Disable device(host) interrupting guest + */ + virtqueue_disable_intr(vq); } -int virtio_dev_queue_setup(struct rte_eth_dev *dev, - int queue_type, - uint16_t queue_idx, - uint16_t vtpci_queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - void **pvq) +static int +virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) { char vq_name[VIRTQUEUE_MAX_NAME_SZ]; char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ]; @@ -311,9 +351,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, struct virtnet_tx *txvq = NULL; struct virtnet_ctl *cvq = NULL; struct virtqueue *vq; - const char *queue_names[] = {"rvq", "txq", "cvq"}; - size_t sz_vq, sz_q = 0, sz_hdr_mz = 0; + size_t sz_hdr_mz = 0; void *sw_ring = NULL; + int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx); int ret; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -322,8 +362,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, * Read the virtqueue size from the Queue Size field * Always power of 2 and if 0 virtqueue does not exist */ - vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx); - PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc); + vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx); + PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size); if (vq_size == 0) { PMD_INIT_LOG(ERR, "virtqueue does not exist"); return -EINVAL; @@ -334,40 +374,35 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", - dev->data->port_id, queue_names[queue_type], queue_idx); + snprintf(vq_name, sizeof(vq_name), "port%d_vq%d", + dev->data->port_id, vtpci_queue_idx); - sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) + + size = RTE_ALIGN_CEIL(sizeof(*vq) + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - if (queue_type == VTNET_RQ) { - sz_q = sz_vq + sizeof(*rxvq); - } else if (queue_type == VTNET_TQ) { - sz_q = sz_vq + sizeof(*txvq); + if (queue_type == VTNET_TQ) { /* * For each xmit packet, allocate a virtio_net_hdr * and indirect ring elements */ sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region); } else if (queue_type == VTNET_CQ) { - sz_q = sz_vq + sizeof(*cvq); /* Allocate a page for control vq command, data and status */ sz_hdr_mz = PAGE_SIZE; } - vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id); + vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE, + SOCKET_ID_ANY); if 
(vq == NULL) { PMD_INIT_LOG(ERR, "can not allocate vq"); return -ENOMEM; } + hw->vqs[vtpci_queue_idx] = vq; + vq->hw = hw; vq->vq_queue_index = vtpci_queue_idx; vq->vq_nentries = vq_size; - if (nb_desc == 0 || nb_desc > vq_size) - nb_desc = vq_size; - vq->vq_free_cnt = nb_desc; - /* * Reserve a memzone for vring elements */ @@ -376,7 +411,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size); - mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id, + mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, + SOCKET_ID_ANY, 0, VIRTIO_PCI_VRING_ALIGN); if (mz == NULL) { if (rte_errno == EEXIST) @@ -396,12 +432,13 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64, (uint64_t)(uintptr_t)mz->addr); + virtio_init_vring(vq); + if (sz_hdr_mz) { - snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr", - dev->data->port_id, queue_names[queue_type], - queue_idx); + snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr", + dev->data->port_id, vtpci_queue_idx); hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz, - socket_id, 0, + SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { if (rte_errno == EEXIST) @@ -418,7 +455,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, sizeof(vq->sw_ring[0]); sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, - RTE_CACHE_LINE_SIZE, socket_id); + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); if (!sw_ring) { PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); ret = -ENOMEM; @@ -426,37 +463,33 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } vq->sw_ring = sw_ring; - rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq); + rxvq = &vq->rxq; rxvq->vq = vq; rxvq->port_id = dev->data->port_id; - rxvq->queue_id = queue_idx; rxvq->mz = mz; - *pvq = rxvq; } else if (queue_type == VTNET_TQ) { - txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); + txvq = &vq->txq; txvq->vq = vq; txvq->port_id = dev->data->port_id; - txvq->queue_id = queue_idx; txvq->mz = mz; txvq->virtio_net_hdr_mz = hdr_mz; txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; - - *pvq = txvq; } else if (queue_type == VTNET_CQ) { - cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq = &vq->cq; cvq->vq = vq; cvq->mz = mz; cvq->virtio_net_hdr_mz = hdr_mz; cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); - *pvq = cvq; + + hw->cvq = cvq; } - /* For virtio_user case (that is when dev->pci_dev is NULL), we use + /* For virtio_user case (that is when hw->dev is NULL), we use * virtual address. And we need properly set _offset_, please see * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
*/ - if (dev->pci_dev) + if (!hw->virtio_user_dev) vq->offset = offsetof(struct rte_mbuf, buf_physaddr); else { vq->vq_ring_mem = (uintptr_t)mz->addr; @@ -488,13 +521,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); - virtio_dev_queue_release(vq); return -EINVAL; } - vq->configured = 1; return 0; fail_q_alloc: @@ -506,58 +537,88 @@ fail_q_alloc: return ret; } -static int -virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, - uint32_t socket_id) +static void +virtio_free_queues(struct virtio_hw *hw) { - struct virtnet_ctl *cvq; - int ret; - struct virtio_hw *hw = dev->data->dev_private; + uint16_t nr_vq = virtio_get_nr_vq(hw); + struct virtqueue *vq; + int queue_type; + uint16_t i; - PMD_INIT_FUNC_TRACE(); - ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, - vtpci_queue_idx, 0, socket_id, (void **)&cvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "control vq initialization failed"); - return ret; + for (i = 0; i < nr_vq; i++) { + vq = hw->vqs[i]; + if (!vq) + continue; + + queue_type = virtio_get_queue_type(hw, i); + if (queue_type == VTNET_RQ) { + rte_free(vq->sw_ring); + rte_memzone_free(vq->rxq.mz); + } else if (queue_type == VTNET_TQ) { + rte_memzone_free(vq->txq.mz); + rte_memzone_free(vq->txq.virtio_net_hdr_mz); + } else { + rte_memzone_free(vq->cq.mz); + rte_memzone_free(vq->cq.virtio_net_hdr_mz); + } + + rte_free(vq); } - hw->cvq = cvq; - return 0; + rte_free(hw->vqs); } -static void -virtio_free_queues(struct rte_eth_dev *dev) +static int +virtio_alloc_queues(struct rte_eth_dev *dev) { - unsigned int i; - - for (i = 0; i < dev->data->nb_rx_queues; i++) - virtio_dev_rx_queue_release(dev->data->rx_queues[i]); + struct virtio_hw *hw = dev->data->dev_private; + uint16_t nr_vq = virtio_get_nr_vq(hw); + uint16_t i; + int ret; - dev->data->nb_rx_queues = 0; + hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0); + if (!hw->vqs) { + PMD_INIT_LOG(ERR, "failed to allocate vqs"); + return -ENOMEM; + } - for (i = 0; i < dev->data->nb_tx_queues; i++) - virtio_dev_tx_queue_release(dev->data->tx_queues[i]); + for (i = 0; i < nr_vq; i++) { + ret = virtio_init_queue(dev, i); + if (ret < 0) { + virtio_free_queues(hw); + return ret; + } + } - dev->data->nb_tx_queues = 0; + return 0; } +static void virtio_queues_unbind_intr(struct rte_eth_dev *dev); + static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); - if (hw->started == 1) - virtio_dev_stop(dev); - /* reset the NIC */ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); + if (intr_conf->rxq) + virtio_queues_unbind_intr(dev); + + if (intr_conf->lsc || intr_conf->rxq) { + rte_intr_disable(dev->intr_handle); + rte_intr_efd_disable(dev->intr_handle); + rte_free(dev->intr_handle->intr_vec); + dev->intr_handle->intr_vec = NULL; + } + vtpci_reset(hw); virtio_dev_free_mbufs(dev); - virtio_free_queues(dev); + virtio_free_queues(hw); } static void @@ -569,7 +630,7 @@ virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; 
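virtio_alloc_queues() and virtio_free_queues() replace the old per-ethdev-queue release loops with an all-or-nothing pattern over the hw->vqs array: initialise every virtqueue up front, and on any mid-way failure tear down whatever was created, skipping NULL slots. A standalone sketch of that pattern, using plain malloc/free as stand-ins for the driver calls:

    #include <stdlib.h>

    static void free_all(void **vqs, unsigned int n)
    {
        if (vqs == NULL)
            return;
        for (unsigned int i = 0; i < n; i++)
            free(vqs[i]);              /* free(NULL) is a no-op */
        free(vqs);
    }

    static int alloc_all(void ***out, unsigned int n)
    {
        void **vqs = calloc(n, sizeof(*vqs));

        if (vqs == NULL)
            return -1;
        for (unsigned int i = 0; i < n; i++) {
            vqs[i] = malloc(64);       /* stand-in for virtio_init_queue() */
            if (vqs[i] == NULL) {
                free_all(vqs, n);      /* roll back everything created so far */
                return -1;
            }
        }
        *out = vqs;
        return 0;
    }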
} @@ -592,7 +653,7 @@ virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -615,7 +676,7 @@ virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -638,7 +699,7 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -652,6 +713,43 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); } +#define VLAN_TAG_LEN 4 /* 802.3ac tag (not DMA'd) */ +static int +virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct virtio_hw *hw = dev->data->dev_private; + uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN + + hw->vtnet_hdr_size; + uint32_t frame_size = mtu + ether_hdr_len; + + if (mtu < ETHER_MIN_MTU || frame_size > VIRTIO_MAX_RX_PKTLEN) { + PMD_INIT_LOG(ERR, "MTU should be between %d and %d", + ETHER_MIN_MTU, VIRTIO_MAX_RX_PKTLEN - ether_hdr_len); + return -EINVAL; + } + return 0; +} + +static int +virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_enable_intr(vq); + return 0; +} + +static int +virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_disable_intr(vq); + return 0; +} + /* * dev_ops for virtio, bare necessities for basic operation */ @@ -664,7 +762,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .promiscuous_disable = virtio_dev_promiscuous_disable, .allmulticast_enable = virtio_dev_allmulticast_enable, .allmulticast_disable = virtio_dev_allmulticast_disable, - + .mtu_set = virtio_mtu_set, .dev_infos_get = virtio_dev_info_get, .stats_get = virtio_dev_stats_get, .xstats_get = virtio_dev_xstats_get, @@ -673,9 +771,12 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .xstats_reset = virtio_dev_stats_reset, .link_update = virtio_dev_link_update, .rx_queue_setup = virtio_dev_rx_queue_setup, - .rx_queue_release = virtio_dev_rx_queue_release, + .rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable, + .rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable, + .rx_queue_release = virtio_dev_queue_release, + .rx_descriptor_done = virtio_dev_rx_queue_done, .tx_queue_setup = virtio_dev_tx_queue_setup, - .tx_queue_release = virtio_dev_tx_queue_release, + .tx_queue_release = virtio_dev_queue_release, /* collect stats per queue */ .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, .vlan_filter_set = virtio_vlan_filter_set, @@ -830,6 +931,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)rxvq) + rte_virtio_rxq_stat_strings[t].offset); + xstats[count].id = count; count++; } } @@ -845,6 +947,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 
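The new virtio_mtu_set() only validates the request: the MTU plus Ethernet header, VLAN tag and virtio-net header must fit into VIRTIO_MAX_RX_PKTLEN. A standalone sketch of that bound; the vtnet_hdr_size of 12 used below is an assumption matching the mergeable-RX-buffer/VERSION_1 case, the non-mergeable legacy header is 10 bytes:

    #include <stdint.h>
    #include <stdio.h>

    #define ETHER_HDR_LEN        14
    #define VLAN_TAG_LEN          4      /* VLAN tag, not DMA'd */
    #define ETHER_MIN_MTU        68
    #define VIRTIO_MAX_RX_PKTLEN 9728

    /* Same bound virtio_mtu_set() enforces: the requested MTU plus all
     * per-packet headers must fit in the largest receive frame. */
    static int mtu_ok(uint16_t mtu, uint16_t vtnet_hdr_size)
    {
        uint32_t hdr = ETHER_HDR_LEN + VLAN_TAG_LEN + vtnet_hdr_size;

        return mtu >= ETHER_MIN_MTU &&
               (uint32_t)mtu + hdr <= VIRTIO_MAX_RX_PKTLEN;
    }

    int main(void)
    {
        printf("mtu 1500: %s\n", mtu_ok(1500, 12) ? "ok" : "rejected");
        printf("mtu 9698: %s\n", mtu_ok(9698, 12) ? "ok" : "rejected");
        printf("mtu 9699: %s\n", mtu_ok(9699, 12) ? "ok" : "rejected");
        return 0;
    }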
xstats[count].value = *(uint64_t *)(((char *)txvq) + rte_virtio_txq_stat_strings[t].offset); + xstats[count].id = count; count++; } } @@ -1042,17 +1145,16 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int -virtio_negotiate_features(struct virtio_hw *hw) +virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) { uint64_t host_features; /* Prepare guest_features: feature that driver wants to support */ - hw->guest_features = VIRTIO_PMD_GUEST_FEATURES; PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, - hw->guest_features); + req_features); /* Read device(host) feature bits */ - host_features = hw->vtpci_ops->get_features(hw); + host_features = VTPCI_OPS(hw)->get_features(hw); PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, host_features); @@ -1060,6 +1162,7 @@ virtio_negotiate_features(struct virtio_hw *hw) * Negotiate features: Subset of device feature bits are written back * guest feature bits. */ + hw->guest_features = req_features; hw->guest_features = vtpci_negotiate_features(hw, host_features); PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64, hw->guest_features); @@ -1078,6 +1181,8 @@ virtio_negotiate_features(struct virtio_hw *hw) } } + hw->req_guest_features = req_features; + return 0; } @@ -1086,7 +1191,7 @@ virtio_negotiate_features(struct virtio_hw *hw) * if link state changed. */ static void -virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +virtio_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = param; @@ -1097,13 +1202,13 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) + if (rte_intr_enable(handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { if (virtio_dev_link_update(dev, 0) == 0) _rte_eth_dev_callback_process(dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } } @@ -1118,47 +1223,105 @@ rx_func_get(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = &virtio_recv_pkts; } -/* - * This function is based on probe() function in virtio_pci.c - * It returns 0 on success. +/* Only support 1:1 queue/interrupt mapping so far. 
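virtio_negotiate_features() now takes the requested feature set as a parameter instead of the fixed VIRTIO_PMD_GUEST_FEATURES, which lets virtio_dev_configure() renegotiate when offloads are asked for; the core of the negotiation remains a bitwise AND of the request and the host offer. A standalone sketch (bit positions follow the virtio spec numbering used elsewhere in this patch):

    #include <stdint.h>
    #include <stdio.h>

    #define F_CSUM        (1ULL << 0)
    #define F_GUEST_CSUM  (1ULL << 1)
    #define F_MAC         (1ULL << 5)
    #define F_STATUS      (1ULL << 16)
    #define F_VERSION_1   (1ULL << 32)

    /* Negotiation is the intersection of driver request and device offer. */
    static uint64_t negotiate(uint64_t req_features, uint64_t host_features)
    {
        return req_features & host_features;
    }

    int main(void)
    {
        uint64_t req  = F_CSUM | F_GUEST_CSUM | F_MAC | F_STATUS | F_VERSION_1;
        uint64_t host = F_CSUM | F_MAC | F_VERSION_1;   /* host lacks GUEST_CSUM */

        printf("negotiated = 0x%llx\n",
               (unsigned long long)negotiate(req, host));
        return 0;
    }

virtio_dev_configure() later checks the negotiated set against what it requested and returns -ENOTSUP when a requested RX offload did not survive this intersection.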
+ * TODO: support n:1 queue/interrupt mapping when there are limited number of + * interrupt vectors (data->dev_private; - struct virtio_net_config *config; - struct virtio_net_config local_config; - struct rte_pci_device *pci_dev; - uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; - int ret; + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; - RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); + PMD_INIT_LOG(INFO, "queue/interrupt binding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) { + dev->intr_handle->intr_vec[i] = i + 1; + if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) == + VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set queue vector"); + return -EBUSY; + } + } - eth_dev->dev_ops = &virtio_eth_dev_ops; - eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + return 0; +} - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - rx_func_get(eth_dev); - return 0; +static void +virtio_queues_unbind_intr(struct rte_eth_dev *dev) +{ + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(INFO, "queue/interrupt unbinding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) + VTPCI_OPS(hw)->set_queue_irq(hw, + hw->vqs[i * VTNET_CQ], + VIRTIO_MSI_NO_VECTOR); +} + +static int +virtio_configure_intr(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (!rte_intr_cap_multiple(dev->intr_handle)) { + PMD_INIT_LOG(ERR, "Multiple intr vector not supported"); + return -ENOTSUP; } - /* Allocate memory for storing MAC addresses */ - eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); - if (eth_dev->data->mac_addrs == NULL) { - PMD_INIT_LOG(ERR, - "Failed to allocate %d bytes needed to store MAC addresses", - VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); - return -ENOMEM; + if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "Fail to create eventfd"); + return -1; } - pci_dev = eth_dev->pci_dev; + if (!dev->intr_handle->intr_vec) { + dev->intr_handle->intr_vec = + rte_zmalloc("intr_vec", + hw->max_queue_pairs * sizeof(int), 0); + if (!dev->intr_handle->intr_vec) { + PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors", + hw->max_queue_pairs); + return -ENOMEM; + } + } - if (pci_dev) { - ret = vtpci_init(pci_dev, hw, &dev_flags); - if (ret) - return ret; + /* Re-register callback to update max_intr */ + rte_intr_callback_unregister(dev->intr_handle, + virtio_interrupt_handler, + dev); + rte_intr_callback_register(dev->intr_handle, + virtio_interrupt_handler, + dev); + + /* DO NOT try to remove this! This function will enable msix, or QEMU + * will encounter SIGSEGV when DRIVER_OK is sent. + * And for legacy devices, this should be done before queue/vec binding + * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR + * (22) will be ignored. 
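virtio_queues_bind_intr() implements the 1:1 mapping the comment describes: RX queue i (vtpci queue 2*i) is tied to MSI-X vector i + 1, since vector 0 stays reserved for the configuration-change/link-state interrupt. A trivial standalone sketch of that mapping:

    #include <stdint.h>
    #include <stdio.h>

    /* Vector 0 is kept for config/LSC; RX queues start at vector 1. */
    static uint16_t rxq_to_msix_vector(uint16_t rxq_id)
    {
        return rxq_id + 1;
    }

    int main(void)
    {
        for (uint16_t q = 0; q < 4; q++)
            printf("rxq %u -> vtpci vq %u, msix vector %u\n",
                   (unsigned)q, (unsigned)(2 * q),
                   (unsigned)rxq_to_msix_vector(q));
        return 0;
    }

virtio_queues_unbind_intr() reverses the binding by writing VIRTIO_MSI_NO_VECTOR back for each queue at close time.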
+ */ + if (rte_intr_enable(dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return -1; } + if (virtio_queues_bind_intr(dev) < 0) { + PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt"); + return -1; + } + + return 0; +} + +/* reset device and renegotiate features if needed */ +static int +virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + struct virtio_net_config *config; + struct virtio_net_config local_config; + struct rte_pci_device *pci_dev = NULL; + int ret; + /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1167,15 +1330,19 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* Tell the host we've known how to drive the device. */ vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); - if (virtio_negotiate_features(hw) < 0) + if (virtio_negotiate_features(hw, req_features) < 0) return -1; + if (eth_dev->device) { + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + rte_eth_copy_pci_info(eth_dev, pci_dev); + } + /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - dev_flags &= ~RTE_ETH_DEV_INTR_LSC; - - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->data->dev_flags = dev_flags; + eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; + else + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; rx_func_get(eth_dev); @@ -1223,16 +1390,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) config->max_virtqueue_pairs = 1; } - hw->max_rx_queues = - (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ? - VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs; - hw->max_tx_queues = - (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ? - VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs; - - virtio_dev_cq_queue_setup(eth_dev, - config->max_virtqueue_pairs * 2, - SOCKET_ID_ANY); + hw->max_queue_pairs = config->max_virtqueue_pairs; PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", config->max_virtqueue_pairs); @@ -1243,23 +1401,142 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) config->mac[2], config->mac[3], config->mac[4], config->mac[5]); } else { - hw->max_rx_queues = 1; - hw->max_tx_queues = 1; + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1"); + hw->max_queue_pairs = 1; } - PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", - hw->max_rx_queues, hw->max_tx_queues); + ret = virtio_alloc_queues(eth_dev); + if (ret < 0) + return ret; + + if (eth_dev->data->dev_conf.intr_conf.rxq) { + if (virtio_configure_intr(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "failed to configure interrupt"); + return -1; + } + } + + vtpci_reinit_complete(hw); + if (pci_dev) PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); + return 0; +} + +/* + * Remap the PCI device again (IO port map for legacy device and + * memory map for modern device), so that the secondary process + * could have the PCI initiated correctly. + */ +static int +virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) +{ + if (hw->modern) { + /* + * We don't have to re-parse the PCI config space, since + * rte_eal_pci_map_device() makes sure the mapped address + * in secondary process would equal to the one mapped in + * the primary process: error will be returned if that + * requirement is not met. + * + * That said, we could simply reuse all cap pointers + * (such as dev_cfg, common_cfg, etc.) 
parsed from the + * primary process, which is stored in shared memory. + */ + if (rte_eal_pci_map_device(pci_dev)) { + PMD_INIT_LOG(DEBUG, "failed to map pci device!"); + return -1; + } + } else { + if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) + return -1; + } + + return 0; +} + +static void +virtio_set_vtpci_ops(struct virtio_hw *hw) +{ +#ifdef RTE_VIRTIO_USER + if (hw->virtio_user_dev) + VTPCI_OPS(hw) = &virtio_user_ops; + else +#endif + if (hw->modern) + VTPCI_OPS(hw) = &modern_ops; + else + VTPCI_OPS(hw) = &legacy_ops; +} + +/* + * This function is based on probe() function in virtio_pci.c + * It returns 0 on success. + */ +int +eth_virtio_dev_init(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; + int ret; + + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); + + eth_dev->dev_ops = &virtio_eth_dev_ops; + eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + if (!hw->virtio_user_dev) { + ret = virtio_remap_pci(RTE_DEV_TO_PCI(eth_dev->device), + hw); + if (ret) + return ret; + } + + virtio_set_vtpci_ops(hw); + if (hw->use_simple_rxtx) { + eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple; + eth_dev->rx_pkt_burst = virtio_recv_pkts_vec; + } else { + rx_func_get(eth_dev); + } + return 0; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); + return -ENOMEM; + } + + hw->port_id = eth_dev->data->port_id; + /* For virtio_user case the hw->virtio_user_dev is populated by + * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called. 
+ */ + if (!hw->virtio_user_dev) { + ret = vtpci_init(RTE_DEV_TO_PCI(eth_dev->device), hw, + &dev_flags); + if (ret) + return ret; + } + + eth_dev->data->dev_flags = dev_flags; + + /* reset device and negotiate default features */ + ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); + if (ret < 0) + return ret; + /* Setup interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_register(&pci_dev->intr_handle, - virtio_interrupt_handler, eth_dev); - - virtio_dev_cq_start(eth_dev); + rte_intr_callback_register(eth_dev->intr_handle, + virtio_interrupt_handler, eth_dev); return 0; } @@ -1267,35 +1544,28 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; - struct virtio_hw *hw = eth_dev->data->dev_private; - PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() == RTE_PROC_SECONDARY) return -EPERM; - /* Close it anyway since there's no way to know if closed */ + virtio_dev_stop(eth_dev); virtio_dev_close(eth_dev); - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = NULL; eth_dev->tx_pkt_burst = NULL; eth_dev->rx_pkt_burst = NULL; - if (hw->cvq) - virtio_dev_queue_release(hw->cvq->vq); - rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_unregister(&pci_dev->intr_handle, + rte_intr_callback_unregister(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev); - rte_eal_pci_unmap_device(pci_dev); + if (eth_dev->device) + rte_eal_pci_unmap_device(RTE_DEV_TO_PCI(eth_dev->device)); PMD_INIT_LOG(DEBUG, "dev_uninit completed"); @@ -1304,32 +1574,29 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_virtio_pmd = { .pci_drv = { - .name = "rte_virtio_pmd", + .driver = { + .name = "net_virtio", + }, .id_table = pci_id_virtio_map, - .drv_flags = RTE_PCI_DRV_DETACHABLE, + .drv_flags = 0, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_virtio_dev_init, .eth_dev_uninit = eth_virtio_dev_uninit, .dev_private_size = sizeof(struct virtio_hw), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI virtio devices. - * Returns 0 on success. 
- */ -static int -rte_virtio_pmd_init(const char *name __rte_unused, - const char *param __rte_unused) +RTE_INIT(rte_virtio_pmd_init); +static void +rte_virtio_pmd_init(void) { if (rte_eal_iopl_init() != 0) { PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); - return -1; + return; } - rte_eth_driver_register(&rte_virtio_pmd); - return 0; + rte_eal_pci_register(&rte_virtio_pmd.pci_drv); } /* @@ -1341,14 +1608,44 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; + uint64_t req_features; + int ret; PMD_INIT_LOG(DEBUG, "configure"); + req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES; + if (rxmode->hw_ip_checksum) + req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM); + if (rxmode->enable_lro) + req_features |= + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6); + + /* if request features changed, reinit the device */ + if (req_features != hw->req_guest_features) { + ret = virtio_init_device(dev, req_features); + if (ret < 0) + return ret; + } - if (rxmode->hw_ip_checksum) { - PMD_DRV_LOG(ERR, "HW IP checksum not supported"); - return -EINVAL; + if (rxmode->hw_ip_checksum && + !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) { + PMD_DRV_LOG(NOTICE, + "rx ip checksum not available on this host"); + return -ENOTSUP; + } + + if (rxmode->enable_lro && + (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || + !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4))) { + PMD_DRV_LOG(NOTICE, + "lro not available on this host"); + return -ENOTSUP; } + /* start control queue */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) + virtio_dev_cq_start(dev); + hw->vlan_strip = rxmode->hw_vlan_strip; if (rxmode->hw_vlan_filter @@ -1359,7 +1656,9 @@ virtio_dev_configure(struct rte_eth_dev *dev) } if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { + /* Enable vector (0) for Link State Intrerrupt */ + if (VTPCI_OPS(hw)->set_config_irq(hw, 0) == + VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return -EBUSY; } @@ -1372,9 +1671,9 @@ static int virtio_dev_start(struct rte_eth_dev *dev) { uint16_t nb_queues, i; - struct virtio_hw *hw = dev->data->dev_private; struct virtnet_rx *rxvq; struct virtnet_tx *txvq __rte_unused; + struct virtio_hw *hw = dev->data->dev_private; /* check if lsc interrupt feature is enabled */ if (dev->data->dev_conf.intr_conf.lsc) { @@ -1382,8 +1681,17 @@ virtio_dev_start(struct rte_eth_dev *dev) PMD_DRV_LOG(ERR, "link status not supported by host"); return -ENOTSUP; } + } + + /* Enable uio/vfio intr/eventfd mapping: althrough we already did that + * in device configure, but it could be unmapped when device is + * stopped. + */ + if (dev->data->dev_conf.intr_conf.lsc || + dev->data->dev_conf.intr_conf.rxq) { + rte_intr_disable(dev->intr_handle); - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) { + if (rte_intr_enable(dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt enable failed"); return -EIO; } @@ -1392,29 +1700,19 @@ virtio_dev_start(struct rte_eth_dev *dev) /* Initialize Link state */ virtio_dev_link_update(dev, 0); - /* On restart after stop do not touch queues */ - if (hw->started) - return 0; - - /* Do final configuration before rx/tx engine starts */ - virtio_dev_rxtx_start(dev); - vtpci_reinit_complete(hw); - - hw->started = 1; - /*Notify the backend *Otherwise the tap backend might already stop its queue due to fullness. 
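virtio_dev_configure() now derives the requested feature set from the RX mode (GUEST_CSUM when hw_ip_checksum is set, GUEST_TSO4/6 when LRO is enabled), renegotiates if that differs from the previous request, and then verifies the host actually granted the capability. Note that the LRO verification in the hunk tests VIRTIO_NET_F_GUEST_TSO4 twice; the intended check, as sketched below, covers TSO4 and TSO6 (bit values follow the virtio spec, the default mask is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    #define F_GUEST_CSUM     (1ULL << 1)
    #define F_GUEST_TSO4     (1ULL << 7)
    #define F_GUEST_TSO6     (1ULL << 8)
    #define DEFAULT_FEATURES 0ULL   /* stand-in for VIRTIO_PMD_DEFAULT_GUEST_FEATURES */

    static uint64_t requested(int hw_ip_checksum, int enable_lro)
    {
        uint64_t req = DEFAULT_FEATURES;

        if (hw_ip_checksum)
            req |= F_GUEST_CSUM;
        if (enable_lro)
            req |= F_GUEST_TSO4 | F_GUEST_TSO6;
        return req;
    }

    static int lro_usable(uint64_t negotiated)
    {
        return (negotiated & F_GUEST_TSO4) && (negotiated & F_GUEST_TSO6);
    }

    int main(void)
    {
        uint64_t req = requested(1, 1);
        uint64_t neg = req & (F_GUEST_CSUM | F_GUEST_TSO4);  /* host lacks TSO6 */

        printf("requested=0x%llx, lro usable=%s\n",
               (unsigned long long)req, lro_usable(neg) ? "yes" : "no");
        return 0;
    }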
*vhost backend will have no chance to be waked up */ - nb_queues = dev->data->nb_rx_queues; - if (nb_queues > 1) { + nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues); + if (hw->max_queue_pairs > 1) { if (virtio_set_multiple_queues(dev, nb_queues) != 0) return -EINVAL; } PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); - for (i = 0; i < nb_queues; i++) { + for (i = 0; i < dev->data->nb_rx_queues; i++) { rxvq = dev->data->rx_queues[i]; virtqueue_notify(rxvq->vq); } @@ -1486,14 +1784,12 @@ static void virtio_dev_stop(struct rte_eth_dev *dev) { struct rte_eth_link link; - struct virtio_hw *hw = dev->data->dev_private; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "stop"); - hw->started = 0; - - if (dev->data->dev_conf.intr_conf.lsc) - rte_intr_disable(&dev->pci_dev->intr_handle); + if (intr_conf->lsc || intr_conf->rxq) + rte_intr_disable(dev->intr_handle); memset(&link, 0, sizeof(link)); virtio_dev_atomic_write_link_status(dev, &link); @@ -1536,21 +1832,43 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet static void virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + uint64_t tso_mask, host_features; struct virtio_hw *hw = dev->data->dev_private; - if (dev->pci_dev) - dev_info->driver_name = dev->driver->pci_drv.name; - else - dev_info->driver_name = "virtio_user PMD"; - dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; - dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; + dev_info->pci_dev = dev->device ? RTE_DEV_TO_PCI(dev->device) : NULL; + dev_info->max_rx_queues = + RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES); + dev_info->max_tx_queues = + RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES); dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; dev_info->default_txconf = (struct rte_eth_txconf) { .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS }; - /* TRex patch */ + + host_features = VTPCI_OPS(hw)->get_features(hw); + dev_info->rx_offload_capa = 0; + if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + dev_info->rx_offload_capa |= + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM; + } + tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6); + if ((host_features & tso_mask) == tso_mask) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO; + + dev_info->tx_offload_capa = 0; + if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) { + dev_info->tx_offload_capa |= + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM; + } + tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) | + (1ULL << VIRTIO_NET_F_HOST_TSO6); + if ((hw->guest_features & tso_mask) == tso_mask) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; dev_info->speed_capa = ETH_LINK_SPEED_10G; } @@ -1565,10 +1883,6 @@ __rte_unused uint8_t is_rx) return 0; } -static struct rte_driver rte_virtio_driver = { - .type = PMD_PDEV, - .init = rte_virtio_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net); -DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map); +RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map); +RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.h b/src/dpdk/drivers/net/virtio/virtio_ethdev.h index 2ecec6eb..777a14be 100644 --- a/src/dpdk/drivers/net/virtio/virtio_ethdev.h +++ 
b/src/dpdk/drivers/net/virtio/virtio_ethdev.h @@ -47,14 +47,14 @@ #define PAGE_SIZE 4096 #endif -#define VIRTIO_MAX_RX_QUEUES 128 -#define VIRTIO_MAX_TX_QUEUES 128 +#define VIRTIO_MAX_RX_QUEUES 128U +#define VIRTIO_MAX_TX_QUEUES 128U #define VIRTIO_MAX_MAC_ADDRS 64 #define VIRTIO_MIN_RX_BUFSIZE 64 #define VIRTIO_MAX_RX_PKTLEN 9728 /* Features desired/implemented by this driver. */ -#define VIRTIO_PMD_GUEST_FEATURES \ +#define VIRTIO_PMD_DEFAULT_GUEST_FEATURES \ (1u << VIRTIO_NET_F_MAC | \ 1u << VIRTIO_NET_F_STATUS | \ 1u << VIRTIO_NET_F_MQ | \ @@ -62,9 +62,19 @@ 1u << VIRTIO_NET_F_CTRL_VQ | \ 1u << VIRTIO_NET_F_CTRL_RX | \ 1u << VIRTIO_NET_F_CTRL_VLAN | \ + 1u << VIRTIO_NET_F_CSUM | \ + 1u << VIRTIO_NET_F_HOST_TSO4 | \ + 1u << VIRTIO_NET_F_HOST_TSO6 | \ 1u << VIRTIO_NET_F_MRG_RXBUF | \ - 1ULL << VIRTIO_F_VERSION_1) - + 1u << VIRTIO_RING_F_INDIRECT_DESC | \ + 1ULL << VIRTIO_F_VERSION_1 | \ + 1ULL << VIRTIO_F_IOMMU_PLATFORM) + +#define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES \ + (VIRTIO_PMD_DEFAULT_GUEST_FEATURES | \ + 1u << VIRTIO_NET_F_GUEST_CSUM | \ + 1u << VIRTIO_NET_F_GUEST_TSO4 | \ + 1u << VIRTIO_NET_F_GUEST_TSO6) /* * CQ function prototype */ @@ -73,31 +83,18 @@ void virtio_dev_cq_start(struct rte_eth_dev *dev); /* * RX/TX function prototypes */ -void virtio_dev_rxtx_start(struct rte_eth_dev *dev); - -int virtio_dev_queue_setup(struct rte_eth_dev *dev, - int queue_type, - uint16_t queue_idx, - uint16_t vtpci_queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - void **pvq); -void virtio_dev_queue_release(struct virtqueue *vq); +int virtio_dev_rx_queue_done(void *rxq, uint16_t offset); int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool); -void virtio_dev_rx_queue_release(void *rxq); - int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); -void virtio_dev_tx_queue_release(void *txq); - uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -115,13 +112,4 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); -/* - * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us - * frames larger than 1514 bytes. We do not yet support software LRO - * via tcp_lro_rx(). 
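The dev_info hunk earlier derives offload capabilities from feature bits, and the header split between VIRTIO_PMD_DEFAULT_GUEST_FEATURES and VIRTIO_PMD_SUPPORTED_GUEST_FEATURES here keeps the GUEST_CSUM/TSO bits out of the default request until the application asks for them. A standalone sketch of the capability derivation (the CAP_* flags are illustrative; virtio bit positions follow the spec):

    #include <stdint.h>
    #include <stdio.h>

    #define F_CSUM        (1ULL << 0)
    #define F_GUEST_CSUM  (1ULL << 1)
    #define F_GUEST_TSO4  (1ULL << 7)
    #define F_GUEST_TSO6  (1ULL << 8)
    #define F_HOST_TSO4   (1ULL << 11)
    #define F_HOST_TSO6   (1ULL << 12)

    #define CAP_RX_CKSUM  0x1
    #define CAP_RX_LRO    0x2
    #define CAP_TX_CKSUM  0x4
    #define CAP_TX_TSO    0x8

    /* Same derivation virtio_dev_info_get() now performs: RX capabilities come
     * from what the host offers, TX capabilities from what was negotiated. */
    static unsigned int offload_caps(uint64_t host, uint64_t negotiated)
    {
        unsigned int caps = 0;
        const uint64_t rx_tso = F_GUEST_TSO4 | F_GUEST_TSO6;
        const uint64_t tx_tso = F_HOST_TSO4 | F_HOST_TSO6;

        if (host & F_GUEST_CSUM)
            caps |= CAP_RX_CKSUM;
        if ((host & rx_tso) == rx_tso)
            caps |= CAP_RX_LRO;
        if (negotiated & F_CSUM)
            caps |= CAP_TX_CKSUM;
        if ((negotiated & tx_tso) == tx_tso)
            caps |= CAP_TX_TSO;
        return caps;
    }

    int main(void)
    {
        printf("caps=0x%x\n",
               offload_caps(F_GUEST_CSUM | F_GUEST_TSO4 | F_GUEST_TSO6,
                            F_CSUM | F_HOST_TSO4 | F_HOST_TSO6));
        return 0;
    }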
- */ -#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ - VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) - - #endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.c b/src/dpdk/drivers/net/virtio/virtio_pci.c index f1a7ca7e..ce9a9d3f 100644 --- a/src/dpdk/drivers/net/virtio/virtio_pci.c +++ b/src/dpdk/drivers/net/virtio/virtio_pci.c @@ -37,6 +37,8 @@ #include #endif +#include + #include "virtio_pci.h" #include "virtio_logs.h" #include "virtqueue.h" @@ -92,17 +94,17 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, while (length > 0) { if (length >= 4) { size = 4; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); } else if (length >= 2) { size = 2; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); } else { size = 1; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -111,7 +113,7 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_read(&hw->io, dst, length, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length, VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -131,16 +133,16 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, if (length >= 4) { size = 4; tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); - rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size, VIRTIO_PCI_CONFIG(hw) + offset); } else if (length >= 2) { size = 2; tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); - rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size, VIRTIO_PCI_CONFIG(hw) + offset); } else { size = 1; - rte_eal_pci_ioport_write(&hw->io, src, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -149,7 +151,7 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_write(&hw->io, src, length, + rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length, VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -159,7 +161,8 @@ legacy_get_features(struct virtio_hw *hw) { uint32_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, + VIRTIO_PCI_HOST_FEATURES); return dst; } @@ -171,7 +174,7 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features) "only 32 bit features are allowed for legacy virtio!"); return; } - rte_eal_pci_ioport_write(&hw->io, &features, 4, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4, VIRTIO_PCI_GUEST_FEATURES); } @@ -180,14 +183,14 @@ legacy_get_status(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS); return dst; } static void legacy_set_status(struct virtio_hw *hw, uint8_t status) { - rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS); } static void @@ -201,7 +204,7 @@ legacy_get_isr(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR); return dst; } @@ 
-211,8 +214,23 @@ legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) { uint16_t dst; - rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR); - rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2, + VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, + VIRTIO_MSI_CONFIG_VECTOR); + return dst; +} + +static uint16_t +legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2, + VIRTIO_MSI_QUEUE_VECTOR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR); return dst; } @@ -221,8 +239,9 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { uint16_t dst; - rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM); return dst; } @@ -234,10 +253,10 @@ legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) if (!check_vq_phys_addr_ok(vq)) return -1; - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; - rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); return 0; } @@ -247,15 +266,15 @@ legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src = 0; - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); } static void legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) { - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_NOTIFY); } @@ -289,7 +308,7 @@ static int legacy_virtio_resource_init(struct rte_pci_device *pci_dev, struct virtio_hw *hw, uint32_t *dev_flags) { - if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) + if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) return -1; if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) @@ -300,7 +319,7 @@ legacy_virtio_resource_init(struct rte_pci_device *pci_dev, return 0; } -static const struct virtio_pci_ops legacy_ops = { +const struct virtio_pci_ops legacy_ops = { .read_dev_cfg = legacy_read_dev_config, .write_dev_cfg = legacy_write_dev_config, .reset = legacy_reset, @@ -310,54 +329,18 @@ static const struct virtio_pci_ops legacy_ops = { .set_features = legacy_set_features, .get_isr = legacy_get_isr, .set_config_irq = legacy_set_config_irq, + .set_queue_irq = legacy_set_queue_irq, .get_queue_num = legacy_get_queue_num, .setup_queue = legacy_setup_queue, .del_queue = legacy_del_queue, .notify_queue = legacy_notify_queue, }; - -static inline uint8_t -io_read8(uint8_t *addr) -{ - return *(volatile uint8_t *)addr; -} - -static inline void -io_write8(uint8_t val, uint8_t *addr) -{ - *(volatile uint8_t *)addr = val; -} - -static inline uint16_t -io_read16(uint16_t *addr) -{ - return *(volatile uint16_t 
*)addr; -} - -static inline void -io_write16(uint16_t val, uint16_t *addr) -{ - *(volatile uint16_t *)addr = val; -} - -static inline uint32_t -io_read32(uint32_t *addr) -{ - return *(volatile uint32_t *)addr; -} - -static inline void -io_write32(uint32_t val, uint32_t *addr) -{ - *(volatile uint32_t *)addr = val; -} - static inline void io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) { - io_write32(val & ((1ULL << 32) - 1), lo); - io_write32(val >> 32, hi); + rte_write32(val & ((1ULL << 32) - 1), lo); + rte_write32(val >> 32, hi); } static void @@ -369,13 +352,13 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset, uint8_t old_gen, new_gen; do { - old_gen = io_read8(&hw->common_cfg->config_generation); + old_gen = rte_read8(&hw->common_cfg->config_generation); p = dst; for (i = 0; i < length; i++) - *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i); + *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i); - new_gen = io_read8(&hw->common_cfg->config_generation); + new_gen = rte_read8(&hw->common_cfg->config_generation); } while (old_gen != new_gen); } @@ -387,7 +370,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset, const uint8_t *p = src; for (i = 0; i < length; i++) - io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i); + rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i)); } static uint64_t @@ -395,11 +378,11 @@ modern_get_features(struct virtio_hw *hw) { uint32_t features_lo, features_hi; - io_write32(0, &hw->common_cfg->device_feature_select); - features_lo = io_read32(&hw->common_cfg->device_feature); + rte_write32(0, &hw->common_cfg->device_feature_select); + features_lo = rte_read32(&hw->common_cfg->device_feature); - io_write32(1, &hw->common_cfg->device_feature_select); - features_hi = io_read32(&hw->common_cfg->device_feature); + rte_write32(1, &hw->common_cfg->device_feature_select); + features_hi = rte_read32(&hw->common_cfg->device_feature); return ((uint64_t)features_hi << 32) | features_lo; } @@ -407,25 +390,25 @@ modern_get_features(struct virtio_hw *hw) static void modern_set_features(struct virtio_hw *hw, uint64_t features) { - io_write32(0, &hw->common_cfg->guest_feature_select); - io_write32(features & ((1ULL << 32) - 1), - &hw->common_cfg->guest_feature); + rte_write32(0, &hw->common_cfg->guest_feature_select); + rte_write32(features & ((1ULL << 32) - 1), + &hw->common_cfg->guest_feature); - io_write32(1, &hw->common_cfg->guest_feature_select); - io_write32(features >> 32, - &hw->common_cfg->guest_feature); + rte_write32(1, &hw->common_cfg->guest_feature_select); + rte_write32(features >> 32, + &hw->common_cfg->guest_feature); } static uint8_t modern_get_status(struct virtio_hw *hw) { - return io_read8(&hw->common_cfg->device_status); + return rte_read8(&hw->common_cfg->device_status); } static void modern_set_status(struct virtio_hw *hw, uint8_t status) { - io_write8(status, &hw->common_cfg->device_status); + rte_write8(status, &hw->common_cfg->device_status); } static void @@ -438,21 +421,29 @@ modern_reset(struct virtio_hw *hw) static uint8_t modern_get_isr(struct virtio_hw *hw) { - return io_read8(hw->isr); + return rte_read8(hw->isr); } static uint16_t modern_set_config_irq(struct virtio_hw *hw, uint16_t vec) { - io_write16(vec, &hw->common_cfg->msix_config); - return io_read16(&hw->common_cfg->msix_config); + rte_write16(vec, &hw->common_cfg->msix_config); + return rte_read16(&hw->common_cfg->msix_config); +} + +static uint16_t +modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec) +{ + 
rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vec, &hw->common_cfg->queue_msix_vector); + return rte_read16(&hw->common_cfg->queue_msix_vector); } static uint16_t modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { - io_write16(queue_id, &hw->common_cfg->queue_select); - return io_read16(&hw->common_cfg->queue_size); + rte_write16(queue_id, &hw->common_cfg->queue_select); + return rte_read16(&hw->common_cfg->queue_size); } static int @@ -470,7 +461,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) ring[vq->vq_nentries]), VIRTIO_PCI_VRING_ALIGN); - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -479,11 +470,11 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - notify_off = io_read16(&hw->common_cfg->queue_notify_off); + notify_off = rte_read16(&hw->common_cfg->queue_notify_off); vq->notify_addr = (void *)((uint8_t *)hw->notify_base + notify_off * hw->notify_off_multiplier); - io_write16(1, &hw->common_cfg->queue_enable); + rte_write16(1, &hw->common_cfg->queue_enable); PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index); PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr); @@ -498,7 +489,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) static void modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -507,16 +498,16 @@ modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(0, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - io_write16(0, &hw->common_cfg->queue_enable); + rte_write16(0, &hw->common_cfg->queue_enable); } static void modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) { - io_write16(1, vq->notify_addr); + rte_write16(1, vq->notify_addr); } -static const struct virtio_pci_ops modern_ops = { +const struct virtio_pci_ops modern_ops = { .read_dev_cfg = modern_read_dev_config, .write_dev_cfg = modern_write_dev_config, .reset = modern_reset, @@ -526,6 +517,7 @@ static const struct virtio_pci_ops modern_ops = { .set_features = modern_set_features, .get_isr = modern_get_isr, .set_config_irq = modern_set_config_irq, + .set_queue_irq = modern_set_queue_irq, .get_queue_num = modern_get_queue_num, .setup_queue = modern_setup_queue, .del_queue = modern_del_queue, @@ -537,14 +529,14 @@ void vtpci_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) { - hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length); + VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length); } void vtpci_write_dev_config(struct virtio_hw *hw, size_t offset, const void *src, int length) { - hw->vtpci_ops->write_dev_cfg(hw, offset, src, length); + VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length); } uint64_t @@ -557,7 +549,7 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) * host all support. 
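The modern transport exposes 64-bit queue addresses as lo/hi pairs of 32-bit registers, which is what io_write64_twopart(), now built on rte_write32(), does in modern_setup_queue(). A standalone sketch of the split:

    #include <stdint.h>
    #include <stdio.h>

    /* Split a 64-bit value into the low/high 32-bit halves the device expects. */
    static void write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
    {
        *lo = (uint32_t)(val & 0xffffffffu);
        *hi = (uint32_t)(val >> 32);
    }

    int main(void)
    {
        uint32_t lo, hi;

        write64_twopart(0x0000123456789abcULL, &lo, &hi);
        printf("lo=0x%08x hi=0x%08x\n", lo, hi);   /* lo=0x56789abc hi=0x00001234 */
        return 0;
    }

modern_del_queue() performs the mirror operation, writing zero to both halves of the desc/avail/used addresses before clearing queue_enable.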
*/ features = host_features & hw->guest_features; - hw->vtpci_ops->set_features(hw, features); + VTPCI_OPS(hw)->set_features(hw, features); return features; } @@ -565,9 +557,9 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) void vtpci_reset(struct virtio_hw *hw) { - hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); /* flush status write */ - hw->vtpci_ops->get_status(hw); + VTPCI_OPS(hw)->get_status(hw); } void @@ -580,29 +572,21 @@ void vtpci_set_status(struct virtio_hw *hw, uint8_t status) { if (status != VIRTIO_CONFIG_STATUS_RESET) - status |= hw->vtpci_ops->get_status(hw); + status |= VTPCI_OPS(hw)->get_status(hw); - hw->vtpci_ops->set_status(hw, status); + VTPCI_OPS(hw)->set_status(hw, status); } uint8_t vtpci_get_status(struct virtio_hw *hw) { - return hw->vtpci_ops->get_status(hw); + return VTPCI_OPS(hw)->get_status(hw); } uint8_t vtpci_isr(struct virtio_hw *hw) { - return hw->vtpci_ops->get_isr(hw); -} - - -/* Enable one vector (0) for Link State Intrerrupt */ -uint16_t -vtpci_irq_config(struct virtio_hw *hw, uint16_t vec) -{ - return hw->vtpci_ops->set_config_irq(hw, vec); + return VTPCI_OPS(hw)->get_isr(hw); } static void * @@ -727,8 +711,6 @@ int vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, uint32_t *dev_flags) { - hw->dev = dev; - /* * Try if we can succeed reading virtio pci caps, which exists * only on modern pci device. If failed, we fallback to legacy @@ -736,8 +718,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, */ if (virtio_read_caps(dev, hw) == 0) { PMD_INIT_LOG(INFO, "modern virtio pci detected."); - hw->vtpci_ops = &modern_ops; - hw->modern = 1; + virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops; + hw->modern = 1; *dev_flags |= RTE_ETH_DEV_INTR_LSC; return 0; } @@ -745,8 +727,9 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { if (dev->kdrv == RTE_KDRV_UNKNOWN && - (!dev->devargs || - dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) { + (!dev->device.devargs || + dev->device.devargs->type != + RTE_DEVTYPE_WHITELISTED_PCI)) { PMD_INIT_LOG(INFO, "skip kernel managed virtio device."); return 1; @@ -754,7 +737,7 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, return -1; } - hw->vtpci_ops = &legacy_ops; + virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops; hw->use_msix = legacy_virtio_has_msix(&dev->addr); hw->modern = 0; diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.h b/src/dpdk/drivers/net/virtio/virtio_pci.h index dd7693fe..59e45c4d 100644 --- a/src/dpdk/drivers/net/virtio/virtio_pci.h +++ b/src/dpdk/drivers/net/virtio/virtio_pci.h @@ -44,8 +44,8 @@ struct virtnet_ctl; /* VirtIO PCI vendor/device ID. */ #define VIRTIO_PCI_VENDORID 0x1AF4 -#define VIRTIO_PCI_DEVICEID_MIN 0x1000 -#define VIRTIO_PCI_DEVICEID_MAX 0x103F +#define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000 +#define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041 /* VirtIO ABI version, this must match exactly. */ #define VIRTIO_PCI_ABI_VERSION 0 @@ -138,6 +138,7 @@ struct virtnet_ctl; #define VIRTIO_RING_F_INDIRECT_DESC 28 #define VIRTIO_F_VERSION_1 32 +#define VIRTIO_F_IOMMU_PLATFORM 33 /* * Some VirtIO feature bits (currently bits 28 through 31) are @@ -145,7 +146,7 @@ struct virtnet_ctl; * rest are per-device feature bits. 
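vtpci_init() now stores the selected ops table in virtio_hw_internal[hw->port_id] instead of in struct virtio_hw itself (see the structures in the following header hunk): hw lives in shared memory, so a function pointer written by the primary process would be meaningless in a secondary one, while a port_id index resolved against a process-local array is safe. A standalone sketch of that design (all names below are illustrative, not the DPDK symbols):

    #include <stdio.h>

    struct ops { void (*notify)(int q); };

    static void notify_impl(int q) { printf("notify vq %d\n", q); }

    #define MAX_PORTS 8
    static const struct ops *local_ops[MAX_PORTS];    /* per-process table */

    struct shared_hw { unsigned char port_id; };       /* lives in shared memory */

    /* Analogous to VTPCI_OPS(hw): only the index is shared, the pointer is not. */
    #define HW_OPS(hw) (local_ops[(hw)->port_id])

    int main(void)
    {
        static const struct ops virtio_ops = { .notify = notify_impl };
        struct shared_hw hw = { .port_id = 0 };

        local_ops[hw.port_id] = &virtio_ops;           /* done in each process */
        HW_OPS(&hw)->notify(3);
        return 0;
    }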
*/ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 32 +#define VIRTIO_TRANSPORT_F_END 34 /* The Guest publishes the used index for which it expects an interrupt * at the end of the avail ring. Host should ignore the avail->flags field. */ @@ -234,6 +235,9 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); + uint16_t (*set_queue_irq)(struct virtio_hw *hw, struct virtqueue *vq, + uint16_t vec); + uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); @@ -244,26 +248,43 @@ struct virtio_net_config; struct virtio_hw { struct virtnet_ctl *cvq; - struct rte_pci_ioport io; + uint64_t req_guest_features; uint64_t guest_features; - uint32_t max_tx_queues; - uint32_t max_rx_queues; + uint32_t max_queue_pairs; uint16_t vtnet_hdr_size; uint8_t vlan_strip; uint8_t use_msix; - uint8_t started; uint8_t modern; + uint8_t use_simple_rxtx; + uint8_t port_id; uint8_t mac_addr[ETHER_ADDR_LEN]; uint32_t notify_off_multiplier; uint8_t *isr; uint16_t *notify_base; - struct rte_pci_device *dev; struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; - const struct virtio_pci_ops *vtpci_ops; void *virtio_user_dev; + + struct virtqueue **vqs; +}; + + +/* + * While virtio_hw is stored in shared memory, this structure stores + * some infos that may vary in the multiple process model locally. + * For example, the vtpci_ops pointer. + */ +struct virtio_hw_internal { + const struct virtio_pci_ops *vtpci_ops; + struct rte_pci_ioport io; }; +#define VTPCI_OPS(hw) (virtio_hw_internal[(hw)->port_id].vtpci_ops) +#define VTPCI_IO(hw) (&virtio_hw_internal[(hw)->port_id].io) + +extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; + + /* * This structure is just a reference to read * net device specific config space; it just a chodu structure @@ -312,6 +333,8 @@ void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int); uint8_t vtpci_isr(struct virtio_hw *); -uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t); +extern const struct virtio_pci_ops legacy_ops; +extern const struct virtio_pci_ops modern_ops; +extern const struct virtio_pci_ops virtio_user_ops; #endif /* _VIRTIO_PCI_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.c b/src/dpdk/drivers/net/virtio/virtio_rxtx.c index 724517e2..cab6e8fc 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx.c +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.c @@ -50,6 +50,11 @@ #include #include #include +#include +#include +#include +#include +#include #include "virtio_logs.h" #include "virtio_ethdev.h" @@ -67,9 +72,14 @@ #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ ETH_TXQ_FLAGS_NOOFFLOADS) -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 -static int use_simple_rxtx; -#endif +int +virtio_dev_rx_queue_done(void *rxq, uint16_t offset) +{ + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq = rxvq->vq; + + return VIRTQUEUE_NUSED(vq) >= offset; +} static void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) @@ -123,7 +133,7 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; if (unlikely(cookie == NULL)) { - PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", vq->vq_used_cons_idx); break; } @@ -208,18 +218,76 @@ virtqueue_enqueue_recv_refill(struct 
virtqueue *vq, struct rte_mbuf *cookie) return 0; } +/* When doing TSO, the IP length is not included in the pseudo header + * checksum of the packet given to the PMD, but for virtio it is + * expected. + */ +static void +virtio_tso_fix_cksum(struct rte_mbuf *m) +{ + /* common case: header is not fragmented */ + if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + + m->l4_len)) { + struct ipv4_hdr *iph; + struct ipv6_hdr *ip6h; + struct tcp_hdr *th; + uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; + uint32_t tmp; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + th = RTE_PTR_ADD(iph, m->l3_len); + if ((iph->version_ihl >> 4) == 4) { + iph->hdr_checksum = 0; + iph->hdr_checksum = rte_ipv4_cksum(iph); + ip_len = iph->total_length; + ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - + m->l3_len); + } else { + ip6h = (struct ipv6_hdr *)iph; + ip_paylen = ip6h->payload_len; + } + + /* calculate the new phdr checksum not including ip_paylen */ + prev_cksum = th->cksum; + tmp = prev_cksum; + tmp += ip_paylen; + tmp = (tmp & 0xffff) + (tmp >> 16); + new_cksum = tmp; + + /* replace it in the packet */ + th->cksum = new_cksum; + } +} + +static inline int +tx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); +} + +/* avoid write operation when necessary, to lessen cache issues */ +#define ASSIGN_UNLESS_EQUAL(var, val) do { \ + if ((var) != (val)) \ + (var) = (val); \ +} while (0) + static inline void virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, uint16_t needed, int use_indirect, int can_push) { + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; struct vq_desc_extra *dxp; struct virtqueue *vq = txvq->vq; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; uint16_t head_size = vq->hw->vtnet_hdr_size; - unsigned long offs; + struct virtio_net_hdr *hdr; + int offload; + offload = tx_offload_enabled(vq->hw); head_idx = vq->vq_desc_head_idx; idx = head_idx; dxp = &vq->vq_descx[idx]; @@ -229,10 +297,18 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, start_dp = vq->vq_ring.desc; if (can_push) { - /* put on zero'd transmit header (no offloads) */ - void *hdr = rte_pktmbuf_prepend(cookie, head_size); - - memset(hdr, 0, head_size); + /* prepend cannot fail, checked by caller */ + hdr = (struct virtio_net_hdr *) + rte_pktmbuf_prepend(cookie, head_size); + /* if offload disabled, it is not zeroed below, do it now */ + if (offload == 0) { + ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); + ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); + ASSIGN_UNLESS_EQUAL(hdr->flags, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); + ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); + } } else if (use_indirect) { /* setup tx ring slot to point to indirect * descriptor list stored in reserved region. 
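virtio_tso_fix_cksum() bridges two conventions: DPDK's TSO path seeds the TCP checksum field with a pseudo-header sum that excludes the IP payload length, while the virtio host expects that length included. Folding the length into the existing 16-bit one's-complement sum is all that is required; a standalone sketch of that fold with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    /* One's-complement add of a 16-bit value into an existing checksum,
     * folding any carry back into the low 16 bits, as the fixup above does. */
    static uint16_t csum_add(uint16_t cksum, uint16_t value)
    {
        uint32_t tmp = (uint32_t)cksum + value;

        tmp = (tmp & 0xffff) + (tmp >> 16);
        return (uint16_t)tmp;
    }

    int main(void)
    {
        uint16_t phdr_without_len = 0xfff0;   /* hypothetical app-provided seed */
        uint16_t ip_paylen        = 0x0040;   /* payload length to fold in */

        printf("fixed cksum = 0x%04x\n", csum_add(phdr_without_len, ip_paylen));
        return 0;
    }

For IPv4 the same branch also zeroes and recomputes the IP header checksum via rte_ipv4_cksum() before applying this adjustment.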
@@ -240,14 +316,11 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, * the first slot in indirect ring is already preset * to point to the header in reserved region */ - struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; - - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_indir); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_indir, txr); start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); start_dp[idx].flags = VRING_DESC_F_INDIRECT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; /* loop below will fill in rest of the indirect elements */ start_dp = txr[idx].tx_indir; @@ -256,15 +329,59 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, /* setup first tx ring slot to point to header * stored in reserved region. */ - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; + idx = start_dp[idx].next; } + /* Checksum Offload / TSO */ + if (offload) { + if (cookie->ol_flags & PKT_TX_TCP_SEG) + cookie->ol_flags |= PKT_TX_TCP_CKSUM; + + switch (cookie->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_UDP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct udp_hdr, + dgram_cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + case PKT_TX_TCP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct tcp_hdr, cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + default: + ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); + ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); + ASSIGN_UNLESS_EQUAL(hdr->flags, 0); + break; + } + + /* TCP Segmentation Offload */ + if (cookie->ol_flags & PKT_TX_TCP_SEG) { + virtio_tso_fix_cksum(cookie); + hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? 
+ VIRTIO_NET_HDR_GSO_TCPV6 : + VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_size = cookie->tso_segsz; + hdr->hdr_len = + cookie->l2_len + + cookie->l3_len + + cookie->l4_len; + } else { + ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); + ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); + } + } + do { start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); start_dp[idx].len = cookie->data_len; @@ -282,207 +399,120 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, vq_update_avail_ring(vq, head_idx); } -static void -virtio_dev_vring_start(struct virtqueue *vq) -{ - int size = vq->vq_nentries; - struct vring *vr = &vq->vq_ring; - uint8_t *ring_mem = vq->vq_ring_virt_mem; - - PMD_INIT_FUNC_TRACE(); - - /* - * Reinitialise since virtio port might have been stopped and restarted - */ - memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); - vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); - vq->vq_used_cons_idx = 0; - vq->vq_desc_head_idx = 0; - vq->vq_avail_idx = 0; - vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); - vq->vq_free_cnt = vq->vq_nentries; - memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); - - vring_desc_init(vr->desc, size); - - /* - * Disable device(host) interrupting guest - */ - virtqueue_disable_intr(vq); -} - void virtio_dev_cq_start(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; if (hw->cvq && hw->cvq->vq) { - virtio_dev_vring_start(hw->cvq->vq); VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); } } -void -virtio_dev_rxtx_start(struct rte_eth_dev *dev) +int +virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id __rte_unused, + __rte_unused const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) { - /* - * Start receive and transmit vrings - * - Setup vring structure for all queues - * - Initialize descriptor for the rx vring - * - Allocate blank mbufs for the each rx descriptor - * - */ - uint16_t i; + uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; + struct virtio_hw *hw = dev->data->dev_private; + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; + struct virtnet_rx *rxvq; + int error, nbufs; + struct rte_mbuf *m; uint16_t desc_idx; PMD_INIT_FUNC_TRACE(); - /* Start rx vring. 
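For the checksum/TSO branch completed above to emit a meaningful virtio_net_hdr, the caller must first describe the frame in the mbuf. A minimal sketch of that, assuming DPDK's mbuf API of this release (PKT_TX_* flags, l2/l3/l4_len and tso_segsz); the 14/20/20 lengths assume an option-less Ethernet/IPv4/TCP frame whose TCP checksum already carries the DPDK-convention pseudo-header seed:

    #include <rte_mbuf.h>

    /* Per-packet metadata the TSO path above relies on. */
    static void request_tso(struct rte_mbuf *m, uint16_t mss)
    {
        m->l2_len = 14;                 /* Ethernet */
        m->l3_len = 20;                 /* IPv4, no options */
        m->l4_len = 20;                 /* TCP, no options */
        m->tso_segsz = mss;
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
    }

With PKT_TX_TCP_SEG set, the enqueue path above forces PKT_TX_TCP_CKSUM on, fills csum_start/csum_offset, and derives gso_type, gso_size and hdr_len from these fields.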
*/ - for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtnet_rx *rxvq = dev->data->rx_queues[i]; - struct virtqueue *vq = rxvq->vq; - int error, nbufs; - struct rte_mbuf *m; - - virtio_dev_vring_start(vq); - if (rxvq->mpool == NULL) { - rte_exit(EXIT_FAILURE, - "Cannot allocate mbufs for rx virtqueue"); - } - - /* Allocate blank mbufs for the each rx descriptor */ - nbufs = 0; - error = ENOSPC; - -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) { - for (desc_idx = 0; desc_idx < vq->vq_nentries; - desc_idx++) { - vq->vq_ring.avail->ring[desc_idx] = desc_idx; - vq->vq_ring.desc[desc_idx].flags = - VRING_DESC_F_WRITE; - } - } -#endif - memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); - for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; - desc_idx++) { - vq->sw_ring[vq->vq_nentries + desc_idx] = - &rxvq->fake_mbuf; - } - - while (!virtqueue_full(vq)) { - m = rte_mbuf_raw_alloc(rxvq->mpool); - if (m == NULL) - break; + if (nb_desc == 0 || nb_desc > vq->vq_nentries) + nb_desc = vq->vq_nentries; + vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); - /****************************************** - * Enqueue allocated buffers * - *******************************************/ -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) - error = virtqueue_enqueue_recv_refill_simple(vq, m); - else -#endif - error = virtqueue_enqueue_recv_refill(vq, m); - if (error) { - rte_pktmbuf_free(m); - break; - } - nbufs++; - } + rxvq = &vq->rxq; + rxvq->queue_id = queue_idx; + rxvq->mpool = mp; + if (rxvq->mpool == NULL) { + rte_exit(EXIT_FAILURE, + "Cannot allocate mbufs for rx virtqueue"); + } + dev->data->rx_queues[queue_idx] = rxvq; - vq_update_avail_idx(vq); - PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); + /* Allocate blank mbufs for the each rx descriptor */ + nbufs = 0; + error = ENOSPC; - VIRTQUEUE_DUMP(vq); + if (hw->use_simple_rxtx) { + for (desc_idx = 0; desc_idx < vq->vq_nentries; + desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + vq->vq_ring.desc[desc_idx].flags = + VRING_DESC_F_WRITE; + } } - /* Start tx vring. 
*/ - for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtnet_tx *txvq = dev->data->tx_queues[i]; - struct virtqueue *vq = txvq->vq; - - virtio_dev_vring_start(vq); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) { - uint16_t mid_idx = vq->vq_nentries >> 1; - - for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { - vq->vq_ring.avail->ring[desc_idx] = - desc_idx + mid_idx; - vq->vq_ring.desc[desc_idx + mid_idx].next = - desc_idx; - vq->vq_ring.desc[desc_idx + mid_idx].addr = - txvq->virtio_net_hdr_mem + - offsetof(struct virtio_tx_region, tx_hdr); - vq->vq_ring.desc[desc_idx + mid_idx].len = - vq->hw->vtnet_hdr_size; - vq->vq_ring.desc[desc_idx + mid_idx].flags = - VRING_DESC_F_NEXT; - vq->vq_ring.desc[desc_idx].flags = 0; - } - for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; - desc_idx++) - vq->vq_ring.avail->ring[desc_idx] = desc_idx; - } -#endif - VIRTQUEUE_DUMP(vq); + memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; + desc_idx++) { + vq->sw_ring[vq->vq_nentries + desc_idx] = + &rxvq->fake_mbuf; } -} -int -virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, - uint16_t queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - __rte_unused const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mp) -{ - uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; - struct virtnet_rx *rxvq; - int ret; + while (!virtqueue_full(vq)) { + m = rte_mbuf_raw_alloc(rxvq->mpool); + if (m == NULL) + break; - PMD_INIT_FUNC_TRACE(); - ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, (void **)&rxvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "rvq initialization failed"); - return ret; + /* Enqueue allocated buffers */ + if (hw->use_simple_rxtx) + error = virtqueue_enqueue_recv_refill_simple(vq, m); + else + error = virtqueue_enqueue_recv_refill(vq, m); + + if (error) { + rte_pktmbuf_free(m); + break; + } + nbufs++; } - /* Create mempool for rx mbuf allocation */ - rxvq->mpool = mp; + vq_update_avail_idx(vq); - dev->data->rx_queues[queue_idx] = rxvq; + PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 virtio_rxq_vec_setup(rxvq); -#endif + + VIRTQUEUE_DUMP(vq); return 0; } -void -virtio_dev_rx_queue_release(void *rxq) +static void +virtio_update_rxtx_handler(struct rte_eth_dev *dev, + const struct rte_eth_txconf *tx_conf) { - struct virtnet_rx *rxvq = rxq; - struct virtqueue *vq; - const struct rte_memzone *mz; - - if (rxvq == NULL) - return; - - /* - * rxvq is freed when vq is freed, and as mz should be freed after the - * del_queue, so we reserve the mz pointer first. 
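The arch-conditional block in virtio_update_rxtx_handler() above boils down to a single predicate for choosing the simple (vectorized) rx/tx path. A hedged restatement, keeping the names used in the hunk (VIRTIO_SIMPLE_FLAGS stands for single-segment TX with no offloads); the helper name is illustrative only:

/* Sketch: condition under which virtio_update_rxtx_handler() installs
 * virtio_xmit_pkts_simple / virtio_recv_pkts_vec. */
static int
sketch_use_simple_rxtx(struct virtio_hw *hw, const struct rte_eth_txconf *tx_conf)
{
	int vec_capable = 0;

#if defined RTE_ARCH_X86
	vec_capable = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3);
#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
	vec_capable = rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON);
#endif

	return vec_capable &&
		(tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
		!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF);
}

The decision is made once, at TX queue setup time, so the chosen burst functions stay in place for the life of the port.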
- */ - vq = rxvq->vq; - mz = rxvq->mz; + uint8_t use_simple_rxtx = 0; + struct virtio_hw *hw = dev->data->dev_private; - virtio_dev_queue_release(vq); - rte_memzone_free(mz); +#if defined RTE_ARCH_X86 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3)) + use_simple_rxtx = 1; +#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) + use_simple_rxtx = 1; +#endif + /* Use simple rx/tx func if single segment and no offloads */ + if (use_simple_rxtx && + (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { + PMD_INIT_LOG(INFO, "Using simple rx/tx path"); + dev->tx_pkt_burst = virtio_xmit_pkts_simple; + dev->rx_pkt_burst = virtio_recv_pkts_vec; + hw->use_simple_rxtx = use_simple_rxtx; + } } /* @@ -496,45 +526,26 @@ int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, - unsigned int socket_id, + unsigned int socket_id __rte_unused, const struct rte_eth_txconf *tx_conf) { uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; - -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 struct virtio_hw *hw = dev->data->dev_private; -#endif + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; struct virtnet_tx *txvq; - struct virtqueue *vq; uint16_t tx_free_thresh; - int ret; + uint16_t desc_idx; PMD_INIT_FUNC_TRACE(); - if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) - != ETH_TXQ_FLAGS_NOXSUMS) { - PMD_INIT_LOG(ERR, "TX checksum offload not supported\n"); - return -EINVAL; - } + virtio_update_rxtx_handler(dev, tx_conf); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - /* Use simple rx/tx func if single segment and no offloads */ - if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && - !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { - PMD_INIT_LOG(INFO, "Using simple rx/tx path"); - dev->tx_pkt_burst = virtio_xmit_pkts_simple; - dev->rx_pkt_burst = virtio_recv_pkts_vec; - use_simple_rxtx = 1; - } -#endif + if (nb_desc == 0 || nb_desc > vq->vq_nentries) + nb_desc = vq->vq_nentries; + vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); - ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, (void **)&txvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "tvq initialization failed"); - return ret; - } - vq = txvq->vq; + txvq = &vq->txq; + txvq->queue_id = queue_idx; tx_free_thresh = tx_conf->tx_free_thresh; if (tx_free_thresh == 0) @@ -552,32 +563,32 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_thresh = tx_free_thresh; - dev->data->tx_queues[queue_idx] = txvq; - return 0; -} - -void -virtio_dev_tx_queue_release(void *txq) -{ - struct virtnet_tx *txvq = txq; - struct virtqueue *vq; - const struct rte_memzone *mz; - const struct rte_memzone *hdr_mz; - - if (txvq == NULL) - return; + if (hw->use_simple_rxtx) { + uint16_t mid_idx = vq->vq_nentries >> 1; + + for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = + desc_idx + mid_idx; + vq->vq_ring.desc[desc_idx + mid_idx].next = + desc_idx; + vq->vq_ring.desc[desc_idx + mid_idx].addr = + txvq->virtio_net_hdr_mem + + offsetof(struct virtio_tx_region, tx_hdr); + vq->vq_ring.desc[desc_idx + mid_idx].len = + vq->hw->vtnet_hdr_size; + vq->vq_ring.desc[desc_idx + mid_idx].flags = + VRING_DESC_F_NEXT; + vq->vq_ring.desc[desc_idx].flags = 0; + } + for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; + desc_idx++) + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + } - /* - * txvq is freed when vq is freed, and as mz should be 
freed after the - * del_queue, so we reserve the mz pointer first. - */ - vq = txvq->vq; - mz = txvq->mz; - hdr_mz = txvq->virtio_net_hdr_mz; + VIRTQUEUE_DUMP(vq); - virtio_dev_queue_release(vq); - rte_memzone_free(mz); - rte_memzone_free(hdr_mz); + dev->data->tx_queues[queue_idx] = txvq; + return 0; } static void @@ -627,6 +638,86 @@ virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) } } +/* Optionally fill offload information in structure */ +static int +virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) +{ + struct rte_net_hdr_lens hdr_lens; + uint32_t hdrlen, ptype; + int l4_supported = 0; + + /* nothing to do */ + if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) + return 0; + + m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; + + ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); + m->packet_type = ptype; + if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) + l4_supported = 1; + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; + if (hdr->csum_start <= hdrlen && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_NONE; + } else { + /* Unknown proto or tunnel, do sw cksum. We can assume + * the cksum field is in the first segment since the + * buffers we provided to the host are large enough. + * In case of SCTP, this will be wrong since it's a CRC + * but there's nothing we can do. + */ + uint16_t csum, off; + + rte_raw_cksum_mbuf(m, hdr->csum_start, + rte_pktmbuf_pkt_len(m) - hdr->csum_start, + &csum); + if (likely(csum != 0xffff)) + csum = ~csum; + off = hdr->csum_offset + hdr->csum_start; + if (rte_pktmbuf_data_len(m) >= off + 1) + *rte_pktmbuf_mtod_offset(m, uint16_t *, + off) = csum; + } + } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + /* GSO request, save required information in mbuf */ + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + /* Check unsupported modes */ + if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || + (hdr->gso_size == 0)) { + return -EINVAL; + } + + /* Update mss lengthes in mbuf */ + m->tso_segsz = hdr->gso_size; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + m->ol_flags |= PKT_RX_LRO | \ + PKT_RX_L4_CKSUM_NONE; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static inline int +rx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); +} + #define VIRTIO_MBUF_BURST_SZ 64 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) uint16_t @@ -642,6 +733,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) int error; uint32_t i, nb_enqueued; uint32_t hdr_size; + int offload; + struct virtio_net_hdr *hdr; nb_used = VIRTQUEUE_NUSED(vq); @@ -659,6 +752,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); for (i = 0; i < num ; i++) { rxm = rcv_pkts[i]; @@ -683,9 +777,18 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); + hdr = (struct 
virtio_net_hdr *)((char *)rxm->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + if (hw->vlan_strip) rte_vlan_strip(rxm); + if (offload && virtio_rx_offload(rxm, hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + VIRTIO_DUMP_PACKET(rxm, rxm->data_len); rx_pkts[nb_rx++] = rxm; @@ -745,6 +848,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint16_t extra_idx; uint32_t seg_res; uint32_t hdr_size; + int offload; nb_used = VIRTQUEUE_NUSED(vq); @@ -760,6 +864,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, extra_idx = 0; seg_res = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); while (i < nb_used) { struct virtio_net_hdr_mrg_rxbuf *header; @@ -805,6 +910,12 @@ virtio_recv_mergeable_pkts(void *rx_queue, rx_pkts[nb_rx] = rxm; prev = rxm; + if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + seg_res = seg_num - 1; while (seg_res != 0) { @@ -925,7 +1036,8 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } /* optimize ring usage */ - if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && + if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || + vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) && txm->nb_segs == 1 && diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.h b/src/dpdk/drivers/net/virtio/virtio_rxtx.h index 058b56a1..28f82d6a 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx.h +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.h @@ -86,10 +86,9 @@ struct virtnet_ctl { const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ }; -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, struct rte_mbuf *m); -#endif + #endif /* _VIRTIO_RXTX_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c index 6517aa80..b651e53b 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c @@ -37,8 +37,6 @@ #include #include -#include - #include #include #include @@ -53,14 +51,7 @@ #include #include -#include "virtio_logs.h" -#include "virtio_ethdev.h" -#include "virtqueue.h" -#include "virtio_rxtx.h" - -#define RTE_VIRTIO_VPMD_RX_BURST 32 -#define RTE_VIRTIO_DESC_PER_LOOP 8 -#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST +#include "virtio_rxtx_simple.h" #ifndef __INTEL_COMPILER #pragma GCC diagnostic ignored "-Wcast-qual" @@ -92,257 +83,6 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, return 0; } -static inline void -virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) -{ - int i; - uint16_t desc_idx; - struct rte_mbuf **sw_ring; - struct vring_desc *start_dp; - int ret; - struct virtqueue *vq = rxvq->vq; - - desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); - sw_ring = &vq->sw_ring[desc_idx]; - start_dp = &vq->vq_ring.desc[desc_idx]; - - ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, - RTE_VIRTIO_VPMD_RX_REARM_THRESH); - if (unlikely(ret)) { - rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += - RTE_VIRTIO_VPMD_RX_REARM_THRESH; - return; - } - - for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { - uintptr_t p; - - p = (uintptr_t)&sw_ring[i]->rearm_data; - *(uint64_t *)p = rxvq->mbuf_initializer; - - start_dp[i].addr = - VIRTIO_MBUF_ADDR(sw_ring[i], vq) + - RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; - 
start_dp[i].len = sw_ring[i]->buf_len - - RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; - } - - vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; - vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; - vq_update_avail_idx(vq); -} - -/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) - * - * This routine is for non-mergeable RX, one desc for each guest buffer. - * This routine is based on the RX ring layout optimization. Each entry in the - * avail ring points to the desc with the same index in the desc ring and this - * will never be changed in the driver. - * - * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet - */ -uint16_t -virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - struct virtnet_rx *rxvq = rx_queue; - struct virtqueue *vq = rxvq->vq; - uint16_t nb_used; - uint16_t desc_idx; - struct vring_used_elem *rused; - struct rte_mbuf **sw_ring; - struct rte_mbuf **sw_ring_end; - uint16_t nb_pkts_received; - __m128i shuf_msk1, shuf_msk2, len_adjust; - - shuf_msk1 = _mm_set_epi8( - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, /* vlan tci */ - 5, 4, /* dat len */ - 0xFF, 0xFF, 5, 4, /* pkt len */ - 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ - - ); - - shuf_msk2 = _mm_set_epi8( - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, /* vlan tci */ - 13, 12, /* dat len */ - 0xFF, 0xFF, 13, 12, /* pkt len */ - 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ - ); - - /* Subtract the header length. - * In which case do we need the header length in used->len ? - */ - len_adjust = _mm_set_epi16( - 0, 0, - 0, - (uint16_t)-vq->hw->vtnet_hdr_size, - 0, (uint16_t)-vq->hw->vtnet_hdr_size, - 0, 0); - - if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) - return 0; - - nb_used = VIRTQUEUE_NUSED(vq); - - rte_compiler_barrier(); - - if (unlikely(nb_used == 0)) - return 0; - - nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); - nb_used = RTE_MIN(nb_used, nb_pkts); - - desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - rused = &vq->vq_ring.used->ring[desc_idx]; - sw_ring = &vq->sw_ring[desc_idx]; - sw_ring_end = &vq->sw_ring[vq->vq_nentries]; - - _mm_prefetch((const void *)rused, _MM_HINT_T0); - - if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { - virtio_rxq_rearm_vec(rxvq); - if (unlikely(virtqueue_kick_prepare(vq))) - virtqueue_notify(vq); - } - - for (nb_pkts_received = 0; - nb_pkts_received < nb_used;) { - __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; - __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; - __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; - - mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); - desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); - _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); - - mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); - desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); - _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); - - mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); - desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); - _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); - - mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); - desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); - _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); - - pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); - pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); - pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); - pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1, - pkt_mb[1]); - _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, - 
pkt_mb[0]); - - pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); - pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); - pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); - pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, - pkt_mb[3]); - _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, - pkt_mb[2]); - - pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2); - pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); - pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); - pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1, - pkt_mb[5]); - _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, - pkt_mb[4]); - - pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); - pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); - pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); - pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, - pkt_mb[7]); - _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, - pkt_mb[6]); - - if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { - if (sw_ring + nb_used <= sw_ring_end) - nb_pkts_received += nb_used; - else - nb_pkts_received += sw_ring_end - sw_ring; - break; - } else { - if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= - sw_ring_end)) { - nb_pkts_received += sw_ring_end - sw_ring; - break; - } else { - nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; - - rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; - sw_ring += RTE_VIRTIO_DESC_PER_LOOP; - rused += RTE_VIRTIO_DESC_PER_LOOP; - nb_used -= RTE_VIRTIO_DESC_PER_LOOP; - } - } - } - - vq->vq_used_cons_idx += nb_pkts_received; - vq->vq_free_cnt += nb_pkts_received; - rxvq->stats.packets += nb_pkts_received; - return nb_pkts_received; -} - -#define VIRTIO_TX_FREE_THRESH 32 -#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 -#define VIRTIO_TX_FREE_NR 32 -/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ -static inline void -virtio_xmit_cleanup(struct virtqueue *vq) -{ - uint16_t i, desc_idx; - uint32_t nb_free = 0; - struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; - - desc_idx = (uint16_t)(vq->vq_used_cons_idx & - ((vq->vq_nentries >> 1) - 1)); - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (likely(m != NULL)) { - free[0] = m; - nb_free = 1; - for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (likely(m != NULL)) { - if (likely(m->pool == free[0]->pool)) - free[nb_free++] = m; - else { - rte_mempool_put_bulk(free[0]->pool, - (void **)free, - RTE_MIN(RTE_DIM(free), - nb_free)); - free[0] = m; - nb_free = 1; - } - } - } - rte_mempool_put_bulk(free[0]->pool, (void **)free, - RTE_MIN(RTE_DIM(free), nb_free)); - } else { - for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (m != NULL) - rte_mempool_put(m->pool, m); - } - } - - vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; - vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); -} - uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -423,3 +163,13 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq) return 0; } + +/* Stub for linkage when arch specific implementation is not available */ +uint16_t __attribute__((weak)) +virtio_recv_pkts_vec(void *rx_queue __rte_unused, + struct rte_mbuf **rx_pkts __rte_unused, + uint16_t 
nb_pkts __rte_unused) +{ + rte_panic("Wrong weak function linked by linker\n"); + return 0; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h new file mode 100644 index 00000000..b08f8594 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h @@ -0,0 +1,136 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
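The __attribute__((weak)) stub above is the usual link-time fallback: the SSE and NEON files added further down each provide a strong definition of virtio_recv_pkts_vec(), and the panicking stub is only retained when neither is compiled in. A minimal sketch of the pattern, with an illustrative function name:

/* generic file: weak default, used only if nothing overrides it */
uint16_t __attribute__((weak))
sketch_rx_burst_vec(void *q __rte_unused, struct rte_mbuf **pkts __rte_unused,
		uint16_t n __rte_unused)
{
	rte_panic("no vectorized implementation linked\n");
	return 0;
}

/* arch-specific file: a strong definition with the same name silently
 * replaces the weak one at link time, so no run-time dispatch is needed */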
+ */ + +#ifndef _VIRTIO_RXTX_SIMPLE_H_ +#define _VIRTIO_RXTX_SIMPLE_H_ + +#include + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +static inline void +virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) +{ + int i; + uint16_t desc_idx; + struct rte_mbuf **sw_ring; + struct vring_desc *start_dp; + int ret; + struct virtqueue *vq = rxvq->vq; + + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + sw_ring = &vq->sw_ring[desc_idx]; + start_dp = &vq->vq_ring.desc[desc_idx]; + + ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, + RTE_VIRTIO_VPMD_RX_REARM_THRESH); + if (unlikely(ret)) { + rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += + RTE_VIRTIO_VPMD_RX_REARM_THRESH; + return; + } + + for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { + uintptr_t p; + + p = (uintptr_t)&sw_ring[i]->rearm_data; + *(uint64_t *)p = rxvq->mbuf_initializer; + + start_dp[i].addr = + VIRTIO_MBUF_ADDR(sw_ring[i], vq) + + RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; + start_dp[i].len = sw_ring[i]->buf_len - + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; + } + + vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq_update_avail_idx(vq); +} + +#define VIRTIO_TX_FREE_THRESH 32 +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 +#define VIRTIO_TX_FREE_NR 32 +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ +static inline void +virtio_xmit_cleanup(struct virtqueue *vq) +{ + uint16_t i, desc_idx; + uint32_t nb_free = 0; + struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & + ((vq->vq_nentries >> 1) - 1)); + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) + free[nb_free++] = m; + else { + rte_mempool_put_bulk(free[0]->pool, + (void **)free, + RTE_MIN(RTE_DIM(free), + nb_free)); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, + RTE_MIN(RTE_DIM(free), nb_free)); + } else { + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; + vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); +} + +#endif /* _VIRTIO_RXTX_SIMPLE_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c new file mode 100644 index 00000000..793eefbe --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c @@ -0,0 +1,235 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virtio_rxtx_simple.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. + * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. + * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + + uint8x16_t shuf_msk1 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ + 4, 5, 0xFF, 0xFF, /* pkt len */ + 4, 5, /* dat len */ + 0xFF, 0xFF, /* vlan tci */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + uint8x16_t shuf_msk2 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ + 12, 13, 0xFF, 0xFF, /* pkt len */ + 12, 13, /* dat len */ + 0xFF, 0xFF, /* vlan tci */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + /* Subtract the header length. + * In which case do we need the header length in used->len ? 
+ */ + uint16x8_t len_adjust = { + 0, 0, + (uint16_t)vq->hw->vtnet_hdr_size, 0, + (uint16_t)vq->hw->vtnet_hdr_size, + 0, + 0, 0 + }; + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = VIRTQUEUE_NUSED(vq); + + rte_rmb(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; + + rte_prefetch_non_temporal(rused); + + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0)); + desc[0] = vld1q_u64((uint64_t *)(rused + 0)); + vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]); + + mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2)); + desc[1] = vld1q_u64((uint64_t *)(rused + 2)); + vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]); + + mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4)); + desc[2] = vld1q_u64((uint64_t *)(rused + 4)); + vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]); + + mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6)); + desc[3] = vld1q_u64((uint64_t *)(rused + 6)); + vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[0]), shuf_msk2)); + pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[0]), shuf_msk1)); + pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[1]), len_adjust)); + pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[0]), len_adjust)); + vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[1]), shuf_msk2)); + pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[1]), shuf_msk1)); + pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[3]), len_adjust)); + pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[2]), len_adjust)); + vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[2]), shuf_msk2)); + pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[2]), shuf_msk1)); + pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[5]), len_adjust)); + pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[4]), len_adjust)); + vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[3]), shuf_msk2)); + pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[3]), shuf_msk1)); + pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[7]), len_adjust)); + pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[6]), len_adjust)); + vst1q_u64((void 
*)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; + return nb_pkts_received; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c new file mode 100644 index 00000000..87bb5c63 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virtio_rxtx_simple.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. + * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. 
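Per descriptor, the shuffle masks and len_adjust used by these vector RX routines amount to the scalar step below (used->len includes the virtio-net header, hence the subtraction; packet_type and vlan_tci end up zero in the vector path). The helper name is illustrative only:

/* Scalar sketch of one mbuf fill in the vector RX loop. */
static void
sketch_fill_rx_mbuf(struct rte_mbuf *m, const struct vring_used_elem *used,
		uint16_t vtnet_hdr_size)
{
	uint32_t len = used->len - vtnet_hdr_size; /* strip virtio-net header */

	m->pkt_len = len;
	m->data_len = (uint16_t)len;
	m->packet_type = 0;
	m->vlan_tci = 0;
}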
+ * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + __m128i shuf_msk1, shuf_msk2, len_adjust; + + shuf_msk1 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 5, 4, /* dat len */ + 0xFF, 0xFF, 5, 4, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + + ); + + shuf_msk2 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 13, 12, /* dat len */ + 0xFF, 0xFF, 13, 12, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + ); + + /* Subtract the header length. + * In which case do we need the header length in used->len ? + */ + len_adjust = _mm_set_epi16( + 0, 0, + 0, + (uint16_t)-vq->hw->vtnet_hdr_size, + 0, (uint16_t)-vq->hw->vtnet_hdr_size, + 0, 0); + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = VIRTQUEUE_NUSED(vq); + + rte_compiler_barrier(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; + + rte_prefetch0(rused); + + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); + desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); + _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); + + mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); + desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); + _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); + + mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); + desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); + _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); + + mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); + desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); + _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); + pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); + pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); + pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); + pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); + pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); + pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2); + pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); + pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); + pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); + _mm_storeu_si128((void 
*)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); + pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); + pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); + pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; + return nb_pkts_received; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 00000000..5c983bd4 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include +#include + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. 
If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. + */ + uint64_t log_guest_addr; +}; + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +}; + +const char * const vhost_msg_strings[VHOST_USER_MAX]; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t mmap_offset; +}; + +struct virtio_user_dev; + +struct virtio_user_backend_ops { + int (*setup)(struct virtio_user_dev *dev); + int (*send_request)(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg); + int (*enable_qp)(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable); +}; + +struct virtio_user_backend_ops ops_user; +struct virtio_user_backend_ops ops_kernel; + +#endif diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c new file mode 100644 index 00000000..05aa6c6d --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c @@ -0,0 +1,403 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "vhost_kernel_tap.h" + +struct vhost_memory_kernel { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[0]; +}; + +/* vhost kernel ioctls */ +#define VHOST_VIRTIO 0xAF +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +static uint64_t max_regions = 64; + +static void +get_vhost_kernel_max_regions(void) +{ + int fd; + char buf[20] = {'\0'}; + + fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); + if (fd < 0) + return; + + if (read(fd, buf, sizeof(buf) - 1) > 0) + max_regions = strtoull(buf, NULL, 10); + + close(fd); +} + +static uint64_t vhost_req_user_to_kernel[] = { + [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, + [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, + [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, + [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, + [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, + [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, + [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, + [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, + [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, + [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, + [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, +}; + +/* By default, vhost kernel module allows 64 regions, but DPDK allows + * 256 segments. As a relief, below function merges those virtually + * adjacent memsegs into one region. 
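prepare_vhost_memory_kernel() below implements that merge with a forward and a backward contiguity check, so up to 256 DPDK memsegs can be squeezed into the 64 regions vhost-kernel accepts by default. A hedged sketch of the rule with an illustrative helper name (the virtual address is deliberately reused as guest_phys_addr, as the code notes):

/* Sketch: fold a memseg into an existing region when they are contiguous. */
static int
sketch_try_merge_region(struct vhost_memory_region *mr,
		uint64_t seg_addr, uint64_t seg_len)
{
	if (mr->userspace_addr + mr->memory_size == seg_addr) {
		mr->memory_size += seg_len;		/* grows forward */
		return 1;
	}
	if (seg_addr + seg_len == mr->userspace_addr) {
		mr->guest_phys_addr = seg_addr;		/* vaddr used as gpa */
		mr->userspace_addr = seg_addr;		/* grows backward */
		mr->memory_size += seg_len;
		return 1;
	}
	return 0;					/* needs a new region */
}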
+ */ +static struct vhost_memory_kernel * +prepare_vhost_memory_kernel(void) +{ + uint32_t i, j, k = 0; + struct rte_memseg *seg; + struct vhost_memory_region *mr; + struct vhost_memory_kernel *vm; + + vm = malloc(sizeof(struct vhost_memory_kernel) + + max_regions * + sizeof(struct vhost_memory_region)); + if (!vm) + return NULL; + + for (i = 0; i < RTE_MAX_MEMSEG; ++i) { + seg = &rte_eal_get_configuration()->mem_config->memseg[i]; + if (!seg->addr) + break; + + int new_region = 1; + + for (j = 0; j < k; ++j) { + mr = &vm->regions[j]; + + if (mr->userspace_addr + mr->memory_size == + (uint64_t)(uintptr_t)seg->addr) { + mr->memory_size += seg->len; + new_region = 0; + break; + } + + if ((uint64_t)(uintptr_t)seg->addr + seg->len == + mr->userspace_addr) { + mr->guest_phys_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->memory_size += seg->len; + new_region = 0; + break; + } + } + + if (new_region == 0) + continue; + + mr = &vm->regions[k++]; + /* use vaddr here! */ + mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; + mr->memory_size = seg->len; + mr->mmap_offset = 0; + + if (k >= max_regions) { + free(vm); + return NULL; + } + } + + vm->nregions = k; + vm->padding = 0; + return vm; +} + +/* with below features, vhost kernel does not need to do the checksum and TSO, + * these info will be passed to virtio_user through virtio net header. + */ +#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + +/* with below features, when flows from virtio_user to vhost kernel + * (1) if flows goes up through the kernel networking stack, it does not need + * to verify checksum, which can save CPU cycles; + * (2) if flows goes through a Linux bridge and outside from an interface + * (kernel driver), checksum and TSO will be done by GSO in kernel or even + * offloaded into real physical device. 
+ */ +#define VHOST_KERNEL_HOST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM)) + +static int +tap_supporte_mq(void) +{ + int tapfd; + unsigned int tap_features; + + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + close(tapfd); + return -1; + } + + close(tapfd); + return tap_features & IFF_MULTI_QUEUE; +} + +static int +vhost_kernel_ioctl(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + int ret = -1; + unsigned int i; + uint64_t req_kernel; + struct vhost_memory_kernel *vm = NULL; + int vhostfd; + unsigned int queue_sel; + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + req_kernel = vhost_req_user_to_kernel[req]; + + if (req_kernel == VHOST_SET_MEM_TABLE) { + vm = prepare_vhost_memory_kernel(); + if (!vm) + return -1; + arg = (void *)vm; + } + + if (req_kernel == VHOST_SET_FEATURES) { + /* We don't need memory protection here */ + *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + + /* VHOST kernel does not know about below flags */ + *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; + + *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); + } + + switch (req_kernel) { + case VHOST_SET_VRING_NUM: + case VHOST_SET_VRING_ADDR: + case VHOST_SET_VRING_BASE: + case VHOST_GET_VRING_BASE: + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + queue_sel = *(unsigned int *)arg; + vhostfd = dev->vhostfds[queue_sel / 2]; + *(unsigned int *)arg = queue_sel % 2; + PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", + vhostfd, *(unsigned int *)arg); + break; + default: + vhostfd = -1; + } + if (vhostfd == -1) { + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (dev->vhostfds[i] < 0) + continue; + + ret = ioctl(dev->vhostfds[i], req_kernel, arg); + if (ret < 0) + break; + } + } else { + ret = ioctl(vhostfd, req_kernel, arg); + } + + if (!ret && req_kernel == VHOST_GET_FEATURES) { + /* with tap as the backend, all these features are supported + * but not claimed by vhost-net, so we add them back when + * reporting to upper layer. + */ + *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + + /* vhost_kernel will not declare this feature, but it does + * support multi-queue. + */ + if (tap_supporte_mq()) + *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); + } + + if (vm) + free(vm); + + if (ret < 0) + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + + return ret; +} + +/** + * Set up environment to talk with a vhost kernel backend. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful. 
+ */ +static int +vhost_kernel_setup(struct virtio_user_dev *dev) +{ + int vhostfd; + uint32_t i; + + get_vhost_kernel_max_regions(); + + for (i = 0; i < dev->max_queue_pairs; ++i) { + vhostfd = open(dev->path, O_RDWR); + if (vhostfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s, %s", + dev->path, strerror(errno)); + return -1; + } + + dev->vhostfds[i] = vhostfd; + } + + return 0; +} + +static int +vhost_kernel_set_backend(int vhostfd, int tapfd) +{ + struct vhost_vring_file f; + + f.fd = tapfd; + f.index = 0; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + f.index = 1; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + return 0; +} + +static int +vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) +{ + int hdr_size; + int vhostfd; + int tapfd; + int req_mq = (dev->max_queue_pairs > 1); + + vhostfd = dev->vhostfds[pair_idx]; + + if (!enable) { + if (dev->tapfds[pair_idx]) { + close(dev->tapfds[pair_idx]); + dev->tapfds[pair_idx] = -1; + } + return vhost_kernel_set_backend(vhostfd, -1); + } else if (dev->tapfds[pair_idx] >= 0) { + return 0; + } + + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || + (dev->features & (1ULL << VIRTIO_F_VERSION_1))) + hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hdr_size = sizeof(struct virtio_net_hdr); + + tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); + return -1; + } + + if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); + close(tapfd); + return -1; + } + + dev->tapfds[pair_idx] = tapfd; + return 0; +} + +struct virtio_user_backend_ops ops_kernel = { + .setup = vhost_kernel_setup, + .send_request = vhost_kernel_ioctl, + .enable_qp = vhost_kernel_enable_queue_pair +}; diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c new file mode 100644 index 00000000..f585de8c --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost_kernel_tap.h" +#include "../virtio_logs.h" + +int +vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq) +{ + unsigned int tap_features; + int sndbuf = INT_MAX; + struct ifreq ifr; + int tapfd; + unsigned int offload = + TUN_F_CSUM | + TUN_F_TSO4 | + TUN_F_TSO6 | + TUN_F_TSO_ECN | + TUN_F_UFO; + + /* TODO: + * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len + * 2. get number of memory regions from vhost module parameter + * max_mem_regions, supported in newer version linux kernel + */ + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + /* Construct ifr */ + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + goto error; + } + if (tap_features & IFF_ONE_QUEUE) + ifr.ifr_flags |= IFF_ONE_QUEUE; + + /* Let tap instead of vhost-net handle vnet header, as the latter does + * not support offloading. And in this case, we should not set feature + * bit VHOST_NET_F_VIRTIO_NET_HDR. + */ + if (tap_features & IFF_VNET_HDR) { + ifr.ifr_flags |= IFF_VNET_HDR; + } else { + PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR"); + goto error; + } + + if (req_mq) + ifr.ifr_flags |= IFF_MULTI_QUEUE; + + if (*p_ifname) + strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ); + else + strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ); + if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) { + PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno)); + goto error; + } + + fcntl(tapfd, F_SETFL, O_NONBLOCK); + + if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) { + PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno)); + goto error; + } + + if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) { + PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno)); + goto error; + } + + /* TODO: before set the offload capabilities, we'd better (1) check + * negotiated features to see if necessary to offload; (2) query tap + * to see if it supports the offload capabilities. + */ + if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0) + PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s", + strerror(errno)); + + if (!(*p_ifname)) + *p_ifname = strdup(ifr.ifr_name); + + return tapfd; +error: + close(tapfd); + return -1; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h new file mode 100644 index 00000000..eae340cc --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +/* TUN ioctls */ +#define TUNSETIFF _IOW('T', 202, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNSETVNETBE _IOW('T', 222, int) + +/* TUNSETIFF ifr flags */ +#define IFF_TAP 0x0002 +#define IFF_NO_PI 0x1000 +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ + +/* Constants */ +#define PATH_NET_TUN "/dev/net/tun" + +int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq); diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c new file mode 100644 index 00000000..4ad7b21b --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c @@ -0,0 +1,467 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_MEMORY_MAX_NREGIONS 8 +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + return r; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if (ret < sz_hdr) { + PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", + ret, sz_hdr); + goto fail; + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", + msg->flags, valid_flags); + 
goto fail; + } + + sz_payload = msg->size; + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if (ret < sz_payload) { + PMD_DRV_LOG(ERR, + "Failed to recv msg payload: %d instead of %d.", + ret, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. + */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + return -1; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + PMD_DRV_LOG(ERR, "Failed to parse address"); + goto error; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') /** skip spaces */ + tmp++; + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) + *tail = '\0'; + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) + continue; + + str_start = str_underline - strlen("map"); + if (str_start < tmp) + continue; + + if (sscanf(str_start, "map_%d", &huge_index) != 1) + continue; + + if (idx >= max) { + PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); + goto error; + } + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + snprintf(huges[idx].path, PATH_MAX, "%s", tmp); + idx++; + } + + fclose(f); + return idx; + +error: + fclose(f); + return -1; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mr; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); + return -1; + } + + for (i = 0; i < num; ++i) { + mr = &msg->payload.memory.regions[i]; + mr->guest_phys_addr = huges[i].addr; /* use vaddr! 
*/ + mr->userspace_addr = huges[i].addr; + mr->memory_size = huges[i].size; + mr->mmap_offset = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static struct vhost_user_msg m; + +const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", +}; + +static int +vhost_user_sock(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len; + int vhostfd = dev->vhostfd; + + RTE_SET_USED(m); + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(m.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + if (prepare_vhost_memory_user(&msg, fds) < 0) + return -1; + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(m.payload.memory.nregions); + msg.size += sizeof(m.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(m.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.payload.u64); + if (file->fd > 0) + fds[fd_num++] = file->fd; + else + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + break; + + default: + PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); + return -1; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + return -1; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) + close(fds[i]); + + if (need_reply) { + if (vhost_user_read(vhostfd, &msg) < 0) { + PMD_DRV_LOG(ERR, "Received msg failed: %s", + strerror(errno)); + return -1; + } + + if (req != msg.request) { + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + 
+ switch (req) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.payload.u64)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.payload.state)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + default: + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * + * @return + * - (-1) if fail; + * - (0) if succeed. + */ +static int +vhost_user_setup(struct virtio_user_dev *dev) +{ + int fd; + int flag; + struct sockaddr_un un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); + return -1; + } + + flag = fcntl(fd, F_GETFD); + if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) + PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); + close(fd); + return -1; + } + + dev->vhostfd = fd; + return 0; +} + +static int +vhost_user_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state)) + return -1; + } + + return 0; +} + +struct virtio_user_backend_ops ops_user = { + .setup = vhost_user_setup, + .send_request = vhost_user_sock, + .enable_qp = vhost_user_enable_queue_pair +}; diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 00000000..21ed00d7 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,414 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come + * firstly because vhost depends on this msg to allocate virtqueue + * pair. + */ + int callfd; + struct vhost_vring_file file; + + /* May use invalid flag, but some backend leverages kickfd and callfd as + * criteria to judge if dev is alive. so finally we use real event_fd. + */ + callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno)); + return -1; + } + file.index = queue_sel; + file.fd = callfd; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file); + dev->callfds[queue_sel] = callfd; + + return 0; +} + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = &dev->vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + state.index = queue_sel; + state.num = vring->num; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state); + + state.index = queue_sel; + state.num = 0; /* no reservation */ + dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state); + + dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr); + + /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes + * lastly because vhost depends on this msg to judge if + * virtio is ready. 
+ */ + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (kickfd < 0) { + PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno)); + return -1; + } + file.index = queue_sel; + file.fd = kickfd; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file); + dev->kickfds[queue_sel] = kickfd; + + return 0; +} + +static int +virtio_user_queue_setup(struct virtio_user_dev *dev, + int (*fn)(struct virtio_user_dev *, uint32_t)) +{ + uint32_t i, queue_sel; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX; + if (fn(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i); + return -1; + } + } + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX; + if (fn(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i); + return -1; + } + } + + return 0; +} + +int +virtio_user_start_device(struct virtio_user_dev *dev) +{ + uint64_t features; + int ret; + + /* Step 0: tell vhost to create queues */ + if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0) + goto error; + + /* Step 1: set features */ + features = dev->features; + /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ + features &= ~(1ull << VIRTIO_NET_F_MAC); + /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */ + features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features); + if (ret < 0) + goto error; + PMD_DRV_LOG(INFO, "set features: %" PRIx64, features); + + /* Step 2: share memory regions */ + ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL); + if (ret < 0) + goto error; + + /* Step 3: kick queues */ + if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0) + goto error; + + /* Step 4: enable queues + * we enable the 1st queue pair by default. 
+ */ + dev->ops->enable_qp(dev, 0, 1); + + return 0; +error: + /* TODO: free resource here or caller to check */ + return -1; +} + +int virtio_user_stop_device(struct virtio_user_dev *dev) +{ + uint32_t i; + + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + close(dev->callfds[i]); + close(dev->kickfds[i]); + } + + for (i = 0; i < dev->max_queue_pairs; ++i) + dev->ops->enable_qp(dev, i, 0); + + free(dev->ifname); + dev->ifname = NULL; + + return 0; +} + +static inline void +parse_mac(struct virtio_user_dev *dev, const char *mac) +{ + int i, r; + uint32_t tmp[ETHER_ADDR_LEN]; + + if (!mac) + return; + + r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0], + &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]); + if (r == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = (uint8_t)tmp[i]; + dev->mac_specified = 1; + } else { + /* ignore the wrong mac, use random mac */ + PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac); + } +} + +static int +is_vhost_user_by_type(const char *path) +{ + struct stat sb; + + if (stat(path, &sb) == -1) + return 0; + + return S_ISSOCK(sb.st_mode); +} + +static int +virtio_user_dev_setup(struct virtio_user_dev *dev) +{ + uint32_t i, q; + + dev->vhostfd = -1; + for (i = 0; i < VIRTIO_MAX_VIRTQUEUES * 2 + 1; ++i) { + dev->kickfds[i] = -1; + dev->callfds[i] = -1; + } + + dev->vhostfds = NULL; + dev->tapfds = NULL; + + if (is_vhost_user_by_type(dev->path)) { + dev->ops = &ops_user; + } else { + dev->ops = &ops_kernel; + + dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); + dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int)); + if (!dev->vhostfds || !dev->tapfds) { + PMD_INIT_LOG(ERR, "Failed to malloc"); + return -1; + } + + for (q = 0; q < dev->max_queue_pairs; ++q) { + dev->vhostfds[q] = -1; + dev->tapfds[q] = -1; + } + } + + return dev->ops->setup(dev); +} + +int +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac) +{ + snprintf(dev->path, PATH_MAX, "%s", path); + dev->max_queue_pairs = queues; + dev->queue_pairs = 1; /* mq disabled by default */ + dev->queue_size = queue_size; + dev->mac_specified = 0; + parse_mac(dev, mac); + + if (virtio_user_dev_setup(dev) < 0) { + PMD_INIT_LOG(ERR, "backend set up fails"); + return -1; + } + if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) { + PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno)); + return -1; + } + + if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, + &dev->device_features) < 0) { + PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); + return -1; + } + if (dev->mac_specified) + dev->device_features |= (1ull << VIRTIO_NET_F_MAC); + + if (cq) { + /* device does not really need to know anything about CQ, + * so if necessary, we just claim to support CQ + */ + dev->device_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); + } else { + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->device_features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->device_features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } + + return 0; +} + +void +virtio_user_dev_uninit(struct virtio_user_dev *dev) +{ + uint32_t i; + + virtio_user_stop_device(dev); + + close(dev->vhostfd); + + if (dev->vhostfds) { + for (i = 0; i < dev->max_queue_pairs; ++i) + 
close(dev->vhostfds[i]); + free(dev->vhostfds); + free(dev->tapfds); + } +} + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", + q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= dev->ops->enable_qp(dev, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= dev->ops->enable_qp(dev, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, + uint16_t idx_hdr) +{ + struct virtio_net_ctrl_hdr *hdr; + virtio_net_ctrl_ack status = ~0; + uint16_t i, idx_data, idx_status; + uint32_t n_descs = 0; + + /* locate desc for header, data, and status */ + idx_data = vring->desc[idx_hdr].next; + n_descs++; + + i = idx_data; + while (vring->desc[i].flags == VRING_DESC_F_NEXT) { + i = vring->desc[i].next; + n_descs++; + } + + /* locate desc for status */ + idx_status = i; + n_descs++; + + hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr; + if (hdr->class == VIRTIO_NET_CTRL_MQ && + hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + uint16_t queues; + + queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr; + status = virtio_user_handle_mq(dev, queues); + } + + /* Update status */ + *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status; + + return n_descs; +} + +void +virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +{ + uint16_t avail_idx, desc_idx; + struct vring_used_elem *uep; + uint32_t n_descs; + struct vring *vring = &dev->vrings[queue_idx]; + + /* Consume avail ring, using used ring idx as first one */ + while (vring->used->idx != vring->avail->idx) { + avail_idx = (vring->used->idx) & (vring->num - 1); + desc_idx = vring->avail->ring[avail_idx]; + + n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx); + + /* Update used ring */ + uep = &vring->used->ring[avail_idx]; + uep->id = avail_idx; + uep->len = n_descs; + + vring->used->idx++; + } +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h new file mode 100644 index 00000000..0d39f40c --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_USER_DEV_H +#define _VIRTIO_USER_DEV_H + +#include +#include "../virtio_pci.h" +#include "../virtio_ring.h" +#include "vhost.h" + +struct virtio_user_dev { + /* for vhost_user backend */ + int vhostfd; + + /* for vhost_kernel backend */ + char *ifname; + int *vhostfds; + int *tapfds; + + /* for both vhost_user and vhost_kernel */ + int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int mac_specified; + uint32_t max_queue_pairs; + uint32_t queue_pairs; + uint32_t queue_size; + uint64_t features; /* the negotiated features with driver, + * and will be sync with device + */ + uint64_t device_features; /* supported features by device */ + uint8_t status; + uint8_t mac_addr[ETHER_ADDR_LEN]; + char path[PATH_MAX]; + struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + struct virtio_user_backend_ops *ops; +}; + +int virtio_user_start_device(struct virtio_user_dev *dev); +int virtio_user_stop_device(struct virtio_user_dev *dev); +int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac); +void virtio_user_dev_uninit(struct virtio_user_dev *dev); +void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); +#endif diff --git a/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c index daef09bd..0b226ac7 100644 --- a/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c +++ b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c @@ -37,6 +37,7 @@ #include #include +#include #include "virtio_ethdev.h" #include "virtio_logs.h" @@ -81,26 +82,29 @@ virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, for (i = 0; i < ETHER_ADDR_LEN; ++i) dev->mac_addr[i] = ((const uint8_t *)src)[i]; else - PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d", offset, length); } static void -virtio_user_set_status(struct virtio_hw *hw, uint8_t status) +virtio_user_reset(struct virtio_hw *hw) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) - virtio_user_start_device(dev); - dev->status = status; + if (dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_stop_device(dev); } static void -virtio_user_reset(struct virtio_hw *hw) +virtio_user_set_status(struct virtio_hw *hw, uint8_t status) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - virtio_user_stop_device(dev); + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_start_device(dev); + else if (status == VIRTIO_CONFIG_STATUS_RESET) + virtio_user_reset(hw); + dev->status = status; } static uint8_t @@ -116,7 +120,8 @@ virtio_user_get_features(struct virtio_hw *hw) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - return dev->features; + /* unmask feature bits defined in vhost user protocol */ + return dev->device_features & VIRTIO_PMD_SUPPORTED_GUEST_FEATURES; } static void @@ -124,7 +129,7 @@ 
virtio_user_set_features(struct virtio_hw *hw, uint64_t features) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - dev->features = features; + dev->features = features & dev->device_features; } static uint8_t @@ -207,11 +212,11 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) } if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) - PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + PMD_DRV_LOG(ERR, "failed to kick backend: %s", strerror(errno)); } -static const struct virtio_pci_ops virtio_user_ops = { +const struct virtio_pci_ops virtio_user_ops = { .read_dev_cfg = virtio_user_read_dev_config, .write_dev_cfg = virtio_user_write_dev_config, .reset = virtio_user_reset, @@ -269,6 +274,8 @@ get_integer_arg(const char *key __rte_unused, return 0; } +static struct rte_vdev_driver virtio_user_driver; + static struct rte_eth_dev * virtio_user_eth_dev_alloc(const char *name) { @@ -277,7 +284,7 @@ virtio_user_eth_dev_alloc(const char *name) struct virtio_hw *hw; struct virtio_user_dev *dev; - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (!eth_dev) { PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); return NULL; @@ -300,25 +307,38 @@ virtio_user_eth_dev_alloc(const char *name) return NULL; } - hw->vtpci_ops = &virtio_user_ops; + hw->port_id = data->port_id; + virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops; hw->use_msix = 0; hw->modern = 0; + hw->use_simple_rxtx = 0; hw->virtio_user_dev = dev; data->dev_private = hw; + data->drv_name = virtio_user_driver.driver.name; data->numa_node = SOCKET_ID_ANY; data->kdrv = RTE_KDRV_NONE; data->dev_flags = RTE_ETH_DEV_DETACHABLE; - eth_dev->pci_dev = NULL; eth_dev->driver = NULL; return eth_dev; } +static void +virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev) +{ + struct rte_eth_dev_data *data = eth_dev->data; + struct virtio_hw *hw = data->dev_private; + + rte_free(hw->virtio_user_dev); + rte_free(hw); + rte_eth_dev_release_port(eth_dev); +} + /* Dev initialization routine. Invoked once for each virtio vdev at * EAL init time, see rte_eal_dev_init(). * Returns 0 on success. 
*/ static int -virtio_user_pmd_devinit(const char *name, const char *params) +virtio_user_pmd_probe(const char *name, const char *params) { struct rte_kvargs *kvlist = NULL; struct rte_eth_dev *eth_dev; @@ -343,23 +363,21 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, - &get_string_arg, &path); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, + &get_string_arg, &path) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_PATH); goto end; } } else { - PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user\n", + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, - &get_string_arg, &mac_addr); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, + &get_string_arg, &mac_addr) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_MAC); goto end; @@ -367,9 +385,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, - &get_integer_arg, &queue_size); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_size) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; @@ -377,9 +394,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, - &get_integer_arg, &queues); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, + &get_integer_arg, &queues) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_QUEUES_NUM); goto end; @@ -387,9 +403,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, - &get_integer_arg, &cq); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, + &get_integer_arg, &cq) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_CQ_NUM); goto end; @@ -411,12 +426,16 @@ virtio_user_pmd_devinit(const char *name, const char *params) hw = eth_dev->data->dev_private; if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, - queue_size, mac_addr) < 0) + queue_size, mac_addr) < 0) { + PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; + } /* previously called by rte_eal_pci_probe() for physical dev */ if (eth_virtio_dev_init(eth_dev) < 0) { PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; } ret = 0; @@ -433,7 +452,7 @@ end: /** Called by rte_eth_dev_detach() */ static int -virtio_user_pmd_devuninit(const char *name) +virtio_user_pmd_remove(const char *name) { struct rte_eth_dev *eth_dev; struct virtio_hw *hw; @@ -442,7 +461,7 @@ virtio_user_pmd_devuninit(const char *name) if (!name) return -EINVAL; - PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name); + PMD_DRV_LOG(INFO, "Un-Initializing %s", name); eth_dev = rte_eth_dev_allocated(name); if (!eth_dev) return -ENODEV; @@ -461,14 +480,14 @@ virtio_user_pmd_devuninit(const char *name) return 0; } -static struct rte_driver virtio_user_driver = { - .type = 
PMD_VDEV, - .init = virtio_user_pmd_devinit, - .uninit = virtio_user_pmd_devuninit, +static struct rte_vdev_driver virtio_user_driver = { + .probe = virtio_user_pmd_probe, + .remove = virtio_user_pmd_remove, }; -PMD_REGISTER_DRIVER(virtio_user_driver, virtio_user); -DRIVER_REGISTER_PARAM_STRING(virtio_user, +RTE_PMD_REGISTER_VDEV(net_virtio_user, virtio_user_driver); +RTE_PMD_REGISTER_ALIAS(net_virtio_user, virtio_user); +RTE_PMD_REGISTER_PARAM_STRING(net_virtio_user, "path= " "mac= " "cq= " diff --git a/src/dpdk/drivers/net/virtio/virtqueue.c b/src/dpdk/drivers/net/virtio/virtqueue.c index 7f60e3ef..9ad77b8a 100644 --- a/src/dpdk/drivers/net/virtio/virtqueue.c +++ b/src/dpdk/drivers/net/virtio/virtqueue.c @@ -38,17 +38,6 @@ #include "virtio_logs.h" #include "virtio_pci.h" -void -virtqueue_disable_intr(struct virtqueue *vq) -{ - /* - * Set VRING_AVAIL_F_NO_INTERRUPT to hint host - * not to interrupt when it consumes packets - * Note: this is only considered a hint to the host - */ - vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; -} - /* * Two types of mbuf to be cleaned: * 1) mbuf that has been consumed by backend but not used by virtio. diff --git a/src/dpdk/drivers/net/virtio/virtqueue.h b/src/dpdk/drivers/net/virtio/virtqueue.h index 6737b81d..f9e37367 100644 --- a/src/dpdk/drivers/net/virtio/virtqueue.h +++ b/src/dpdk/drivers/net/virtio/virtqueue.h @@ -44,6 +44,7 @@ #include "virtio_pci.h" #include "virtio_ring.h" #include "virtio_logs.h" +#include "virtio_rxtx.h" struct rte_mbuf; @@ -191,6 +192,12 @@ struct virtqueue { void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; + union { + struct virtnet_rx rxq; + struct virtnet_tx txq; + struct virtnet_ctl cq; + }; + phys_addr_t vq_ring_mem; /**< physical address of vring, * or virtual address for virtio_user. */ @@ -204,7 +211,6 @@ struct virtqueue { uint16_t vq_queue_index; /**< PCI queue index */ uint16_t offset; /**< relative offset to obtain addr in mbuf */ uint16_t *notify_addr; - int configured; struct rte_mbuf **sw_ring; /**< RX software ring. */ struct vq_desc_extra vq_descx[0]; }; @@ -223,6 +229,7 @@ struct virtqueue { */ struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /**< Checksum is valid */ uint8_t flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ @@ -267,7 +274,21 @@ vring_desc_init(struct vring_desc *dp, uint16_t n) /** * Tell the backend not to interrupt us. */ -void virtqueue_disable_intr(struct virtqueue *vq); +static inline void +virtqueue_disable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +/** + * Tell the backend to interrupt us. + */ +static inline void +virtqueue_enable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); +} + /** * Dump virtqueue internal structures, for debug purpose only. */ @@ -323,7 +344,7 @@ virtqueue_notify(struct virtqueue *vq) * For virtio on IA, the notificaiton is through io port operation * which is a serialization instruction itself. 
*/ - vq->hw->vtpci_ops->notify_queue(vq->hw, vq); + VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); } #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c index 47fdc3ec..ff63a536 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -69,6 +69,8 @@ #define PROCESS_SYS_EVENTS 0 +#define VMXNET3_TX_MAX_SEG UINT8_MAX + static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev); static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev); static int vmxnet3_dev_configure(struct rte_eth_dev *dev); @@ -81,11 +83,11 @@ static void vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev); static void vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev); static void vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev); static int vmxnet3_dev_link_update(struct rte_eth_dev *dev, - int wait_to_complete); + int wait_to_complete); static void vmxnet3_dev_stats_get(struct rte_eth_dev *dev, - struct rte_eth_stats *stats); + struct rte_eth_stats *stats); static void vmxnet3_dev_info_get(struct rte_eth_dev *dev, - struct rte_eth_dev_info *dev_info); + struct rte_eth_dev_info *dev_info); static const uint32_t * vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev); static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, @@ -118,7 +120,7 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = { .allmulticast_disable = vmxnet3_dev_allmulticast_disable, .link_update = vmxnet3_dev_link_update, .stats_get = vmxnet3_dev_stats_get, - .mac_addr_set = vmxnet3_mac_addr_set, + .mac_addr_set = vmxnet3_mac_addr_set, .dev_infos_get = vmxnet3_dev_info_get, .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get, .vlan_filter_set = vmxnet3_dev_vlan_filter_set, @@ -131,20 +133,27 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = { static const struct rte_memzone * gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size, - const char *post_string, int socket_id, uint16_t align) + const char *post_string, int socket_id, + uint16_t align, bool reuse) { char z_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%d_%s", - dev->driver->pci_drv.name, dev->data->port_id, post_string); + dev->data->drv_name, dev->data->port_id, post_string); mz = rte_memzone_lookup(z_name); + if (!reuse) { + if (mz) + rte_memzone_free(mz); + return rte_memzone_reserve_aligned(z_name, size, socket_id, + 0, align); + } + if (mz) return mz; - return rte_memzone_reserve_aligned(z_name, size, - socket_id, 0, align); + return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align); } /** @@ -194,7 +203,7 @@ vmxnet3_dev_atomic_write_link_status(struct rte_eth_dev *dev, struct rte_eth_link *src = link; if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, - *(uint64_t *)src) == 0) + *(uint64_t *)src) == 0) return -1; return 0; @@ -212,7 +221,7 @@ vmxnet3_disable_intr(struct vmxnet3_hw *hw) hw->shared->devRead.intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL; for (i = 0; i < VMXNET3_MAX_INTRS; i++) - VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1); + VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1); } /* @@ -230,7 +239,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &vmxnet3_eth_dev_ops; eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; - pci_dev = eth_dev->pci_dev; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; + pci_dev = 
RTE_DEV_TO_PCI(eth_dev->device); /* * for secondary processes, we don't initialize any further as primary @@ -240,6 +250,7 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) return 0; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; /* Vendor and Device ID need to be set before init of shared code */ hw->device_id = pci_dev->id.device_id; @@ -274,8 +285,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) /* Getting MAC Address */ mac_lo = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACL); mac_hi = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACH); - memcpy(hw->perm_addr , &mac_lo, 4); - memcpy(hw->perm_addr+4, &mac_hi, 2); + memcpy(hw->perm_addr, &mac_lo, 4); + memcpy(hw->perm_addr + 4, &mac_hi, 2); /* Allocate memory for storing MAC addresses */ eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", ETHER_ADDR_LEN * @@ -319,6 +330,7 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -328,29 +340,16 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_vmxnet3_pmd = { .pci_drv = { - .name = "rte_vmxnet3_pmd", .id_table = pci_id_vmxnet3_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_vmxnet3_dev_init, .eth_dev_uninit = eth_vmxnet3_dev_uninit, .dev_private_size = sizeof(struct vmxnet3_hw), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of Virtual PCI VMXNET3 devices. - */ -static int -rte_vmxnet3_pmd_init(const char *name __rte_unused, const char *param __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_vmxnet3_pmd); - return 0; -} - static int vmxnet3_dev_configure(struct rte_eth_dev *dev) { @@ -360,9 +359,16 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); - if (dev->data->nb_rx_queues > UINT8_MAX || - dev->data->nb_tx_queues > UINT8_MAX) + if (dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES || + dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES) { + PMD_INIT_LOG(ERR, "ERROR: Number of queues not supported"); return -EINVAL; + } + + if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "ERROR: Number of rx queues not power of 2"); + return -EINVAL; + } size = dev->data->nb_rx_queues * sizeof(struct Vmxnet3_TxQueueDesc) + dev->data->nb_tx_queues * sizeof(struct Vmxnet3_RxQueueDesc); @@ -378,7 +384,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) * on current socket */ mz = gpa_zone_reserve(dev, sizeof(struct Vmxnet3_DriverShared), - "shared", rte_socket_id(), 8); + "shared", rte_socket_id(), 8, 1); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating shared zone"); @@ -391,10 +397,14 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) /* * Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc - * on current socket + * on current socket. + * + * We cannot reuse this memzone from previous allocation as its size + * depends on the number of tx and rx queues, which could be different + * from one config to another. 
*/ - mz = gpa_zone_reserve(dev, size, "queuedesc", - rte_socket_id(), VMXNET3_QUEUE_DESC_ALIGN); + mz = gpa_zone_reserve(dev, size, "queuedesc", rte_socket_id(), + VMXNET3_QUEUE_DESC_ALIGN, 0); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); return -ENOMEM; @@ -408,10 +418,10 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) hw->queue_desc_len = (uint16_t)size; if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { - /* Allocate memory structure for UPT1_RSSConf and configure */ - mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), "rss_conf", - rte_socket_id(), RTE_CACHE_LINE_SIZE); + mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), + "rss_conf", rte_socket_id(), + RTE_CACHE_LINE_SIZE, 1); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating rss_conf structure zone"); @@ -459,8 +469,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) /* Setting up Guest OS information */ devRead->misc.driverInfo.gos.gosBits = sizeof(void *) == 4 ? - VMXNET3_GOS_BITS_32 : - VMXNET3_GOS_BITS_64; + VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64; devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX; devRead->misc.driverInfo.vmxnet3RevSpt = 1; devRead->misc.driverInfo.uptVerSpt = 1; @@ -523,6 +532,11 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) if (dev->data->dev_conf.rxmode.hw_ip_checksum) devRead->misc.uptFeatures |= VMXNET3_F_RXCSUM; + if (dev->data->dev_conf.rxmode.enable_lro) { + devRead->misc.uptFeatures |= VMXNET3_F_LRO; + devRead->misc.maxNumRxSG = 0; + } + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { ret = vmxnet3_rss_configure(dev); if (ret != VMXNET3_SUCCESS) @@ -535,7 +549,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) } vmxnet3_dev_vlan_offload_set(dev, - ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); + ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); vmxnet3_write_mac(hw, hw->perm_addr); @@ -550,7 +564,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) static int vmxnet3_dev_start(struct rte_eth_dev *dev) { - int status, ret; + int ret; struct vmxnet3_hw *hw = dev->data->dev_private; PMD_INIT_FUNC_TRACE(); @@ -567,11 +581,11 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) /* Activate device by register write */ VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV); - status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); - if (status != 0) { + if (ret != 0) { PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL"); - return -1; + return -EINVAL; } /* Disable interrupts */ @@ -583,7 +597,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) */ ret = vmxnet3_dev_rxtx_init(dev); if (ret != VMXNET3_SUCCESS) { - PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL"); + PMD_INIT_LOG(ERR, "Device queue init: UNSUCCESSFUL"); return ret; } @@ -598,7 +612,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Reading events: 0x%X", events); vmxnet3_process_events(hw); #endif - return status; + return VMXNET3_SUCCESS; } /* @@ -664,16 +678,15 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) struct UPT1_TxStats *txStats = &hw->tqd_start[i].stats; stats->q_opackets[i] = txStats->ucastPktsTxOK + - txStats->mcastPktsTxOK + - txStats->bcastPktsTxOK; + txStats->mcastPktsTxOK + + txStats->bcastPktsTxOK; stats->q_obytes[i] = txStats->ucastBytesTxOK + - txStats->mcastBytesTxOK + - txStats->bcastBytesTxOK; + txStats->mcastBytesTxOK + + txStats->bcastBytesTxOK; stats->opackets += stats->q_opackets[i]; stats->obytes += 
stats->q_obytes[i]; - stats->oerrors += txStats->pktsTxError + - txStats->pktsTxDiscard; + stats->oerrors += txStats->pktsTxError + txStats->pktsTxDiscard; } RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_RX_QUEUES); @@ -681,12 +694,12 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) struct UPT1_RxStats *rxStats = &hw->rqd_start[i].stats; stats->q_ipackets[i] = rxStats->ucastPktsRxOK + - rxStats->mcastPktsRxOK + - rxStats->bcastPktsRxOK; + rxStats->mcastPktsRxOK + + rxStats->bcastPktsRxOK; stats->q_ibytes[i] = rxStats->ucastBytesRxOK + - rxStats->mcastBytesRxOK + - rxStats->bcastBytesRxOK; + rxStats->mcastBytesRxOK + + rxStats->bcastBytesRxOK; stats->ipackets += stats->q_ipackets[i]; stats->ibytes += stats->q_ibytes[i]; @@ -698,16 +711,17 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) } static void -vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev, +vmxnet3_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); + dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES; dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES; dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM; dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */ - dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS; - /* TRex patch */ dev_info->speed_capa = ETH_LINK_SPEED_10G; + dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS; dev_info->default_txconf.txq_flags = ETH_TXQ_FLAGS_NOXSUMSCTP; dev_info->flow_type_rss_offloads = VMXNET3_RSS_OFFLOAD_ALL; @@ -722,12 +736,15 @@ vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev, .nb_max = VMXNET3_TX_RING_MAX_SIZE, .nb_min = VMXNET3_DEF_TX_RING_SIZE, .nb_align = 1, + .nb_seg_max = VMXNET3_TX_MAX_SEG, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, }; dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_CKSUM; + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_TCP_LRO; dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT | @@ -760,14 +777,16 @@ vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) /* return 0 means link status changed, -1 means not changed */ static int -vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wait_to_complete) +vmxnet3_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete) { struct vmxnet3_hw *hw = dev->data->dev_private; struct rte_eth_link old, link; uint32_t ret; + /* Link status doesn't change for stopped dev */ if (dev->data->dev_started == 0) - return -1; /* Link status doesn't change for stopped dev */ + return -1; memset(&link, 0, sizeof(link)); vmxnet3_dev_atomic_read_link_status(dev, &old); @@ -789,8 +808,8 @@ vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wai /* Updating rxmode through Vmxnet3_DriverShared structure in adapter */ static void -vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) { - +vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) +{ struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf; if (set) @@ -923,11 +942,13 @@ vmxnet3_process_events(struct vmxnet3_hw *hw) /* Check if link state has changed */ if (events & VMXNET3_ECR_LINK) PMD_INIT_LOG(ERR, - "Process events in %s(): VMXNET3_ECR_LINK event", __func__); + "Process events in %s(): VMXNET3_ECR_LINK event", + __func__); /* Check if there is an error on xmit/recv queues */ if (events & 
(VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { - VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_QUEUE_STATUS); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_QUEUE_STATUS); if (hw->tqd_start->status.stopped) PMD_INIT_LOG(ERR, "tq error 0x%x", @@ -946,14 +967,9 @@ vmxnet3_process_events(struct vmxnet3_hw *hw) if (events & VMXNET3_ECR_DEBUG) PMD_INIT_LOG(ERR, "Debug event generated by device."); - } #endif -static struct rte_driver rte_vmxnet3_driver = { - .type = PMD_PDEV, - .init = rte_vmxnet3_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_vmxnet3_driver, vmxnet3); -DRIVER_REGISTER_PCI_TABLE(vmxnet3, pci_id_vmxnet3_map); +RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map); +RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h index 1be833ab..348c840b 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -34,6 +34,8 @@ #ifndef _VMXNET3_ETHDEV_H_ #define _VMXNET3_ETHDEV_H_ +#include + #define VMXNET3_MAX_MAC_ADDRS 1 /* UPT feature to negotiate */ @@ -62,8 +64,7 @@ ETH_RSS_NONFRAG_IPV6_TCP) /* RSS configuration structure - shared with device through GPA */ -typedef -struct VMXNET3_RSSConf { +typedef struct VMXNET3_RSSConf { uint16_t hashType; uint16_t hashFunc; uint16_t hashKeySize; @@ -76,15 +77,13 @@ struct VMXNET3_RSSConf { uint8_t indTable[VMXNET3_RSS_MAX_IND_TABLE_SIZE]; } VMXNET3_RSSConf; -typedef -struct vmxnet3_mf_table { +typedef struct vmxnet3_mf_table { void *mfTableBase; /* Multicast addresses list */ uint64_t mfTablePA; /* Physical address of the list */ uint16_t num_addrs; /* number of multicast addrs */ } vmxnet3_mf_table_t; struct vmxnet3_hw { - uint8_t *hw_addr0; /* BAR0: PT-Passthrough Regs */ uint8_t *hw_addr1; /* BAR1: VD-Virtual Device Regs */ /* BAR2: MSI-X Regs */ @@ -111,10 +110,10 @@ struct vmxnet3_hw { uint64_t queueDescPA; uint16_t queue_desc_len; - VMXNET3_RSSConf *rss_conf; - uint64_t rss_confPA; - vmxnet3_mf_table_t *mf_table; - uint32_t shadow_vfta[VMXNET3_VFT_SIZE]; + VMXNET3_RSSConf *rss_conf; + uint64_t rss_confPA; + vmxnet3_mf_table_t *mf_table; + uint32_t shadow_vfta[VMXNET3_VFT_SIZE]; #define VMXNET3_VFT_TABLE_SIZE (VMXNET3_VFT_SIZE * sizeof(uint32_t)) }; @@ -123,16 +122,15 @@ struct vmxnet3_hw { /* Config space read/writes */ -#define VMXNET3_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define VMXNET3_PCI_REG(reg) rte_read32(reg) -static inline uint32_t vmxnet3_read_addr(volatile void *addr) +static inline uint32_t +vmxnet3_read_addr(volatile void *addr) { return VMXNET3_PCI_REG(addr); } -#define VMXNET3_PCI_REG_WRITE(reg, value) do { \ - VMXNET3_PCI_REG((reg)) = (value); \ -} while(0) +#define VMXNET3_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) #define VMXNET3_PCI_BAR0_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr0 + (reg))) @@ -158,20 +156,22 @@ void vmxnet3_dev_rx_queue_release(void *rxq); void vmxnet3_dev_tx_queue_release(void *txq); int vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool); + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); int vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, - uint16_t nb_tx_desc, 
unsigned int socket_id, - const struct rte_eth_txconf *tx_conf); + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); int vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev); int vmxnet3_rss_configure(struct rte_eth_dev *dev); uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts); + uint16_t nb_pkts); uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); + uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h index 69ff2ded..b50d2b00 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h @@ -96,12 +96,12 @@ vmxnet3_cmd_ring_desc_empty(struct vmxnet3_cmd_ring *ring) } typedef struct vmxnet3_comp_ring { - uint32_t size; - uint32_t next2proc; - uint8_t gen; - uint8_t intr_idx; + uint32_t size; + uint32_t next2proc; + uint8_t gen; + uint8_t intr_idx; Vmxnet3_GenericDesc *base; - uint64_t basePA; + uint64_t basePA; } vmxnet3_comp_ring_t; struct vmxnet3_data_ring { @@ -121,13 +121,13 @@ vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring) } struct vmxnet3_txq_stats { - uint64_t drop_total; /* # of pkts dropped by the driver, + uint64_t drop_total; /* # of pkts dropped by the driver, * the counters below track droppings due to * different reasons */ - uint64_t drop_too_many_segs; - uint64_t drop_tso; - uint64_t tx_ring_full; + uint64_t drop_too_many_segs; + uint64_t drop_tso; + uint64_t tx_ring_full; }; typedef struct vmxnet3_tx_queue { @@ -158,8 +158,8 @@ typedef struct vmxnet3_rx_queue { uint32_t qid1; uint32_t qid2; Vmxnet3_RxQueueDesc *shared; - struct rte_mbuf *start_seg; - struct rte_mbuf *last_seg; + struct rte_mbuf *start_seg; + struct rte_mbuf *last_seg; struct vmxnet3_rxq_stats stats; bool stopped; uint16_t queue_id; /**< Device RX queue index. */ diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c index 9deeb3ff..b246884b 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include @@ -70,6 +69,7 @@ #include #include #include +#include #include "base/vmxnet3_defs.h" #include "vmxnet3_ring.h" @@ -77,6 +77,14 @@ #include "vmxnet3_logs.h" #include "vmxnet3_ethdev.h" +#define VMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); @@ -141,10 +149,10 @@ vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq) #endif static void -vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) { while (ring->next2comp != ring->next2fill) { - /* No need to worry about tx desc ownership, device is quiesced by now. */ + /* No need to worry about desc ownership, device is quiesced by now. 
*/ vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp; if (buf_info->m) { @@ -157,21 +165,40 @@ vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) } } +static void +vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +{ + uint32_t i; + + for (i = 0; i < ring->size; i++) { + /* No need to worry about desc ownership, device is quiesced by now. */ + vmxnet3_buf_info_t *buf_info = &ring->buf_info[i]; + + if (buf_info->m) { + rte_pktmbuf_free_seg(buf_info->m); + buf_info->m = NULL; + buf_info->bufPA = 0; + buf_info->len = 0; + } + vmxnet3_cmd_ring_adv_next2comp(ring); + } +} + static void vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring) { - vmxnet3_cmd_ring_release_mbufs(ring); rte_free(ring->buf_info); ring->buf_info = NULL; } - void vmxnet3_dev_tx_queue_release(void *txq) { vmxnet3_tx_queue_t *tq = txq; if (tq != NULL) { + /* Release mbufs */ + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); /* Release the cmd_ring */ vmxnet3_cmd_ring_release(&tq->cmd_ring); } @@ -184,6 +211,10 @@ vmxnet3_dev_rx_queue_release(void *rxq) vmxnet3_rx_queue_t *rq = rxq; if (rq != NULL) { + /* Release mbufs */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + /* Release both the cmd_rings */ for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) vmxnet3_cmd_ring_release(&rq->cmd_ring[i]); @@ -201,7 +232,7 @@ vmxnet3_dev_tx_queue_reset(void *txq) if (tq != NULL) { /* Release the cmd_ring mbufs */ - vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring); + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); } /* Tx vmxnet rings structure initialization*/ @@ -230,7 +261,7 @@ vmxnet3_dev_rx_queue_reset(void *rxq) if (rq != NULL) { /* Release both the cmd_rings mbufs */ for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) - vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); } ring0 = &rq->cmd_ring[0]; @@ -327,6 +358,53 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq) PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed); } +uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Non-TSO packet cannot occupy more than + * VMXNET3_MAX_TXD_PER_PKT TX descriptors. + */ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. 
*/ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -392,7 +470,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, continue; } - if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { + if (txm->nb_segs == 1 && + rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { struct Vmxnet3_TxDataDesc *tdd; tdd = txq->data_ring.base + txq->cmd_ring.next2fill; @@ -414,8 +493,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; if (copy_size) gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA + - txq->cmd_ring.next2fill * - sizeof(struct Vmxnet3_TxDataDesc)); + txq->cmd_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc)); else gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); @@ -495,16 +574,41 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return nb_tx; } +static inline void +vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id, + struct rte_mbuf *mbuf) +{ + uint32_t val = 0; + struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; + struct Vmxnet3_RxDesc *rxd = + (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill); + vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill]; + + if (ring_id == 0) + val = VMXNET3_RXD_BTYPE_HEAD; + else + val = VMXNET3_RXD_BTYPE_BODY; + + buf_info->m = mbuf; + buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM); + buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf); + + rxd->addr = buf_info->bufPA; + rxd->btype = val; + rxd->len = buf_info->len; + rxd->gen = ring->gen; + + vmxnet3_cmd_ring_adv_next2fill(ring); +} /* * Allocates mbufs and clusters. Post rx descriptors with buffer details * so that device can receive packets in those buffers. - * Ring layout: - * Among the two rings, 1st ring contains buffers of type 0 and type1. + * Ring layout: + * Among the two rings, 1st ring contains buffers of type 0 and type 1. * bufs_per_pkt is set such that for non-LRO cases all the buffers required * by a frame will fit in 1st ring (1st buf of type0 and rest of type1). * 2nd ring contains buffers of type 1 alone. Second ring mostly be used * only for LRO. - * */ static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) @@ -549,8 +653,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) buf_info->m = mbuf; buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM); - buf_info->bufPA = - rte_mbuf_data_dma_addr_default(mbuf); + buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf); /* Load Rx Descriptor with the buffer's GPA */ rxd->addr = buf_info->bufPA; @@ -636,9 +739,18 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } while (rcd->gen == rxq->comp_ring.gen) { + struct rte_mbuf *newm; + if (nb_rx >= nb_pkts) break; + newm = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(newm == NULL)) { + PMD_RX_LOG(ERR, "Error allocating mbuf"); + rxq->stats.rx_buf_alloc_failure++; + break; + } + idx = rcd->rxdIdx; ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 
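The vmxnet3_prep_pkts() hunk above is this driver's hook for the generic Tx prepare stage added in this DPDK release: it rejects unsupported offload combinations and prepares checksum fields before transmission. A minimal sketch of how an application might drive it through the ethdev API follows; the port/queue numbers and the pkts[] array are illustrative assumptions, not part of this patch.

/* Sketch: validate offloads with the PMD's prepare callback, then transmit
 * only the packets that passed. Assumes port 0 / queue 0 (hypothetical). */
#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

static uint16_t
send_checked(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_ok;

	/* For a vmxnet3 port this runs vmxnet3_prep_pkts(): it checks the
	 * segment count, refuses SCTP checksum offload and fills in the
	 * checksum pseudo-header fields. */
	nb_ok = rte_eth_tx_prepare(0, 0, pkts, nb_pkts);
	if (nb_ok != nb_pkts)
		printf("packet %u rejected, rte_errno=%d\n", nb_ok, rte_errno);

	return rte_eth_tx_burst(0, 0, pkts, nb_ok);
}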
0 : 1); rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx; @@ -676,7 +788,6 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) goto rcd_done; } - /* Initialize newly received packet buffer */ rxm->port = rxq->port_id; rxm->nb_segs = 1; @@ -736,10 +847,11 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rcd_done: rxq->cmd_ring[ring_idx].next2comp = idx; - VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size); + VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, + rxq->cmd_ring[ring_idx].size); - /* It's time to allocate some new buf and renew descriptors */ - vmxnet3_post_rx_bufs(rxq, ring_idx); + /* It's time to renew descriptors */ + vmxnet3_renew_desc(rxq, ring_idx, newm); if (unlikely(rxq->shared->ctrl.updateRxProd)) { VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), rxq->cmd_ring[ring_idx].next2fill); @@ -751,8 +863,7 @@ rcd_done: rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; nb_rxd++; if (nb_rxd > rxq->cmd_ring[0].size) { - PMD_RX_LOG(ERR, - "Used up quota of receiving packets," + PMD_RX_LOG(ERR, "Used up quota of receiving packets," " relinquish control."); break; } @@ -774,15 +885,15 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, - dev->data->port_id, queue_id); + dev->driver->pci_drv.driver.name, ring_name, + dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); if (mz) return mz; return rte_memzone_reserve_aligned(z_name, ring_size, - socket_id, 0, VMXNET3_RING_BA_ALIGN); + socket_id, 0, VMXNET3_RING_BA_ALIGN); } int @@ -790,7 +901,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - __attribute__((unused)) const struct rte_eth_txconf *tx_conf) + __rte_unused const struct rte_eth_txconf *tx_conf) { struct vmxnet3_hw *hw = dev->data->dev_private; const struct rte_memzone *mz; @@ -808,7 +919,8 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE); + txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), + RTE_CACHE_LINE_SIZE); if (txq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate tx queue structure"); return -ENOMEM; @@ -891,12 +1003,12 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - __attribute__((unused)) const struct rte_eth_rxconf *rx_conf, + __rte_unused const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { const struct rte_memzone *mz; struct vmxnet3_rx_queue *rxq; - struct vmxnet3_hw *hw = dev->data->dev_private; + struct vmxnet3_hw *hw = dev->data->dev_private; struct vmxnet3_cmd_ring *ring0, *ring1, *ring; struct vmxnet3_comp_ring *comp_ring; int size; @@ -905,7 +1017,8 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, PMD_INIT_FUNC_TRACE(); - rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE); + rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), + RTE_CACHE_LINE_SIZE); if (rxq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate rx queue structure"); return -ENOMEM; @@ -979,7 +1092,9 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, ring->rid = i; snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i); - 
ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE); + ring->buf_info = rte_zmalloc(mem_name, + ring->size * sizeof(vmxnet3_buf_info_t), + RTE_CACHE_LINE_SIZE); if (ring->buf_info == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure"); return -ENOMEM; @@ -1013,10 +1128,15 @@ vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev) /* Passing 0 as alloc_num will allocate full ring */ ret = vmxnet3_post_rx_bufs(rxq, j); if (ret <= 0) { - PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j); + PMD_INIT_LOG(ERR, + "ERROR: Posting Rxq: %d buffers ring: %d", + i, j); return -ret; } - /* Updating device with the index:next2fill to fill the mbufs for coming packets */ + /* + * Updating device with the index:next2fill to fill the + * mbufs for coming packets. + */ if (unlikely(rxq->shared->ctrl.updateRxProd)) { VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN), rxq->cmd_ring[j].next2fill); @@ -1064,7 +1184,7 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev) dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ; /* loading hashKeySize */ dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE; - /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ + /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4); if (port_rss_conf->rss_key == NULL) { @@ -1073,7 +1193,8 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev) } /* loading hashKey */ - memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize); + memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, + dev_rss_conf->hashKeySize); /* loading indTable */ for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) { diff --git a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c index 99f6cc81..19bc09a3 100644 --- a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c +++ b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include @@ -70,8 +70,6 @@ /* virtio_idx is increased after new device is created.*/ static int virtio_idx = 0; -static const char *drivername = "xen virtio PMD"; - static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -331,13 +329,11 @@ eth_dev_info(struct rte_eth_dev *dev, struct pmd_internals *internals = dev->data->dev_private; RTE_SET_USED(internals); - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)2048; dev_info->max_rx_queues = (uint16_t)1; dev_info->max_tx_queues = (uint16_t)1; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -620,6 +616,7 @@ enum dev_action { DEV_ATTACH }; +static struct rte_vdev_driver pmd_xenvirt_drv; static int eth_dev_xenvirt_create(const char *name, const char *params, @@ -654,7 +651,7 @@ eth_dev_xenvirt_create(const char *name, const char *params, goto err; /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) goto err; @@ -673,9 +670,9 @@ eth_dev_xenvirt_create(const char *name, const char *params, eth_dev->data = data; eth_dev->dev_ops = &ops; - eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE; + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; eth_dev->data->kdrv = RTE_KDRV_NONE; - eth_dev->data->drv_name = drivername; 
+ eth_dev->data->drv_name = pmd_xenvirt_drv.driver.name; eth_dev->driver = NULL; eth_dev->data->numa_node = numa_node; @@ -729,7 +726,7 @@ eth_dev_xenvirt_free(const char *name, const unsigned numa_node) /*TODO: Support multiple process model */ static int -rte_pmd_xenvirt_devinit(const char *name, const char *params) +rte_pmd_xenvirt_probe(const char *name, const char *params) { if (virtio_idx == 0) { if (xenstore_init() != 0) { @@ -746,7 +743,7 @@ rte_pmd_xenvirt_devinit(const char *name, const char *params) } static int -rte_pmd_xenvirt_devuninit(const char *name) +rte_pmd_xenvirt_remove(const char *name) { eth_dev_xenvirt_free(name, rte_socket_id()); @@ -759,12 +756,12 @@ rte_pmd_xenvirt_devuninit(const char *name) return 0; } -static struct rte_driver pmd_xenvirt_drv = { - .type = PMD_VDEV, - .init = rte_pmd_xenvirt_devinit, - .uninit = rte_pmd_xenvirt_devuninit, +static struct rte_vdev_driver pmd_xenvirt_drv = { + .probe = rte_pmd_xenvirt_probe, + .remove = rte_pmd_xenvirt_remove, }; -PMD_REGISTER_DRIVER(pmd_xenvirt_drv, eth_xenvirt); -DRIVER_REGISTER_PARAM_STRING(eth_xenvirt, +RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv); +RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt); +RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt, "mac="); diff --git a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h index 4995a9b4..598adc6f 100644 --- a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h +++ b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h @@ -39,7 +39,6 @@ extern "C" { #endif #include -#include /** * Creates mempool for xen virtio PMD. diff --git a/src/dpdk/lib/librte_acl/acl.h b/src/dpdk/lib/librte_acl/acl.h index 09d67841..6664a55e 100644 --- a/src/dpdk/lib/librte_acl/acl.h +++ b/src/dpdk/lib/librte_acl/acl.h @@ -234,6 +234,10 @@ int rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories); +int +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/src/dpdk/lib/librte_acl/acl_run.h b/src/dpdk/lib/librte_acl/acl_run.h index b2fc42c6..a862ff6e 100644 --- a/src/dpdk/lib/librte_acl/acl_run.h +++ b/src/dpdk/lib/librte_acl/acl_run.h @@ -39,7 +39,9 @@ #define MAX_SEARCHES_AVX16 16 #define MAX_SEARCHES_SSE8 8 +#define MAX_SEARCHES_ALTIVEC8 8 #define MAX_SEARCHES_SSE4 4 +#define MAX_SEARCHES_ALTIVEC4 4 #define MAX_SEARCHES_SCALAR 2 #define GET_NEXT_4BYTES(prm, idx) \ @@ -67,10 +69,10 @@ struct acl_flow_data { uint32_t trie; /* current trie index (0 to N-1) */ uint32_t cmplt_size; + /* maximum number of packets to process */ uint32_t total_packets; - uint32_t categories; /* number of result categories per packet. */ - /* maximum number of packets to process */ + uint32_t categories; const uint64_t *trans; const uint8_t **data; uint32_t *results; diff --git a/src/dpdk/lib/librte_acl/acl_run_altivec.c b/src/dpdk/lib/librte_acl/acl_run_altivec.c new file mode 100644 index 00000000..35235260 --- /dev/null +++ b/src/dpdk/lib/librte_acl/acl_run_altivec.c @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run_altivec.h" + +int +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (likely(num >= MAX_SEARCHES_ALTIVEC8)) + return search_altivec_8(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_ALTIVEC4) + return search_altivec_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); +} diff --git a/src/dpdk/lib/librte_acl/acl_run_altivec.h b/src/dpdk/lib/librte_acl/acl_run_altivec.h new file mode 100644 index 00000000..7d329bcf --- /dev/null +++ b/src/dpdk/lib/librte_acl/acl_run_altivec.h @@ -0,0 +1,329 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "acl_run.h" +#include "acl_vect.h" + +struct _altivec_acl_const { + rte_xmm_t xmm_shuffle_input; + rte_xmm_t xmm_index_mask; + rte_xmm_t xmm_ones_16; + rte_xmm_t range_base; +} altivec_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = { + { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} + }, + { + .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX} + }, + { + .u16 = {1, 1, 1, 1, 1, 1, 1, 1} + }, + { + .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} + }, +}; + +/* + * Resolve priority for multiple results (altivec version). + * This consists comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. + */ +static inline void +resolve_priority_altivec(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p, uint32_t categories) +{ + uint32_t x; + xmm_t results, priority, results1, priority1; + vector bool int selector; + xmm_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + + saved_results = (xmm_t *)(&parms[n].cmplt->results[x]); + saved_priority = + (xmm_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = *(const xmm_t *)&p[transition].results[x]; + priority = *(const xmm_t *)&p[transition].priority[x]; + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + + /* get running best results and their priorities */ + results1 = *saved_results; + priority1 = *saved_priority; + + /* select results that are highest priority */ + selector = vec_cmpgt(priority1, priority); + results = vec_sel(results, results1, selector); + priority = vec_sel(priority, priority1, + selector); + } + + /* save running best results and their priorities */ + *saved_results = results; + *saved_priority = priority; + } +} + +/* + * Check for any match in 4 transitions + */ +static inline __attribute__((always_inline)) uint32_t +check_any_match_x4(uint64_t val[]) +{ + return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; +} + +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint64_t transitions[]) +{ + while (check_any_match_x4(transitions)) { + transitions[0] = acl_match_check(transitions[0], slot, ctx, + parms, flows, resolve_priority_altivec); + transitions[1] = acl_match_check(transitions[1], slot + 1, ctx, + parms, flows, resolve_priority_altivec); + transitions[2] = acl_match_check(transitions[2], slot + 2, ctx, + parms, flows, resolve_priority_altivec); + transitions[3] = acl_match_check(transitions[3], slot + 3, ctx, + parms, flows, resolve_priority_altivec); + } +} + +/* + * Process 4 transitions (in 2 XMM registers) in parallel + */ +static inline __attribute__((optimize("O2"))) xmm_t +transition4(xmm_t next_input, const uint64_t *trans, + xmm_t *indices1, xmm_t *indices2) +{ + xmm_t addr, tr_lo, tr_hi; + xmm_t in, node_type, r, t; + xmm_t dfa_ofs, quad_ofs; + xmm_t *index_mask, *tp; + vector bool int dfa_msk; + vector signed char zeroes = {}; + union { + uint64_t d64[2]; + uint32_t d32[4]; + } v; + + /* Move low 32 into tr_lo and high 32 into tr_hi */ + tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2], + (*indices2)[0], (*indices2)[2]}; + tr_hi = 
(xmm_t){(*indices1)[1], (*indices1)[3], + (*indices2)[1], (*indices2)[3]}; + + /* Calculate the address (array index) for all 4 transitions. */ + index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32; + t = vec_xor(*index_mask, *index_mask); + in = vec_perm(next_input, (xmm_t){}, + *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input); + + /* Calc node type and node addr */ + node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo); + addr = vec_and(tr_lo, *index_mask); + + /* mask for DFA type(0) nodes */ + dfa_msk = vec_cmpeq(node_type, t); + + /* DFA calculations. */ + r = vec_sr(in, (vector unsigned int){30, 30, 30, 30}); + tp = (xmm_t *)&altivec_acl_const.range_base.u32; + r = vec_add(r, *tp); + t = vec_sr(in, (vector unsigned int){24, 24, 24, 24}); + r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16}, + (vector unsigned char)r); + + dfa_ofs = vec_sub(t, r); + + /* QUAD/SINGLE caluclations. */ + t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi); + t = (xmm_t)vec_sel( + vec_sel( + (vector signed char)vec_sub( + zeroes, (vector signed char)t), + (vector signed char)t, + vec_cmpgt((vector signed char)t, zeroes)), + zeroes, + vec_cmpeq((vector signed char)t, zeroes)); + + t = (xmm_t)vec_msum((vector signed char)t, + (vector unsigned char)t, (xmm_t){}); + quad_ofs = (xmm_t)vec_msum((vector signed short)t, + *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16, + (xmm_t){}); + + /* blend DFA and QUAD/SINGLE. */ + t = vec_sel(quad_ofs, dfa_ofs, dfa_msk); + + /* calculate address for next transitions. */ + addr = vec_add(addr, t); + + v.d64[0] = (uint64_t)trans[addr[0]]; + v.d64[1] = (uint64_t)trans[addr[1]]; + *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + v.d64[0] = (uint64_t)trans[addr[2]]; + v.d64[1] = (uint64_t)trans[addr[3]]; + *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + + return vec_sr(next_input, + (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT}); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC8]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC8]; + struct parms parms[MAX_SEARCHES_ALTIVEC8]; + xmm_t input0, input1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4), + GET_NEXT_4BYTES(parms, 5), + GET_NEXT_4BYTES(parms, 6), + GET_NEXT_4BYTES(parms, 7)}; + + /* Process the 4 bytes of input on each stream. 
*/ + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, + (uint64_t *)&index_array[4]); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC4]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC4]; + struct parms parms[MAX_SEARCHES_ALTIVEC4]; + xmm_t input; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + } + + return 0; +} diff --git a/src/dpdk/lib/librte_acl/rte_acl.c b/src/dpdk/lib/librte_acl/rte_acl.c index 4ba9786b..d1f40bef 100644 --- a/src/dpdk/lib/librte_acl/rte_acl.c +++ b/src/dpdk/lib/librte_acl/rte_acl.c @@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } +int __attribute__ ((weak)) +rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + static const rte_acl_classify_t classify_fns[] = { [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse, [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2, [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon, + [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec, }; /* by default, use always available scalar code path. 
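The new rte_acl_classify_altivec() entry slots into the per-context table of classify implementations, and rte_acl_init() picks ALTIVEC as the default on ppc64. A short sketch of pinning a context to a specific implementation is given below; rte_acl_set_ctx_classify() is part of the existing librte_acl API rather than this patch, and checking that the algorithm suits the running CPU is left to the caller.

/* Sketch: explicitly select the classify implementation for one context.
 * rte_acl_set_ctx_classify() returns -EINVAL for an unknown algorithm id. */
#include <rte_acl.h>

static int
use_altivec_classify(struct rte_acl_ctx *ctx)
{
	return rte_acl_set_ctx_classify(ctx, RTE_ACL_CLASSIFY_ALTIVEC);
}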
*/ @@ -119,6 +130,8 @@ rte_acl_init(void) #elif defined(RTE_ARCH_ARM) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) alg = RTE_ACL_CLASSIFY_NEON; +#elif defined(RTE_ARCH_PPC_64) + alg = RTE_ACL_CLASSIFY_ALTIVEC; #else #ifdef CC_AVX2_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) @@ -300,8 +313,7 @@ acl_check_rule(const struct rte_acl_rule_data *rd) if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) & rd->category_mask) == 0 || rd->priority > RTE_ACL_MAX_PRIORITY || - rd->priority < RTE_ACL_MIN_PRIORITY || - rd->userdata == RTE_ACL_INVALID_USERDATA) + rd->priority < RTE_ACL_MIN_PRIORITY) return -EINVAL; return 0; } diff --git a/src/dpdk/lib/librte_acl/rte_acl.h b/src/dpdk/lib/librte_acl/rte_acl.h index 0979a098..b53179a8 100644 --- a/src/dpdk/lib/librte_acl/rte_acl.h +++ b/src/dpdk/lib/librte_acl/rte_acl.h @@ -120,8 +120,6 @@ enum { RTE_ACL_MIN_PRIORITY = 0, }; -#define RTE_ACL_INVALID_USERDATA 0 - #define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ ((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v)))) @@ -144,7 +142,7 @@ struct rte_acl_rule_data { struct rte_acl_field field[fld_num]; \ } -RTE_ACL_RULE_DEF(rte_acl_rule, 0); +RTE_ACL_RULE_DEF(rte_acl_rule,); #define RTE_ACL_RULE_SZ(fld_num) \ (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num)) @@ -271,6 +269,7 @@ enum rte_acl_classify_alg { RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */ RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ + RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */ RTE_ACL_CLASSIFY_NUM /* should always be the last one. */ }; diff --git a/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h b/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h index f649836c..b40e6a13 100644 --- a/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h +++ b/src/dpdk/lib/librte_cfgfile/rte_cfgfile.h @@ -34,6 +34,8 @@ #ifndef __INCLUDE_RTE_CFGFILE_H__ #define __INCLUDE_RTE_CFGFILE_H__ +#include + #ifdef __cplusplus extern "C" { #endif @@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); * @param length * Maximum section name length * @return -* 0 on success, error code otherwise +* Number of sections */ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, size_t length); @@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, * @param cfg * Config file * @param sections -* Array containing section names after successful invocation. Each elemen +* Array containing section names after successful invocation. Each element * of this array should be preallocated by the user with at least * CFG_NAME_LEN characters. * @param max_sections * Maximum number of section names to be stored in sections array * @return -* 0 on success, error code otherwise +* Number of populated sections names */ int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], int max_sections); @@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname); * @param sectionname * Section name * @return -* Number of entries in section +* Number of entries in section on success, -1 otherwise */ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, const char *sectionname); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * If multiple sections have the given name this function operates on the * first one. 
@@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* 0 on success, error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, struct rte_cfgfile_entry *entries, int max_entries); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * The index of a section is the same as the index of its name in the * result of rte_cfgfile_sections. This API can be used when there are @@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* Number of entries populated on success, negative error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index, @@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, struct rte_cfgfile_entry *entries, int max_entries); -/** Get value of the named entry in named config file section +/** +* Get value of the named entry in named config file section * * If multiple sections have the given name this function operates on the * first one. @@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, * @param entryname * Entry name * @return -* Entry value +* Entry value on success, NULL otherwise */ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Check if given entry exists in named config file section +/** +* Check if given entry exists in named config file section * * If multiple sections have the given name this function operates on the * first one. @@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Close config file +/** +* Close config file * * @param cfg * Config file * @return -* 0 on success, error code otherwise +* 0 on success, -1 otherwise */ int rte_cfgfile_close(struct rte_cfgfile *cfg); diff --git a/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c b/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c index c6ca3b9c..da971deb 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c +++ b/src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c @@ -216,15 +216,19 @@ static int contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **obj, int nprot) { + uint64_t buffer_index; + /* * The buffer index is encoded in the offset. Divide the offset by * PAGE_SIZE to get the index of the buffer requested by the user * app. 
*/ - if ((*offset/PAGE_SIZE) >= contigmem_num_buffers) + buffer_index = *offset / PAGE_SIZE; + if (buffer_index >= contigmem_num_buffers) return EINVAL; - *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]); + memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size); + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]); *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, curthread->td_ucred); diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c index a0c8f8c8..ee7c9de7 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -496,14 +497,14 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -552,9 +553,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n");*/ - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); @@ -580,8 +578,8 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_bus_scan()) + rte_panic("Cannot scan the buses for devices\n"); RTE_LCORE_FOREACH_SLAVE(i) { @@ -615,10 +613,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) + rte_panic("Cannot probe devices\n"); + /* Probe & Initialize PCI devices */ if (rte_eal_pci_probe()) rte_panic("Cannot probe PCI\n"); + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + rte_eal_mcfg_complete(); return fctret; diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 836e4836..ea2afff4 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -36,29 +36,37 @@ #include "eal_private.h" int -rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } int -rte_intr_disable(struct 
rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c deleted file mode 100644 index a425f7a8..00000000 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c +++ /dev/null @@ -1,57 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include - -/* - * set the log to default function, called during eal init process, - * once memzones are available. - */ -int -rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused) -{ - if (rte_eal_common_log_init(stderr) < 0) - return -1; - return 0; -} - -int -rte_eal_log_early_init(void) -{ - rte_openlog_stream(stderr); - return 0; -} diff --git a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c index 374b68f2..3a5c3159 100644 --- a/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -87,15 +87,6 @@ * enabling bus master. 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) -{ - RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " - "for BSD\n"); - return -ENOTSUP; -} - /* Map pci device */ int rte_eal_pci_map_device(struct rte_pci_device *dev) @@ -287,7 +278,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->max_vfs = 0; /* FreeBSD has no NUMA support (yet) */ - dev->numa_node = 0; + dev->device.numa_node = 0; /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -406,6 +397,55 @@ error: return -1; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + int fd; + struct pci_conf matches[2]; + struct pci_match_conf match = { + .pc_sel = { + .pc_domain = addr->domain, + .pc_bus = addr->bus, + .pc_dev = addr->devid, + .pc_func = addr->function, + }, + }; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 1, + .patterns = &match, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + if (conf_io.num_matches != 1) + goto error; + + if (pci_scan_one(fd, &matches[0]) < 0) + goto error; + + close(fd); + + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + /* Read PCI config space. */ int rte_eal_pci_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offset) @@ -623,9 +663,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/src/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/dpdk/lib/librte_eal/common/eal_common_bus.c new file mode 100644 index 00000000..4638e78d --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/eal_common_bus.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include + +#include "eal_private.h" + +struct rte_bus_list rte_bus_list = + TAILQ_HEAD_INITIALIZER(rte_bus_list); + +void +rte_bus_register(struct rte_bus *bus) +{ + RTE_VERIFY(bus); + RTE_VERIFY(bus->name && strlen(bus->name)); + /* A bus should mandatorily have the scan implemented */ + RTE_VERIFY(bus->scan); + RTE_VERIFY(bus->probe); + + TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); +} + +void +rte_bus_unregister(struct rte_bus *bus) +{ + TAILQ_REMOVE(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name); +} + +/* Scan all the buses for registered devices */ +int +rte_bus_scan(void) +{ + int ret; + struct rte_bus *bus = NULL; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus->scan(); + if (ret) { + RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n", + bus->name); + return ret; + } + } + + return 0; +} + +/* Probe all devices of all buses */ +int +rte_bus_probe(void) +{ + int ret; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus->probe(); + if (ret) { + RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", + bus->name); + return ret; + } + } + + return 0; +} + +/* Dump information of a single bus */ +static int +bus_dump_one(FILE *f, struct rte_bus *bus) +{ + int ret; + + /* For now, dump only the bus name */ + ret = fprintf(f, " %s\n", bus->name); + + /* Error in case of inability in writing to stream */ + if (ret < 0) + return ret; + + return 0; +} + +void +rte_bus_dump(FILE *f) +{ + int ret; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus_dump_one(f, bus); + if (ret) { + RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n", + ret); + break; + } + } +} diff --git a/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c b/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c index ecb12409..b5f76f7f 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c @@ -39,14 +39,8 @@ /** * Checks if the machine is adequate for running the binary. If it is not, the * program exits with status 1. - * The function attribute forces this function to be called before main(). But - * with ICC, the check is generated by the compiler. */ -#ifndef __INTEL_COMPILER -void __attribute__ ((__constructor__)) -#else void -#endif rte_cpu_check_supported(void) { /* This is generated at compile-time by the build system */ diff --git a/src/dpdk/lib/librte_eal/common/eal_common_dev.c b/src/dpdk/lib/librte_eal/common/eal_common_dev.c index a8a4146c..4f3b4934 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_dev.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_dev.c @@ -48,6 +48,9 @@ /** Global list of device drivers. */ static struct rte_driver_list dev_driver_list = TAILQ_HEAD_INITIALIZER(dev_driver_list); +/** Global list of device drivers. 
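rte_bus_register() above insists on a bus name plus scan and probe callbacks. A minimal sketch of how a bus driver might register itself before rte_eal_init() walks the bus list is shown here; the "foo" bus and its callbacks are hypothetical.

/* Sketch: hook a hypothetical bus into the new EAL bus list. The
 * constructor runs at load time, so the bus is already registered when
 * rte_bus_scan() and rte_bus_probe() are invoked from rte_eal_init(). */
#include <rte_bus.h>

static int foo_scan(void)  { return 0; }  /* enumerate devices on the bus */
static int foo_probe(void) { return 0; }  /* match scanned devices to drivers */

static struct rte_bus foo_bus = {
	.name  = "foo",
	.scan  = foo_scan,
	.probe = foo_probe,
};

static void __attribute__((constructor))
foo_bus_init(void)
{
	rte_bus_register(&foo_bus);
}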
*/ +static struct rte_device_list dev_device_list = + TAILQ_HEAD_INITIALIZER(dev_device_list); /* register a driver */ void @@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver) TAILQ_REMOVE(&dev_driver_list, driver, next); } -int -rte_eal_vdev_init(const char *name, const char *args) +void rte_eal_device_insert(struct rte_device *dev) { - struct rte_driver *driver; - - if (name == NULL) - return -EINVAL; - - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; - - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->init(name, args); - } + TAILQ_INSERT_TAIL(&dev_device_list, dev, next); +} - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; +void rte_eal_device_remove(struct rte_device *dev) +{ + TAILQ_REMOVE(&dev_device_list, dev, next); } int rte_eal_dev_init(void) { struct rte_devargs *devargs; - struct rte_driver *driver; /* * Note that the dev_driver_list is populated here * from calls made to rte_eal_driver_register from constructor functions - * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro */ /* call the init function for each virtual device */ @@ -115,38 +101,53 @@ rte_eal_dev_init(void) } } - /* Once the vdevs are initalized, start calling all the pdev drivers */ - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_PDEV) - continue; - /* PDEV drivers don't get passed any parameters */ - driver->init(NULL, NULL); - } return 0; } -int -rte_eal_vdev_uninit(const char *name) +int rte_eal_dev_attach(const char *name, const char *devargs) { - struct rte_driver *driver; + struct rte_pci_addr addr; - if (name == NULL) + if (name == NULL || devargs == NULL) { + RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); return -EINVAL; + } - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_probe_one(&addr) < 0) + goto err; + + } else { + if (rte_eal_vdev_init(name, devargs)) + goto err; + } + + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name); + return -EINVAL; +} + +int rte_eal_dev_detach(const char *name) +{ + struct rte_pci_addr addr; - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. 
- */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->uninit(name); + if (name == NULL) { + RTE_LOG(ERR, EAL, "Invalid device provided.\n"); + return -EINVAL; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_detach(&addr) < 0) + goto err; + } else { + if (rte_eal_vdev_uninit(name)) + goto err; + } + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name); return -EINVAL; } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_devargs.c b/src/dpdk/lib/librte_eal/common/eal_common_devargs.c index e403717b..ffa8ad96 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_devargs.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_devargs.c @@ -72,6 +72,7 @@ rte_eal_parse_devargs_str(const char *devargs_str, if (*drvargs == NULL) { free(*drvname); + *drvname = NULL; return -1; } return 0; diff --git a/src/dpdk/lib/librte_eal/common/eal_common_log.c b/src/dpdk/lib/librte_eal/common/eal_common_log.c index 7916c781..21975583 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_log.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_log.c @@ -48,11 +48,12 @@ struct rte_logs rte_logs = { .file = NULL, }; +/* Stream to use for logging if rte_logs.file is NULL */ static FILE *default_log_stream; /** * This global structure stores some informations about the message - * that is currently beeing processed by one lcore + * that is currently being processed by one lcore */ struct log_cur_msg { uint32_t loglevel; /**< log level - see rte_log.h */ @@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); /* default logs */ -int -rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused) -{ - return 0; -} - -void -rte_log_set_history(int enable) -{ - if (enable) - RTE_LOG(WARNING, EAL, "The log history is deprecated.\n"); -} - /* Change the stream that will be used by logging system */ int rte_openlog_stream(FILE *f) { - if (f == NULL) - rte_logs.file = default_log_stream; - else - rte_logs.file = f; + rte_logs.file = f; return 0; } @@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void) return RTE_PER_LCORE(log_cur_msg).logtype; } -/* Dump log history to file */ -void -rte_log_dump_history(FILE *out __rte_unused) -{ -} - /* * Generates a log message The message will be sent in the stream * defined by the previous call to rte_openlog_stream(). @@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) { int ret; FILE *f = rte_logs.file; + if (f == NULL) { + f = default_log_stream; + if (f == NULL) { + /* + * Grab the current value of stderr here, rather than + * just initializing default_log_stream to stderr. This + * ensures that we will always use the current value + * of stderr, even if the application closes and + * reopens it. + */ + f = stderr; + } + } if ((level > rte_logs.level) || !(logtype & rte_logs.type)) return 0; @@ -177,17 +169,15 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...) } /* - * called by environment-specific log init function + * Called by environment-specific initialization functions. 
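With this change an application can still point the log system at its own stream, and passing NULL now simply falls back to the default stream (or the current stderr) inside rte_vlog(). A small sketch; the log file path is arbitrary:

#include <stdio.h>
#include <rte_log.h>

static int
redirect_logs(void)
{
	FILE *f = fopen("/tmp/dpdk.log", "w");

	if (f == NULL)
		return -1;

	rte_openlog_stream(f);                 /* all RTE_LOG output goes to f */
	RTE_LOG(INFO, USER1, "logging to file\n");

	rte_openlog_stream(NULL);              /* back to default stream/stderr */
	return 0;
}
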
*/ -int -rte_eal_common_log_init(FILE *default_log) +void +eal_log_set_default(FILE *default_log) { default_log_stream = default_log; - rte_openlog_stream(default_log); -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, + "Debug dataplane logs available - lower performance\n"); #endif - - return 0; } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_memzone.c b/src/dpdk/lib/librte_eal/common/eal_common_memzone.c index 1bd0a33d..64f4e0ad 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_memzone.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_memzone.c @@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz) idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); idx = idx / sizeof(struct rte_memzone); -#ifdef RTE_LIBRTE_IVSHMEM - /* - * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot - * free it. - */ - if (mcfg->memzone[idx].ioremap_addr != 0) { - rte_rwlock_write_unlock(&mcfg->mlock); - return -EINVAL; - } -#endif - addr = mcfg->memzone[idx].addr; - if (addr == NULL) ret = -EINVAL; else if (mcfg->memzone_cnt == 0) { diff --git a/src/dpdk/lib/librte_eal/common/eal_common_options.c b/src/dpdk/lib/librte_eal/common/eal_common_options.c index 1a1bab36..f36bc556 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_options.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_options.c @@ -118,7 +118,7 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH; /* * Stringified version of solib path used by dpdk-pmdinfo.py * Note: PLEASE DO NOT ALTER THIS without making a corresponding - * change to tools/dpdk-pmdinfo.py + * change to usertools/dpdk-pmdinfo.py */ static const char dpdk_solib_path[] __attribute__((used)) = "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; @@ -126,6 +126,7 @@ static const char dpdk_solib_path[] __attribute__((used)) = static int master_lcore_parsed; static int mem_parsed; +static int core_parsed; void eal_reset_internal_config(struct internal_config *internal_cfg) @@ -797,6 +798,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid coremask\n"); return -1; } + core_parsed = 1; break; /* corelist */ case 'l': @@ -804,6 +806,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid core list\n"); return -1; } + core_parsed = 1; break; /* size of memory */ case 'm': @@ -912,6 +915,7 @@ eal_parse_common_option(int opt, const char *optarg, OPT_LCORES "\n"); return -1; } + core_parsed = 1; break; /* don't know what to do, leave this to caller */ @@ -923,12 +927,38 @@ eal_parse_common_option(int opt, const char *optarg, return 0; } +static void +eal_auto_detect_cores(struct rte_config *cfg) +{ + unsigned int lcore_id; + unsigned int removed = 0; + rte_cpuset_t affinity_set; + pthread_t tid = pthread_self(); + + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), + &affinity_set) < 0) + CPU_ZERO(&affinity_set); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (cfg->lcore_role[lcore_id] == ROLE_RTE && + !CPU_ISSET(lcore_id, &affinity_set)) { + cfg->lcore_role[lcore_id] = ROLE_OFF; + removed++; + } + } + + cfg->lcore_count -= removed; +} + int eal_adjust_config(struct internal_config *internal_cfg) { int i; struct rte_config *cfg = rte_eal_get_configuration(); + if (!core_parsed) + eal_auto_detect_cores(cfg); + if (internal_config.process_type == RTE_PROC_AUTO) internal_config.process_type = eal_proc_type_detect(); @@ -1021,7 +1051,7 @@ eal_common_usage(void) " 
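The practical effect of eal_auto_detect_cores() is that an application started without -c, -l or --lcores only gets the lcores present in its own CPU affinity mask (for example as restricted by taskset). A sketch of observing this; the argv contents are whatever the application was launched with:

#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_log.h>

int
main(int argc, char **argv)
{
	/* No -c/-l/--lcores given: EAL derives the core set from the
	 * affinity mask of the launching thread. */
	if (rte_eal_init(argc, argv) < 0)
		return -1;

	RTE_LOG(INFO, USER1, "EAL picked up %u lcore(s) from the affinity mask\n",
		rte_lcore_count());
	return 0;
}
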
[NOTE: PCI whitelist cannot be used with -b option]\n" " --"OPT_VDEV" Add a virtual device.\n" " The argument format is [,key=val,...]\n" - " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " (ex: --vdev=net_pcap0,iface=eth2).\n" " -d LIB.so|DIR Add a driver or driver directory\n" " (can be used multiple times)\n" " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" diff --git a/src/dpdk/lib/librte_eal/common/eal_common_pci.c b/src/dpdk/lib/librte_eal/common/eal_common_pci.c index 7248c38b..72547bd2 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_pci.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_pci.c @@ -82,8 +82,10 @@ #include "eal_private.h" -struct pci_driver_list pci_driver_list; -struct pci_device_list pci_device_list; +struct pci_driver_list pci_driver_list = + TAILQ_HEAD_INITIALIZER(pci_driver_list); +struct pci_device_list pci_device_list = + TAILQ_HEAD_INITIALIZER(pci_device_list); #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" @@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size) } /* - * If vendor/device ID match, call the devinit() function of the + * If vendor/device ID match, call the probe() function of the * driver. */ static int @@ -183,42 +185,45 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, - dev->numa_node); + dev->device.numa_node); /* no initialization when blacklisted, return without error */ - if (dev->devargs != NULL && - dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + if (dev->device.devargs != NULL && + dev->device.devargs->type == + RTE_DEVTYPE_BLACKLISTED_PCI) { RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); return 1; } RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ ret = rte_eal_pci_map_device(dev); if (ret != 0) return ret; - } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && - rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* unbind current driver */ - if (pci_unbind_kernel_driver(dev) < 0) - return -1; } /* reference driver structure */ dev->driver = dr; - /* call the driver devinit() function */ - return dr->devinit(dr, dev); + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { + dev->driver = NULL; + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + rte_eal_pci_unmap_device(dev); + } + + return ret; } /* return positive value if driver doesn't support this device */ return 1; } /* - * If vendor/device ID match, call the devuninit() function of the + * If vendor/device ID match, call the remove() function of the * driver. 
*/ static int @@ -250,12 +255,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, - loc->function, dev->numa_node); + loc->function, dev->device.numa_node); RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); - if (dr->devuninit && (dr->devuninit(dev) < 0)) + if (dr->remove && (dr->remove(dev) < 0)) return -1; /* negative value is an error */ /* clear driver structure */ @@ -273,7 +278,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, } /* - * If vendor/device ID match, call the devinit() function of all + * If vendor/device ID match, call the probe() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ @@ -286,6 +291,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev) if (dev == NULL) return -1; + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + TAILQ_FOREACH(dr, &pci_driver_list, next) { rc = rte_eal_pci_probe_one_driver(dr, dev); if (rc < 0) @@ -300,7 +309,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev) } /* - * If vendor/device ID match, call the devuninit() function of all + * If vendor/device ID match, call the remove() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ @@ -339,21 +348,27 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) if (addr == NULL) return -1; + /* update current pci device in global list, kernel bindings might have + * changed since last time we looked at it. + */ + if (pci_update_device(addr) < 0) + goto err_return; + TAILQ_FOREACH(dev, &pci_device_list, next) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; ret = pci_probe_all_drivers(dev); - if (ret < 0) + if (ret) goto err_return; return 0; } return -1; err_return: - RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); + RTE_LOG(WARNING, EAL, + "Requested device " PCI_PRI_FMT " cannot be used\n", + addr->domain, addr->bus, addr->devid, addr->function); return -1; } @@ -378,6 +393,7 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr) goto err_return; TAILQ_REMOVE(&pci_device_list, dev, next); + free(dev); return 0; } return -1; @@ -390,7 +406,7 @@ err_return: } /* - * Scan the content of the PCI bus, and call the devinit() function for + * Scan the content of the PCI bus, and call the probe() function for * all registered drivers that have a matching entry in its id_table * for discovered devices. 
*/ @@ -410,7 +426,7 @@ rte_eal_pci_probe(void) /* set devargs in PCI structure */ devargs = pci_devargs_lookup(dev); if (devargs != NULL) - dev->devargs = devargs; + dev->device.devargs = devargs; /* probe all or only whitelisted devices */ if (probe_all) @@ -463,11 +479,13 @@ void rte_eal_pci_register(struct rte_pci_driver *driver) { TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); } /* unregister a driver */ void rte_eal_pci_unregister(struct rte_pci_driver *driver) { + rte_eal_driver_unregister(&driver->driver); TAILQ_REMOVE(&pci_driver_list, driver, next); } diff --git a/src/dpdk/lib/librte_eal/common/eal_common_timer.c b/src/dpdk/lib/librte_eal/common/eal_common_timer.c index c4227cd8..72656176 100644 --- a/src/dpdk/lib/librte_eal/common/eal_common_timer.c +++ b/src/dpdk/lib/librte_eal/common/eal_common_timer.c @@ -47,8 +47,11 @@ /* The frequency of the RDTSC timer resolution */ static uint64_t eal_tsc_resolution_hz; +/* Pointer to user delay function */ +void (*rte_delay_us)(unsigned int) = NULL; + void -rte_delay_us(unsigned us) +rte_delay_us_block(unsigned int us) { const uint64_t start = rte_get_timer_cycles(); const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; @@ -84,3 +87,15 @@ set_tsc_freq(void) RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); eal_tsc_resolution_hz = freq; } + +void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) +{ + rte_delay_us = userfunc; +} + +static void __attribute__((constructor)) +rte_timer_init(void) +{ + /* set rte_delay_us_block as a delay function */ + rte_delay_us_callback_register(rte_delay_us_block); +} diff --git a/src/dpdk/lib/librte_eal/common/eal_common_vdev.c b/src/dpdk/lib/librte_eal/common/eal_common_vdev.c new file mode 100644 index 00000000..7d6e54f4 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/eal_common_vdev.c @@ -0,0 +1,124 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vdev_driver_list vdev_driver_list = + TAILQ_HEAD_INITIALIZER(vdev_driver_list); + +/* register a driver */ +void +rte_eal_vdrv_register(struct rte_vdev_driver *driver) +{ + TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); +} + +/* unregister a driver */ +void +rte_eal_vdrv_unregister(struct rte_vdev_driver *driver) +{ + rte_eal_driver_unregister(&driver->driver); + TAILQ_REMOVE(&vdev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->probe(name, args); + } + + /* Give new names precedence over aliases. */ + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (driver->driver.alias && + !strncmp(driver->driver.alias, name, + strlen(driver->driver.alias))) + return driver->probe(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->remove(name); + } + + /* Give new names precedence over aliases. 
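Virtual device lookup is a prefix match: for --vdev=net_pcap0 the instance name "net_pcap0" is compared against each driver's current name ("net_pcap") with strncmp, and only if no current name matches is the driver's legacy alias (e.g. "eth_pcap") tried. A compact standalone illustration of the same matching rule; the driver names are the pcap PMD's, used purely as an example:

#include <string.h>
#include <stdio.h>

static int
vdev_name_matches(const char *drv_name, const char *dev_name)
{
	/* Same rule as rte_eal_vdev_init(): the driver name must be a
	 * prefix of the device instance name. */
	return strncmp(drv_name, dev_name, strlen(drv_name)) == 0;
}

static void
demo(void)
{
	printf("%d\n", vdev_name_matches("net_pcap", "net_pcap0")); /* 1 */
	printf("%d\n", vdev_name_matches("net_ring", "net_pcap0")); /* 0 */
}
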
*/ + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (driver->driver.alias && + !strncmp(driver->driver.alias, name, + strlen(driver->driver.alias))) + return driver->remove(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/src/dpdk/lib/librte_eal/common/eal_filesystem.h b/src/dpdk/lib/librte_eal/common/eal_filesystem.h index fdb4a70b..8acbd996 100644 --- a/src/dpdk/lib/librte_eal/common/eal_filesystem.h +++ b/src/dpdk/lib/librte_eal/common/eal_filesystem.h @@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id return buffer; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS -static inline const char * -eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) -{ - snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, - internal_config.hugefile_prefix, f_id); - buffer[buflen - 1] = '\0'; - return buffer; -} -#endif - /** define the default filename prefix for the %s values above */ #define HUGEFILE_PREFIX_DEFAULT "rte" diff --git a/src/dpdk/lib/librte_eal/common/eal_hugepages.h b/src/dpdk/lib/librte_eal/common/eal_hugepages.h index 38edac03..68369f26 100644 --- a/src/dpdk/lib/librte_eal/common/eal_hugepages.h +++ b/src/dpdk/lib/librte_eal/common/eal_hugepages.h @@ -52,9 +52,6 @@ struct hugepage_file { int socket_id; /**< NUMA socket ID */ int file_id; /**< the '%d' in HUGEFILE_FMT */ int memseg_id; /**< the memory segment to which page belongs */ -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int repeated; /**< number of times the page size is repeated */ -#endif char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ }; diff --git a/src/dpdk/lib/librte_eal/common/eal_private.h b/src/dpdk/lib/librte_eal/common/eal_private.h index 857dc3ea..9e7d8f6b 100644 --- a/src/dpdk/lib/librte_eal/common/eal_private.h +++ b/src/dpdk/lib/librte_eal/common/eal_private.h @@ -47,7 +47,9 @@ int rte_eal_memzone_init(void); /** - * Common log initialization function (private to eal). + * Common log initialization function (private to eal). Determines + * where log data is written when no call to rte_openlog_stream is + * in effect. * * @param default_log * The default log stream to be used. @@ -55,7 +57,7 @@ int rte_eal_memzone_init(void); * - 0 on success * - Negative on error */ -int rte_eal_common_log_init(FILE *default_log); +void eal_log_set_default(FILE *default_log); /** * Fill configuration with number of physical and logical processors @@ -96,16 +98,6 @@ int rte_eal_memory_init(void); */ int rte_eal_timer_init(void); -/** - * Init early logs - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_log_early_init(void); - /** * Init the default log stream * @@ -117,7 +109,7 @@ int rte_eal_log_early_init(void); int rte_eal_log_init(const char *id, int facility); /** - * Init the default log stream + * Init the PCI infrastructure * * This function is private to EAL. * @@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility); */ int rte_eal_pci_init(void); -#ifdef RTE_LIBRTE_IVSHMEM -/** - * Init the memory from IVSHMEM devices - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_ivshmem_init(void); +struct rte_pci_driver; +struct rte_pci_device; /** - * Init objects in IVSHMEM devices + * Update a pci device object by asking the kernel for the latest information. * * This function is private to EAL. 
* + * @param addr + * The PCI Bus-Device-Function address to look for * @return - * 0 on success, negative on error + * - 0 on success. + * - negative on error. */ -int rte_eal_ivshmem_obj_init(void); -#endif - -struct rte_pci_driver; -struct rte_pci_device; +int pci_update_device(const struct rte_pci_addr *addr); /** * Unbind kernel driver for this device @@ -258,13 +241,6 @@ int rte_eal_intr_init(void); */ int rte_eal_alarm_init(void); -/** - * This function initialises any virtual devices - * - * This function is private to the EAL. - */ -int rte_eal_dev_init(void); - /** * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, * etc.) loaded. diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h index b20056b8..4eac6663 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include +#include #include #include "generic/rte_atomic.h" @@ -59,6 +61,12 @@ extern "C" { #define rte_smp_rmb() rte_compiler_barrier() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_compiler_barrier() + +#define rte_io_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 400d8a96..2e04c759 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -37,9 +37,17 @@ * All rights reserved. */ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_ATOMIC_I686_H_ #define _RTE_ATOMIC_I686_H_ +#include +#include +#include + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS @@ -47,6 +55,7 @@ static inline int rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) { uint8_t res; + RTE_STD_C11 union { struct { uint32_t l32; diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h index 4de66000..1a53a766 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -37,9 +37,17 @@ * All rights reserved. 
*/ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_ATOMIC_X86_64_H_ #define _RTE_ATOMIC_X86_64_H_ +#include +#include +#include + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h index ffdb6ef5..251f11b4 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include +#include #include "generic/rte_byteorder.h" #ifndef RTE_BYTE_ORDER diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h index 51c306f8..14d64834 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_BYTEORDER_I686_H_ #define _RTE_BYTEORDER_I686_H_ +#include +#include + /* * An architecture-optimized byte swap for a 64-bit value. * diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h index dda572bd..516ac052 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_BYTEORDER_X86_64_H_ #define _RTE_BYTEORDER_X86_64_H_ +#include +#include + /* * An architecture-optimized byte swap for a 64-bit value. * diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h index 6e3c7d89..5eb6ce96 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -75,12 +75,14 @@ extern "C" { extern int rte_cycles_vmware_tsc_map; #include #endif +#include static inline uint64_t rte_rdtsc(void) { union { uint64_t tsc_64; + RTE_STD_C11 struct { uint32_t lo_32; uint32_t hi_32; diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h new file mode 100644 index 00000000..c8d14043 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h @@ -0,0 +1,47 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
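For reference, rte_rdtsc() shown in the rte_cycles.h hunk is typically paired with the TSC frequency helper to time a code section. A generic sketch, assuming rte_get_tsc_hz() from the same header:

#include <stdio.h>
#include <inttypes.h>
#include <rte_cycles.h>

static void
time_a_section(void)
{
	uint64_t start = rte_rdtsc();

	/* ... code under measurement ... */

	uint64_t cycles = rte_rdtsc() - start;

	printf("took %" PRIu64 " TSC cycles (~%.1f us)\n",
	       cycles, (double)cycles * 1e6 / rte_get_tsc_hz());
}
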
+ * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_X86_H_ +#define _RTE_IO_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_io.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IO_X86_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index 413035e7..b9785e85 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -69,6 +69,8 @@ rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); #ifdef RTE_MACHINE_CPUFLAG_AVX512F +#define ALIGNMENT_MASK 0x3F + /** * AVX512 implementation below */ @@ -189,7 +191,7 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -308,6 +310,8 @@ COPY_BLOCK_128_BACK63: #elif defined RTE_MACHINE_CPUFLAG_AVX2 +#define ALIGNMENT_MASK 0x1F + /** * AVX2 implementation below */ @@ -387,7 +391,7 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -499,6 +503,8 @@ COPY_BLOCK_128_BACK31: #else /* RTE_MACHINE_CPUFLAG */ +#define ALIGNMENT_MASK 0x0F + /** * SSE & AVX implementation below */ @@ -594,7 +600,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i ~ must be pre-defined */ #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ -({ \ +__extension__ ({ \ int tmp; \ while (len >= 128 + 16 - offset) { \ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ @@ -655,7 +661,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i ~ used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined */ #define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ -({ \ +__extension__ ({ \ switch (offset) { \ case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ @@ -677,7 +683,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) }) static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; uintptr_t dstu = (uintptr_t)dst; @@ -821,6 +827,75 @@ 
COPY_BLOCK_64_BACK15: #endif /* RTE_MACHINE_CPUFLAG */ +static inline void * +rte_memcpy_aligned(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* Copy size <= 16 bytes */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + src = (const uint8_t *)src + 1; + dst = (uint8_t *)dst + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + src = (const uint16_t *)src + 1; + dst = (uint16_t *)dst + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + src = (const uint32_t *)src + 1; + dst = (uint32_t *)dst + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + + return ret; + } + + /* Copy 16 <= size <= 32 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + + return ret; + } + + /* Copy 32 < size <= 64 bytes */ + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + + return ret; + } + + /* Copy 64 bytes blocks */ + for (; n >= 64; n -= 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; + } + + /* Copy whatever left */ + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + + return ret; +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK)) + return rte_memcpy_aligned(dst, src, n); + else + return rte_memcpy_generic(dst, src, n); +} + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h index 5dac47eb..f464398f 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h index 0649f794..ab099952 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -20,6 +20,7 @@ /* Official RTM intrinsics interface matching gcc/icc, but works on older gcc compatible compilers and binutils. */ +#include #ifdef __cplusplus extern "C" { diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h index b698797c..1b4b85dd 100644 --- a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -31,8 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _RTE_VECT_H_ -#define _RTE_VECT_H_ +#ifndef _RTE_VECT_X86_H_ +#define _RTE_VECT_X86_H_ /** * @file @@ -40,6 +40,9 @@ * RTE SSE/AVX related header. */ +#include +#include "generic/rte_vect.h" + #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) #ifdef __SSE__ @@ -106,7 +109,8 @@ typedef union rte_ymm { #endif /* __AVX__ */ #ifdef RTE_ARCH_I686 -#define _mm_cvtsi128_si64(a) ({ \ +#define _mm_cvtsi128_si64(a) \ +__extension__ ({ \ rte_xmm_t m; \ m.x = (a); \ (m.u64[0]); \ @@ -117,7 +121,8 @@ typedef union rte_ymm { * Prior to version 12.1 icc doesn't support _mm_set_epi64x. 
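The new top-level rte_memcpy() only checks whether both pointers are aligned to the vector width (ALIGNMENT_MASK is 0x3F, 0x1F or 0x0F for AVX512, AVX2 or SSE builds) and then picks the aligned or generic path. A tiny standalone illustration of that predicate; the mask value is the SSE one, chosen for the example:

#include <stdint.h>
#include <stdio.h>

#define ALIGNMENT_MASK 0x0F  /* SSE build: 16-byte vector width */

static int
both_aligned(const void *dst, const void *src)
{
	/* Same test as rte_memcpy(): OR the addresses so a single mask
	 * check covers both pointers. */
	return (((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK) == 0;
}

int
main(void)
{
	char buf[64] __attribute__((aligned(16)));

	printf("%d\n", both_aligned(buf, buf + 16)); /* 1: both 16B aligned */
	printf("%d\n", both_aligned(buf, buf + 3));  /* 0: src misaligned */
	return 0;
}
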
*/ #if (defined(__ICC) && __ICC < 1210) -#define _mm_set_epi64x(a, b) ({ \ +#define _mm_set_epi64x(a, b) \ +__extension__ ({ \ rte_xmm_t m; \ m.u64[0] = b; \ m.u64[1] = a; \ @@ -129,4 +134,4 @@ typedef union rte_ymm { } #endif -#endif /* _RTE_VECT_H_ */ +#endif /* _RTE_VECT_X86_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h index bfb4fe44..7b81705b 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h @@ -42,6 +42,7 @@ */ #include +#include #ifdef __DOXYGEN__ @@ -99,6 +100,33 @@ static inline void rte_smp_wmb(void); */ static inline void rte_smp_rmb(void); +/** + * General memory barrier for I/O device + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_io_mb() call are visible to I/O device or CPU before the + * LOAD and STORE operations that follow it. + */ +static inline void rte_io_mb(void); + +/** + * Write memory barrier for I/O device + * + * Guarantees that the STORE operations that precede the + * rte_io_wmb() call are visible to I/O device before the STORE + * operations that follow it. + */ +static inline void rte_io_wmb(void); + +/** + * Read memory barrier for IO device + * + * Guarantees that the LOAD operations on I/O device that precede the + * rte_io_rmb() call are visible to CPU before the LOAD + * operations that follow it. + */ +static inline void rte_io_rmb(void); + #endif /* __DOXYGEN__ */ /** diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h index c46fdcf2..e00bccbc 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -50,6 +50,8 @@ #include #endif +#include + /* * Compile-time endianness detection */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h index c1da357c..71321f32 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -44,6 +44,7 @@ /** * Enumeration of all CPU features supported */ +__extension__ enum rte_cpu_flag_t; /** @@ -55,6 +56,7 @@ enum rte_cpu_flag_t; * flag name * NULL if flag ID is invalid */ +__extension__ const char * rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); @@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); * 0 if flag is not available * -ENOENT if flag is invalid */ +__extension__ int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h index 8cc21f20..00103ca9 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h @@ -180,15 +180,16 @@ rte_get_timer_hz(void) default: rte_panic("Invalid timer source specified\n"); } } - /** * Wait at least us microseconds. + * This function can be replaced with user-defined function. + * @see rte_delay_us_callback_register * * @param us * The number of microseconds to wait. */ -void -rte_delay_us(unsigned us); +extern void +(*rte_delay_us)(unsigned int us); /** * Wait at least ms milliseconds. @@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms) rte_delay_us(ms * 1000); } +/** + * Blocking delay function. 
+ * + * @param us + * Number of microseconds to wait. + */ +void rte_delay_us_block(unsigned int us); + +/** + * Replace rte_delay_us with user defined function. + * + * @param userfunc + * User function which replaces rte_delay_us. rte_delay_us_block restores + * buildin block delay function. + */ +void rte_delay_us_callback_register(void(*userfunc)(unsigned int)); + #endif /* _RTE_CYCLES_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h new file mode 100644 index 00000000..d82ee695 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h @@ -0,0 +1,381 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_H_ +#define _RTE_IO_H_ + +#include + +/** + * @file + * I/O device memory operations + * + * This file defines the generic API for I/O device memory read/write operations + */ + +#include +#include +#include + +#ifdef __DOXYGEN__ + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8_relaxed(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16_relaxed(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. 
+ * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32_relaxed(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64_relaxed(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8_relaxed(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16_relaxed(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32_relaxed(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64_relaxed(uint64_t value, volatile void *addr); + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. 
+ * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64(uint64_t value, volatile void *addr); + +#endif /* __DOXYGEN__ */ + +#ifndef RTE_OVERRIDE_IO_H + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + return *(const volatile uint8_t *)addr; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + return *(const volatile uint16_t *)addr; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + return *(const volatile uint32_t *)addr; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile void *addr) +{ + return *(const volatile uint64_t *)addr; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t value, volatile void *addr) +{ + *(volatile uint8_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t value, volatile void *addr) +{ + *(volatile uint16_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t value, volatile void *addr) +{ + *(volatile uint32_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t value, volatile void *addr) +{ + *(volatile uint64_t *)addr = value; +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, 
volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#endif /* RTE_OVERRIDE_IO_H */ + +#endif /* _RTE_IO_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h index afb0afe4..4e9d8794 100644 --- a/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src); static inline void rte_mov32(uint8_t *dst, const uint8_t *src); +#ifdef __DOXYGEN__ + /** * Copy 48 bytes from one location to another using optimised * instructions. The locations should not overlap. @@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src); static inline void rte_mov48(uint8_t *dst, const uint8_t *src); +#endif /* __DOXYGEN__ */ + /** * Copy 64 bytes from one location to another using optimised * instructions. The locations should not overlap. diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h new file mode 100644 index 00000000..600ee9f3 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * SIMD vector types + * + * This file defines types to use vector instructions with generic C code. + */ + +#include + +/* Unsigned vector types */ + +/** + * 64 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 16 bits elements. 
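As defined above, the non-relaxed accessors bracket the access with the new I/O barriers (rte_write* issues rte_io_wmb() before the store, rte_read* issues rte_io_rmb() after the load), which is what a descriptor-then-doorbell sequence needs. A sketch of the usage pattern; the register offsets and BAR pointer are made up for illustration:

#include <stdint.h>
#include <rte_io.h>

#define MY_DEV_CTRL_REG   0x00  /* hypothetical register offsets */
#define MY_DEV_STATUS_REG 0x04

static void
kick_device(volatile uint8_t *bar, uint32_t value)
{
	/* Ordered write: descriptor memory written before this point is
	 * visible to the device before the doorbell store. */
	rte_write32(value, bar + MY_DEV_CTRL_REG);
}

static uint32_t
read_status(const volatile uint8_t *bar)
{
	/* Ordered read: later loads cannot be hoisted above this one. */
	return rte_read32(bar + MY_DEV_STATUS_REG);
}
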
+ * + * a = (rte_v64u16_t){ a0, a1, a2, a3 } + */ +typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v64u32_t){ a0, a1 } + */ +typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v128u32_t){ a0, a1, a2, a3, a4 } + */ +typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v128u64_t){ a0, a1 } + */ +typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v256u64_t){ a0, a1, a2, a3 } + */ +typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32))); + + +/* Signed vector types */ + +/** + * 64 bits vector size to use with 8 bits elements. + * + * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 16 bits elements. + * + * a = (rte_v64s16_t){ a0, a1, a2, a3 } + */ +typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 32 bits elements. + * + * a = (rte_v64s32_t){ a0, a1 } + */ +typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with 8 bits elements. + * + * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 16 bits elements. + * + * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 32 bits elements. + * + * a = (rte_v128s32_t){ a0, a1, a2, a3 } + */ +typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 64 bits elements. 
+ * + * a = (rte_v128s64_t){ a1, a2 } + */ +typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with 8 bits elements. + * + * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 16 bits elements. + * + * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 32 bits elements. + * + * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 64 bits elements. + * + * a = (rte_v256s64_t){ a0, a1, a2, a3 } + */ +typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32))); + +#endif /* _RTE_VECT_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_bus.h b/src/dpdk/lib/librte_eal/common/include/rte_bus.h new file mode 100644 index 00000000..7c369692 --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/rte_bus.h @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BUS_H_ +#define _RTE_BUS_H_ + +/** + * @file + * + * DPDK device bus interface + * + * This file exposes API and interfaces for bus abstraction + * over the devices and drivers in EAL. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include + +/** Double linked list of buses */ +TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * Bus specific scan for devices attached on the bus. + * For each bus object, the scan would be reponsible for finding devices and + * adding them to its private device list. 
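These typedefs let generic C code use compiler vector extensions without ISA-specific intrinsics; element-wise arithmetic works directly on the types. A small sketch, assuming a GCC/clang-compatible compiler, which the vector_size attribute already requires:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <rte_vect.h>

static void
vect_demo(void)
{
	rte_v128u32_t a = { 1, 2, 3, 4 };
	rte_v128u32_t b = { 10, 20, 30, 40 };
	rte_v128u32_t c = a + b;        /* element-wise add, no intrinsics */
	uint32_t out[4];

	memcpy(out, &c, sizeof(out));   /* portable way to inspect the lanes */
	printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);
}
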
+ * + * A bus should mandatorily implement this method. + * + * @return + * 0 for successful scan + * <0 for unsuccessful scan with error value + */ +typedef int (*rte_bus_scan_t)(void); + +/** + * Implementation specific probe function which is responsible for linking + * devices on that bus with applicable drivers. + * + * This is called while iterating over each registered bus. + * + * @return + * 0 for successful probe + * !0 for any error while probing + */ +typedef int (*rte_bus_probe_t)(void); + +/** + * A structure describing a generic bus. + */ +struct rte_bus { + TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */ + const char *name; /**< Name of the bus */ + rte_bus_scan_t scan; /**< Scan for devices attached to bus */ + rte_bus_probe_t probe; /**< Probe devices on bus */ +}; + +/** + * Register a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be registered. + */ +void rte_bus_register(struct rte_bus *bus); + +/** + * Unregister a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be unregistered. + */ +void rte_bus_unregister(struct rte_bus *bus); + +/** + * Scan all the buses. + * + * @return + * 0 in case of success in scanning all buses + * !0 in case of failure to scan + */ +int rte_bus_scan(void); + +/** + * For each device on the buses, perform a driver 'match' and call the + * driver-specific probe for device initialization. + * + * @return + * 0 for successful match/probe + * !0 otherwise + */ +int rte_bus_probe(void); + +/** + * Dump information of all the buses registered with EAL. + * + * @param f + * A valid and open output stream handle + * + * @return + * 0 in case of success + * !0 in case there is error in opening the output stream + */ +void rte_bus_dump(FILE *f); + +/** + * Helper for Bus registration. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_BUS(nm, bus) \ +static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \ +{\ + (bus).name = RTE_STR(nm);\ + rte_bus_register(&bus); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_H */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_common.h b/src/dpdk/lib/librte_eal/common/include/rte_common.h index 332f2a43..8dda3e29 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_common.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_common.h @@ -59,6 +59,13 @@ extern "C" { #define asm __asm__ #endif +/** C extension macro for environments lacking C11 features. */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define RTE_STD_C11 __extension__ +#else +#define RTE_STD_C11 +#endif + #ifdef RTE_ARCH_STRICT_ALIGN typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); @@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the minimum of two numbers */ -#define RTE_MIN(a, b) ({ \ +#define RTE_MIN(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ? _a : _b; \ @@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the maximum of two numbers */ -#define RTE_MAX(a, b) ({ \ +#define RTE_MAX(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a > _b ? _a : _b; \ @@ -322,10 +331,39 @@ rte_bsf32(uint32_t v) #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #endif +/** + * Return pointer to the wrapping struct instance. 
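To illustrate the bus interface introduced by the new rte_bus.h, here is a minimal sketch of how a bus implementation could hook into it; the bus name "dummy" and the empty scan/probe callbacks are hypothetical, not part of this patch:

#include <rte_bus.h>

/* Scan callback: a real bus would walk sysfs or firmware tables here and
 * append every device found to its private device list. */
static int
dummy_scan(void)
{
        return 0;       /* 0 = successful scan */
}

/* Probe callback: match the scanned devices against registered drivers. */
static int
dummy_probe(void)
{
        return 0;       /* 0 = successful probe */
}

static struct rte_bus dummy_bus = {
        .scan  = dummy_scan,
        .probe = dummy_probe,
};

/* The generated constructor runs with priority 101, i.e. before PMD
 * constructors, and fills in dummy_bus.name before registering it, so the
 * bus is known to the EAL by the time rte_bus_scan()/rte_bus_probe() run. */
RTE_REGISTER_BUS(dummy, dummy_bus);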
+ * + * Example: + * + * struct wrapper { + * ... + * struct child c; + * ... + * }; + * + * struct child *x = obtain(...); + * struct wrapper *w = container_of(x, struct wrapper, c); + */ +#ifndef container_of +#define container_of(ptr, type, member) __extension__ ({ \ + typeof(((type *)0)->member) *_ptr = (ptr); \ + (type *)(((char *)_ptr) - offsetof(type, member)); }) +#endif + #define _RTE_STR(x) #x /** Take a macro value and get a string version of it */ #define RTE_STR(x) _RTE_STR(x) +/** + * ISO C helpers to modify format strings using variadic macros. + * This is a replacement for the ", ## __VA_ARGS__" GNU extension. + * An empty %s argument is appended to avoid a dangling comma. + */ +#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ "" +#define RTE_FMT_HEAD(fmt, ...) fmt +#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__ + /** Mask value of type "tp" for the first "ln" bit set. */ #define RTE_LEN2MASK(ln, tp) \ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) diff --git a/src/dpdk/lib/librte_eal/common/include/rte_dev.h b/src/dpdk/lib/librte_eal/common/include/rte_dev.h index 95789f9d..b17791f5 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_dev.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_dev.h @@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) } \ } while (0) +/** + * A generic memory resource representation. + */ +struct rte_mem_resource { + uint64_t phys_addr; /**< Physical address, 0 if not resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; /** Double linked list of device drivers. */ TAILQ_HEAD(rte_driver_list, rte_driver); +/** Double linked list of devices. */ +TAILQ_HEAD(rte_device_list, rte_device); + +/* Forward declaration */ +struct rte_driver; /** - * Initialization function called for each device driver once. + * A structure describing a generic device. */ -typedef int (rte_dev_init_t)(const char *name, const char *args); +struct rte_device { + TAILQ_ENTRY(rte_device) next; /**< Next device */ + const struct rte_driver *driver;/**< Associated driver */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ +}; /** - * Uninitilization function called for each device driver once. + * Insert a device detected by a bus scanning. + * + * @param dev + * A pointer to a rte_device structure describing the detected device. */ -typedef int (rte_dev_uninit_t)(const char *name); +void rte_eal_device_insert(struct rte_device *dev); /** - * Driver type enumeration + * Remove a device (e.g. when being unplugged). + * + * @param dev + * A pointer to a rte_device structure describing the device to be removed. */ -enum pmd_type { - PMD_VDEV = 0, - PMD_PDEV = 1, -}; +void rte_eal_device_remove(struct rte_device *dev); /** * A structure describing a device driver. */ struct rte_driver { TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ - enum pmd_type type; /**< PMD Driver type */ const char *name; /**< Driver name. */ - rte_dev_init_t *init; /**< Device init. function. */ - rte_dev_uninit_t *uninit; /**< Device uninit. function. */ + const char *alias; /**< Driver alias. */ }; /** @@ -178,31 +197,73 @@ int rte_eal_vdev_init(const char *name, const char *args); */ int rte_eal_vdev_uninit(const char *name); -#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[] +/** + * Attach a device to a registered driver. 
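The new container_of() helper and the generic struct rte_device are meant to be used together: a bus-specific device embeds the generic one and recovers itself from a generic pointer. A small sketch under that assumption (struct my_bus_device and its field names are invented for illustration):

#include <rte_common.h>
#include <rte_dev.h>

/* A bus-specific device wraps the generic struct rte_device, the same
 * pattern struct rte_pci_device follows later in this patch. */
struct my_bus_device {
        struct rte_device device;   /* generic part, linked into EAL lists */
        int irq_count;              /* bus-specific state */
};

/* Recover the wrapper from a generic pointer handed out by the EAL. */
static struct my_bus_device *
to_my_bus_device(struct rte_device *dev)
{
        return container_of(dev, struct my_bus_device, device);
}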
+ * + * @param name + * The device name, that refers to a pci device (or some private + * way of designating a vdev device). Based on this device name, eal + * will identify a driver capable of handling it and pass it to the + * driver probing function. + * @param devargs + * Device arguments to be passed to the driver. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_attach(const char *name, const char *devargs); -#define DRIVER_EXPORT_NAME(name, idx) \ -static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ -__attribute__((used)) = RTE_STR(name) +/** + * Detach a device from its driver. + * + * @param name + * Same description as for rte_eal_dev_attach(). + * Here, eal will call the driver detaching function. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_detach(const char *name); -#define PMD_REGISTER_DRIVER(drv, nm)\ -void devinitfn_ ##drv(void);\ -void __attribute__((constructor, used)) devinitfn_ ##drv(void)\ -{\ - (drv).name = RTE_STR(nm);\ - rte_eal_driver_register(&drv);\ -} \ -DRIVER_EXPORT_NAME(nm, __COUNTER__) +#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[] + +#define RTE_PMD_EXPORT_NAME(name, idx) \ +static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ +__attribute__((used)) = RTE_STR(name) #define DRV_EXP_TAG(name, tag) __##name##_##tag -#define DRIVER_REGISTER_PCI_TABLE(name, table) \ +#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \ static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \ RTE_STR(table) -#define DRIVER_REGISTER_PARAM_STRING(name, str) \ +#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \ static const char DRV_EXP_TAG(name, param_string_export)[] \ __attribute__((used)) = str +/** + * Advertise the list of kernel modules required to run this driver + * + * This string lists the kernel modules required for the devices + * associated to a PMD. The format of each line of the string is: + * " ". + * + * The possible formats for the device pattern are: + * "*" all devices supported by this driver + * "pci:*" all PCI devices supported by this driver + * "pci:v8086:d*:sv*:sd*" all PCI devices supported by this driver + * whose vendor id is 0x8086. + * + * The format of the kernel modules list is a parenthesed expression + * containing logical-and (&) and logical-or (|). + * + * The device pattern and the kmod expression are separated by a space. + * + * Example: + * - "* igb_uio | uio_pci_generic | vfio" + */ +#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \ +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ +__attribute__((used)) = str + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_devargs.h b/src/dpdk/lib/librte_eal/common/include/rte_devargs.h index 53c59f56..88120a1c 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_devargs.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_devargs.h @@ -76,6 +76,7 @@ struct rte_devargs { TAILQ_ENTRY(rte_devargs) next; /** Type of device. */ enum rte_devtype type; + RTE_STD_C11 union { /** Used if type is RTE_DEVTYPE_*_PCI. */ struct { @@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list; * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". 
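A short usage sketch of the attach/detach API declared above, using the "net_ring0" vdev naming shown in the rte_devargs documentation; error handling is deliberately minimal and the function name is illustrative:

#include <rte_dev.h>
#include <rte_log.h>

/* Hot-plug a ring vdev and remove it again. */
static int
hotplug_example(void)
{
        if (rte_eal_dev_attach("net_ring0", "") != 0) {
                RTE_LOG(ERR, USER1, "cannot attach net_ring0\n");
                return -1;
        }
        /* ... look up the new port by name and use it ... */
        return rte_eal_dev_detach("net_ring0");
}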
* * The function parses the arguments string to get driver name and driver * arguments. @@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str, * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the * driver name is not checked by this function, it is done when probing * the drivers. * diff --git a/src/dpdk/lib/librte_eal/common/include/rte_eal.h b/src/dpdk/lib/librte_eal/common/include/rte_eal.h index a71d6f57..03fee500 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_eal.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_eal.h @@ -44,6 +44,7 @@ #include #include +#include #ifdef __cplusplus extern "C" { @@ -145,14 +146,19 @@ int rte_eal_iopl_init(void); * This behavior may change in the future. * * @param argc - * The argc argument that was given to the main() function. + * A non-negative value. If it is greater than 0, the array members + * for argv[0] through argv[argc] (non-inclusive) shall contain pointers + * to strings. * @param argv - * The argv argument that was given to the main() function. + * An array of strings. The contents of the array, as well as the strings + * which are pointed to by the array, may be modified by this function. * @return * - On success, the number of parsed arguments, which is greater or * equal to zero. After the call to rte_eal_init(), - * all arguments argv[x] with x < ret may be modified and should - * not be accessed by the application. + * all arguments argv[x] with x < ret may have been modified by this + * function call and should not be further interpreted by the + * application. The EAL does not take any ownership of the memory used + * for either the argv array, or its members. * - On failure, a negative error value. */ int rte_eal_init(int argc, char **argv); @@ -252,6 +258,9 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h b/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h index ff11ef3a..6cade018 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_interrupts.h @@ -34,6 +34,8 @@ #ifndef _RTE_INTERRUPTS_H_ #define _RTE_INTERRUPTS_H_ +#include + /** * @file * @@ -68,7 +70,7 @@ typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_callback_register(struct rte_intr_handle *intr_handle, +int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -86,7 +88,7 @@ int rte_intr_callback_register(struct rte_intr_handle *intr_handle, * - On success, return the number of callback entities removed. * - On failure, a negative value. */ -int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -99,7 +101,7 @@ int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. 
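The reworded rte_eal_init() contract above boils down to the usual application pattern: the EAL may rewrite the first ret entries of argv, and the application should skip them before parsing its own options. A minimal sketch, assuming nothing beyond what the header documents:

#include <rte_eal.h>
#include <rte_debug.h>

int
main(int argc, char **argv)
{
        /* EAL may modify argv[0..ret) in place; do not re-parse them. */
        int ret = rte_eal_init(argc, argv);

        if (ret < 0)
                rte_panic("Cannot init EAL\n");

        /* Skip the arguments consumed by the EAL before handling the
         * application's own options. */
        argc -= ret;
        argv += ret;
        return 0;
}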
*/ -int rte_intr_enable(struct rte_intr_handle *intr_handle); +int rte_intr_enable(const struct rte_intr_handle *intr_handle); /** * It disables the interrupt for the specified handle. @@ -111,7 +113,7 @@ int rte_intr_enable(struct rte_intr_handle *intr_handle); * - On success, zero. * - On failure, a negative value. */ -int rte_intr_disable(struct rte_intr_handle *intr_handle); +int rte_intr_disable(const struct rte_intr_handle *intr_handle); #ifdef __cplusplus } diff --git a/src/dpdk/lib/librte_eal/common/include/rte_log.h b/src/dpdk/lib/librte_eal/common/include/rte_log.h index b1add04c..954b96cf 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_log.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_log.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,8 +42,6 @@ * This file provides a log API to RTE applications. */ -#include "rte_common.h" /* for __rte_deprecated macro */ - #ifdef __cplusplus extern "C" { #endif @@ -56,7 +54,7 @@ extern "C" { struct rte_logs { uint32_t type; /**< Bitfield with enabled logs. */ uint32_t level; /**< Log level. */ - FILE *file; /**< Pointer to current FILE* for logs. */ + FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */ }; /** Global log informations */ @@ -81,6 +79,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ #define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ #define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EFD 0x00040000 /**< Log related to EFD. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ @@ -102,9 +101,6 @@ extern struct rte_logs rte_logs; #define RTE_LOG_INFO 7U /**< Informational. */ #define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ -/** The default log stream. */ -extern FILE *eal_default_log_stream; - /** * Change the stream that will be used by the logging system. * @@ -123,9 +119,8 @@ int rte_openlog_stream(FILE *f); /** * Set the global log level. * - * After this call, all logs that are lower or equal than level and - * lower or equal than the RTE_LOG_LEVEL configuration option will be - * displayed. + * After this call, logs with a level lower or equal than the level + * passed as argument will be displayed. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -180,45 +175,6 @@ int rte_log_cur_msg_loglevel(void); */ int rte_log_cur_msg_logtype(void); -/** - * @deprecated - * Enable or disable the history (enabled by default) - * - * @param enable - * true to enable, or 0 to disable history. - */ -__rte_deprecated -void rte_log_set_history(int enable); - -/** - * @deprecated - * Dump the log history to a file - * - * @param f - * A pointer to a file for output - */ -__rte_deprecated -void rte_log_dump_history(FILE *f); - -/** - * @deprecated - * Add a log message to the history. - * - * This function can be called from a user-defined log stream. It adds - * the given message in the history that can be dumped using - * rte_log_dump_history(). - * - * @param buf - * A data buffer containing the message to be saved in the history. - * @param size - * The length of the data buffer. - * @return - * - 0: Success. 
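A small sketch of the two remaining log-configuration entry points touched above, rte_openlog_stream() and rte_set_log_level(); the helper name and file path handling are illustrative only:

#include <stdio.h>
#include <rte_log.h>

/* Redirect all DPDK logs to a file and raise verbosity to DEBUG. */
static int
setup_logging(const char *path)
{
        FILE *f = fopen(path, "w");

        if (f == NULL || rte_openlog_stream(f) < 0)
                return -1;
        rte_set_log_level(RTE_LOG_DEBUG);       /* global run-time filter */
        return 0;
}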
- * - (-ENOBUFS) if there is no room to store the message. - */ -__rte_deprecated -int rte_log_add_in_history(const char *buf, size_t size); - /** * Generates a log message. * @@ -228,9 +184,8 @@ int rte_log_add_in_history(const char *buf, size_t size); * The level argument determines if the log should be displayed or * not, depending on the global rte_logs variable. * - * The preferred alternative is the RTE_LOG() function because debug logs may - * be removed at compilation time if optimization is enabled. Moreover, - * logs are automatically prefixed by type when using the macro. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -261,8 +216,8 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) * not, depending on the global rte_logs variable. A trailing * newline may be added if needed. * - * The preferred alternative is the RTE_LOG() because debug logs may be - * removed at compilation time. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -283,15 +238,8 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) /** * Generates a log message. * - * The RTE_LOG() is equivalent to rte_log() with two differences: - - * - RTE_LOG() can be used to remove debug logs at compilation time, - * depending on RTE_LOG_LEVEL configuration option, and compilation - * optimization level. If optimization is enabled, the tests - * involving constants only are pre-computed. If compilation is done - * with -O0, these tests will be done at run time. - * - The log level and log type names are smaller, for example: - * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * The RTE_LOG() is a helper that prefixes the string with the log level + * and type, and call rte_log(). * * @param l * Log level. A value between EMERG (1) and DEBUG (8). The short name is @@ -307,7 +255,31 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) * - Negative on error. */ #define RTE_LOG(l, t, ...) \ - (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) + +/** + * Generates a log message for data path. + * + * Similar to RTE_LOG(), except that it is removed at compilation time + * if the RTE_LOG_DP_LEVEL configuration option is lower than the log + * level argument. + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... + * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG_DP(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \ rte_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ 0) diff --git a/src/dpdk/lib/librte_eal/common/include/rte_malloc.h b/src/dpdk/lib/librte_eal/common/include/rte_malloc.h index 74bb78c7..008ce134 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_malloc.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_malloc.h @@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket, /** * Dump statistics. 
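To make the difference between the reworked RTE_LOG() and the new RTE_LOG_DP() concrete, a sketch of both in a PMD-style function (the message contents and the nb_rx parameter are invented for illustration):

#include <rte_log.h>

static void
log_examples(unsigned int nb_rx)
{
        /* Control path: always compiled in, filtered at run time against
         * the global log level; the "PMD: " prefix is added by the macro. */
        RTE_LOG(INFO, PMD, "link is up at %u Mbps\n", 10000u);

        /* Data path: removed entirely at compile time when RTE_LOG_DP_LEVEL
         * is set below DEBUG, so it costs nothing in release builds. */
        RTE_LOG_DP(DEBUG, PMD, "received a burst of %u packets\n", nb_rx);
}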
* - * Dump for the specified type to the console. If the type argument is + * Dump for the specified type to a file. If the type argument is * NULL, all memory types will be dumped. * * @param f diff --git a/src/dpdk/lib/librte_eal/common/include/rte_memory.h b/src/dpdk/lib/librte_eal/common/include/rte_memory.h index 06611093..4aa5d1f7 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_memory.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_memory.h @@ -44,6 +44,8 @@ #include #include +#include + #ifdef RTE_EXEC_ENV_LINUXAPP #include #endif @@ -54,6 +56,7 @@ extern "C" { #include +__extension__ enum rte_page_sizes { RTE_PGSIZE_4K = 1ULL << 12, RTE_PGSIZE_64K = 1ULL << 16, @@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */ */ struct rte_memseg { phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64 bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the segment. */ uint64_t hugepage_sz; /**< The pagesize of underlying memory */ int32_t socket_id; /**< NUMA socket ID. */ @@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt); const struct rte_memseg *rte_eal_get_physmem_layout(void); /** - * Dump the physical memory layout to the console. + * Dump the physical memory layout to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_eal/common/include/rte_memzone.h b/src/dpdk/lib/librte_eal/common/include/rte_memzone.h index f69b5a87..1d0827f4 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_memzone.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_memzone.h @@ -53,6 +53,7 @@ #include #include +#include #ifdef __cplusplus extern "C" { @@ -78,13 +79,11 @@ struct rte_memzone { char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64-bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the memzone. */ uint64_t hugepage_sz; /**< The page size of underlying memory */ @@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, /** * Free a memzone. * - * Note: an IVSHMEM zone cannot be freed. - * * @param mz * A pointer to the memzone * @return - * -EINVAL - invalid parameter, IVSHMEM memzone. + * -EINVAL - invalid parameter. * 0 - success */ int rte_memzone_free(const struct rte_memzone *mz); @@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz); const struct rte_memzone *rte_memzone_lookup(const char *name); /** - * Dump all reserved memzones to the console. + * Dump all reserved memzones to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_eal/common/include/rte_pci.h b/src/dpdk/lib/librte_eal/common/include/rte_pci.h index fa749626..8557e477 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_pci.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_pci.h @@ -82,7 +82,9 @@ extern "C" { #include #include +#include #include +#include TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. 
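The RTE_STD_C11 anonymous union added to rte_memseg and rte_memzone lets the same location be read either as a pointer or as a 64-bit integer. A minimal sketch under that assumption (the zone name and size are arbitrary examples):

#include <stdio.h>
#include <inttypes.h>
#include <rte_memzone.h>

static const struct rte_memzone *
memzone_example(void)
{
        const struct rte_memzone *mz;

        mz = rte_memzone_reserve("example_zone", 4096, SOCKET_ID_ANY, 0);
        if (mz == NULL)
                return NULL;
        printf("virt %p (0x%" PRIx64 "), phys 0x%" PRIx64 ", len %zu\n",
               mz->addr, mz->addr_64, (uint64_t)mz->phys_addr, mz->len);
        rte_memzone_dump(stdout);       /* now documented as "to a file" */
        return mz;
}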
*/ @@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void); /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 +#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X") /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 @@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void); /** Nb. of values in PCI resource format. */ #define PCI_RESOURCE_FMT_NVAL 3 -/** - * A structure describing a PCI resource. - */ -struct rte_pci_resource { - uint64_t phys_addr; /**< Physical address, 0 if no resource. */ - uint64_t len; /**< Length of the resource. */ - void *addr; /**< Virtual address, NULL when not mapped. */ -}; - /** Maximum number of PCI resources. */ #define PCI_MAX_RESOURCE 6 @@ -155,17 +149,23 @@ enum rte_kernel_driver { */ struct rte_pci_device { TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_device device; /**< Inherit core device */ struct rte_pci_addr addr; /**< PCI location. */ struct rte_pci_id id; /**< PCI ID. */ - struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; + /**< PCI Memory Resource */ struct rte_intr_handle intr_handle; /**< Interrupt handle */ struct rte_pci_driver *driver; /**< Associated driver */ uint16_t max_vfs; /**< sriov enable if not zero */ - int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ }; +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + /** Any PCI device identifier (vendor, device, ...) */ #define PCI_ANY_ID (0xffff) #define RTE_CLASS_ANY_ID (0xffffff) @@ -193,33 +193,29 @@ struct rte_pci_driver; /** * Initialisation function for the driver called during PCI probing. */ -typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); +typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); /** * Uninitialisation function for the driver called during hotplugging. */ -typedef int (pci_devuninit_t)(struct rte_pci_device *); +typedef int (pci_remove_t)(struct rte_pci_device *); /** * A structure describing a PCI driver. */ struct rte_pci_driver { TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - const char *name; /**< Driver name. */ - pci_devinit_t *devinit; /**< Device init. function. */ - pci_devuninit_t *devuninit; /**< Device uninit function. */ + struct rte_driver driver; /**< Inherit core driver. */ + pci_probe_t *probe; /**< Device Probe function. */ + pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t drv_flags; /**< Flags contolling handling of device. */ }; /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ #define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device needs to be unbound even if no module is provided */ -#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 /** Device driver supports link state interrupt */ #define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports detaching capability */ -#define RTE_PCI_DRV_DETACHABLE 0x0010 /** * A structure describing a PCI mapping. 
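With the rte_pci_driver rework above (embedded rte_driver, probe/remove instead of devinit/devuninit), a skeleton driver now looks roughly as follows; the IDs, names and empty callbacks are hypothetical, and registration itself uses the RTE_PMD_REGISTER_PCI helper added further down:

#include <rte_common.h>
#include <rte_pci.h>

/* Illustrative vendor/device IDs, not a real NIC. */
static const struct rte_pci_id my_pci_ids[] = {
        { RTE_PCI_DEVICE(0x1234, 0x5678) },
        { .vendor_id = 0 },     /* sentinel terminating the table */
};

/* New-style probe: receives the driver and the matched device. */
static int
my_pci_probe(struct rte_pci_driver *drv __rte_unused,
             struct rte_pci_device *dev __rte_unused)
{
        /* BARs are already mapped thanks to RTE_PCI_DRV_NEED_MAPPING;
         * allocate and initialise the ethdev/cryptodev here. */
        return 0;
}

static int
my_pci_remove(struct rte_pci_device *dev __rte_unused)
{
        return 0;
}

static struct rte_pci_driver my_pci_driver = {
        .id_table  = my_pci_ids,
        .probe     = my_pci_probe,
        .remove    = my_pci_remove,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
};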
@@ -308,6 +304,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD +/** + * Utility function to write a pci device name, this device name can later be + * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* + * BDF helpers. + * + * @param addr + * The PCI Bus-Device-Function address + * @param output + * The output buffer string + * @param size + * The output buffer size + */ +static inline void +rte_eal_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) +{ + RTE_VERIFY(size >= PCI_PRI_STR_SIZE); + RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, + addr->domain, addr->bus, + addr->devid, addr->function) >= 0); +} + /* Compare two PCI device addresses. */ /** * Utility function to compare two PCI device addresses. @@ -442,7 +460,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); * Close the single PCI device. * * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the devuninit() function for registered driver that has a + * address, then call the remove() function for registered driver that has a * matching entry in its id_table for discovered device. * * @param addr @@ -470,6 +488,16 @@ void rte_eal_pci_dump(FILE *f); */ void rte_eal_pci_register(struct rte_pci_driver *driver); +/** Helper for PCI device registration from driver (eth, crypto) instance */ +#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ +RTE_INIT(pciinitfn_ ##nm); \ +static void pciinitfn_ ##nm(void) \ +{\ + (pci_drv).driver.name = RTE_STR(nm);\ + rte_eal_pci_register(&pci_drv); \ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + /** * Unregister a PCI driver. * diff --git a/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h b/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h deleted file mode 100644 index 6ec8ae8c..00000000 --- a/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ /dev/null @@ -1,326 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
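Continuing the hypothetical driver from the previous sketch, registration and the new BDF-formatting helper would be used roughly like this; "net_mydrv" is an invented PMD name and the kmod string follows the format documented for RTE_PMD_REGISTER_KMOD_DEP earlier in this patch:

#include <stdio.h>
#include <rte_pci.h>

/* The generated constructor fills in my_pci_driver.driver.name
 * ("net_mydrv") and calls rte_eal_pci_register(). */
RTE_PMD_REGISTER_PCI(net_mydrv, my_pci_driver);
RTE_PMD_REGISTER_PCI_TABLE(net_mydrv, my_pci_ids);
RTE_PMD_REGISTER_KMOD_DEP(net_mydrv, "* igb_uio | uio_pci_generic | vfio");

/* Formatting a bus-device-function string with the new helper. */
static void
print_bdf(const struct rte_pci_device *dev)
{
        char name[PCI_PRI_STR_SIZE];

        rte_eal_pci_device_name(&dev->addr, name, sizeof(name));
        printf("found %s\n", name);     /* e.g. "0000:03:00.0" */
}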
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef RTE_PCI_DEV_ID_DECL_IGB -#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IGBVF -#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBE -#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF -#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) -#endif - -#ifndef PCI_VENDOR_ID_INTEL -/** Vendor ID used by Intel devices */ -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - -/******************** Physical IGB devices from e1000_hw.h ********************/ - -#define E1000_DEV_ID_82576 0x10C9 -#define E1000_DEV_ID_82576_FIBER 0x10E6 -#define E1000_DEV_ID_82576_SERDES 0x10E7 -#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 -#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 -#define E1000_DEV_ID_82576_NS 0x150A -#define E1000_DEV_ID_82576_NS_SERDES 0x1518 -#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D -#define E1000_DEV_ID_82575EB_COPPER 0x10A7 -#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 -#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 -#define E1000_DEV_ID_82580_COPPER 0x150E -#define E1000_DEV_ID_82580_FIBER 0x150F -#define E1000_DEV_ID_82580_SERDES 0x1510 -#define E1000_DEV_ID_82580_SGMII 0x1511 -#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 -#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 -#define E1000_DEV_ID_I350_COPPER 0x1521 -#define E1000_DEV_ID_I350_FIBER 0x1522 -#define E1000_DEV_ID_I350_SERDES 0x1523 -#define E1000_DEV_ID_I350_SGMII 0x1524 -#define E1000_DEV_ID_I350_DA4 0x1546 -#define E1000_DEV_ID_I210_COPPER 0x1533 -#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 -#define E1000_DEV_ID_I210_COPPER_IT 0x1535 -#define E1000_DEV_ID_I210_FIBER 0x1536 -#define E1000_DEV_ID_I210_SERDES 0x1537 -#define E1000_DEV_ID_I210_SGMII 0x1538 -#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B -#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C -#define E1000_DEV_ID_I211_COPPER 0x1539 -#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 -#define E1000_DEV_ID_I354_SGMII 0x1F41 -#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 -#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 -#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A -#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C -#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) 
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) - -/****************** Physical IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82598 0x10B6 -#define IXGBE_DEV_ID_82598_BX 0x1508 -#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 -#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 -#define IXGBE_DEV_ID_82598AT 0x10C8 -#define IXGBE_DEV_ID_82598AT2 0x150B -#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB -#define IXGBE_DEV_ID_82598EB_CX4 0x10DD -#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC -#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 -#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 -#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 -#define IXGBE_DEV_ID_82599_KX4 0x10F7 -#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 -#define IXGBE_DEV_ID_82599_KR 0x1517 -#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 -#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C -#define IXGBE_DEV_ID_82599_CX4 0x10F9 -#define IXGBE_DEV_ID_82599_SFP 0x10FB -#define 
IXGBE_SUBDEV_ID_82599_SFP 0x11A9 -#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 -#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 -#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 -#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A -#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 -#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 -#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D -#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A -#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 -#define IXGBE_DEV_ID_82599EN_SFP 0x1557 -#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC -#define IXGBE_DEV_ID_82599_T3_LOM 0x151C -#define IXGBE_DEV_ID_82599_LS 0x154F -#define IXGBE_DEV_ID_X540T 0x1528 -#define IXGBE_DEV_ID_X540T1 0x1560 -#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC -#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD -#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE -#define IXGBE_DEV_ID_X550T 0x1563 -#define IXGBE_DEV_ID_X550T1 0x15D1 -#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 -#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 -#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 -#define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 -#define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 -#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 -#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA -#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC -#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE -#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4 -#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5 -#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA -#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB - -#ifdef RTE_NIC_BYPASS -#define IXGBE_DEV_ID_82599_BYPASS 0x155D -#endif - -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598AF_SINGLE_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82599_COMBO_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) 
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) - -#ifdef RTE_NIC_BYPASS -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) -#endif - -/****************** Virtual IGB devices from e1000_hw.h ******************/ - -#define E1000_DEV_ID_82576_VF 0x10CA -#define E1000_DEV_ID_82576_VF_HV 0x152D -#define E1000_DEV_ID_I350_VF 0x1520 -#define E1000_DEV_ID_I350_VF_HV 0x152F - -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) - -/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_82599_VF_HV 0x152E -#define IXGBE_DEV_ID_X540_VF 0x1515 -#define IXGBE_DEV_ID_X540_VF_HV 0x1530 -#define IXGBE_DEV_ID_X550_VF_HV 0x1564 -#define IXGBE_DEV_ID_X550_VF 0x1565 -#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 -#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 -#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 -#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 - -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) 
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) - -/* - * Undef all RTE_PCI_DEV_ID_DECL_* here. - */ -#undef RTE_PCI_DEV_ID_DECL_IGB -#undef RTE_PCI_DEV_ID_DECL_IGBVF -#undef RTE_PCI_DEV_ID_DECL_IXGBE -#undef RTE_PCI_DEV_ID_DECL_IXGBEVF diff --git a/src/dpdk/lib/librte_eal/common/include/rte_tailq.h b/src/dpdk/lib/librte_eal/common/include/rte_tailq.h index cc3c0f1d..3aae098a 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_tailq.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_tailq.h @@ -107,7 +107,7 @@ struct rte_tailq_elem { RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) /** - * Dump tail queues to the console. + * Dump tail queues to a file. * * @param f * A pointer to a file for output @@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); int rte_eal_tailq_register(struct rte_tailq_elem *t); #define EAL_REGISTER_TAILQ(t) \ -void tailqinitfn_ ##t(void); \ -void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +RTE_INIT(tailqinitfn_ ##t); \ +static void tailqinitfn_ ##t(void) \ { \ if (rte_eal_tailq_register(&t) < 0) \ rte_panic("Cannot initialize tailq: %s\n", t.name); \ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_time.h b/src/dpdk/lib/librte_eal/common/include/rte_time.h index 4b13b9c1..28c6274c 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_time.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_time.h @@ -31,6 +31,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_TIME_H_ +#define _RTE_TIME_H_ + +#include +#include + #define NSEC_PER_SEC 1000000000L /** @@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec) return ts; } + +#endif /* _RTE_TIME_H_ */ diff --git a/src/dpdk/lib/librte_eal/common/include/rte_vdev.h b/src/dpdk/lib/librte_eal/common/include/rte_vdev.h new file mode 100644 index 00000000..784e837d --- /dev/null +++ b/src/dpdk/lib/librte_eal/common/include/rte_vdev.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_VDEV_H +#define RTE_VDEV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** Double linked list of virtual device drivers. */ +TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); + +/** + * Probe function called for each virtual device driver once. + */ +typedef int (rte_vdev_probe_t)(const char *name, const char *args); + +/** + * Remove function called for each virtual device driver once. + */ +typedef int (rte_vdev_remove_t)(const char *name); + +/** + * A virtual device driver abstraction. + */ +struct rte_vdev_driver { + TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherited general driver. */ + rte_vdev_probe_t *probe; /**< Virtual device probe function. */ + rte_vdev_remove_t *remove; /**< Virtual device remove function. */ +}; + +/** + * Register a virtual device driver. + * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be registered. + */ +void rte_eal_vdrv_register(struct rte_vdev_driver *driver); + +/** + * Unregister a virtual device driver. + * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver); + +#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ +RTE_INIT(vdrvinitfn_ ##vdrv);\ +static const char *vdrvinit_ ## nm ## _alias;\ +static void vdrvinitfn_ ##vdrv(void)\ +{\ + (vdrv).driver.name = RTE_STR(nm);\ + (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ + rte_eal_vdrv_register(&vdrv);\ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ +static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk/lib/librte_eal/common/include/rte_version.h b/src/dpdk/lib/librte_eal/common/include/rte_version.h index 615deb7f..76bfe601 100644 --- a/src/dpdk/lib/librte_eal/common/include/rte_version.h +++ b/src/dpdk/lib/librte_eal/common/include/rte_version.h @@ -45,6 +45,7 @@ extern "C" { #include #include +#include #include /** @@ -55,12 +56,12 @@ extern "C" { /** * Major version/year number i.e. the yy in yy.mm.z */ -#define RTE_VER_YEAR 16 +#define RTE_VER_YEAR 17 /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 7 +#define RTE_VER_MONTH 2 /** * Patch level number i.e. 
the z in yy.mm.z @@ -70,14 +71,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 2 /** * Macro to compute a version number usable for comparisons diff --git a/src/dpdk/lib/librte_eal/common/include/rte_warnings.h b/src/dpdk/lib/librte_eal/common/include/rte_warnings.h deleted file mode 100644 index 54b545c9..00000000 --- a/src/dpdk/lib/librte_eal/common/include/rte_warnings.h +++ /dev/null @@ -1,84 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * @file - * Definitions of warnings for use of various insecure functions - */ - -#ifndef _RTE_WARNINGS_H_ -#define _RTE_WARNINGS_H_ - -#ifdef RTE_INSECURE_FUNCTION_WARNING - -/* we need to include all used standard header files so that they appear - * _before_ we poison the function names. - */ - -#include -#include -#include -#include -#include -#ifdef RTE_EXEC_ENV_LINUXAPP -#include -#endif - -/* the following function are deemed not fully secure for use e.g. 
they - * do not always null-terminate arguments */ -#pragma GCC poison sprintf strtok snprintf vsnprintf -#pragma GCC poison strlen strcpy strcat -#pragma GCC poison sscanf - -/* other unsafe functions may be implemented as macros so just undef them */ -#ifdef strsep -#undef strsep -#else -#pragma GCC poison strsep -#endif - -#ifdef strncpy -#undef strncpy -#else -#pragma GCC poison strncpy -#endif - -#ifdef strncat -#undef strncat -#else -#pragma GCC poison strncat -#endif - -#endif - -#endif /* RTE_WARNINGS_H */ diff --git a/src/dpdk/lib/librte_eal/common/malloc_heap.c b/src/dpdk/lib/librte_eal/common/malloc_heap.c index 763fa324..267a4c6c 100644 --- a/src/dpdk/lib/librte_eal/common/malloc_heap.c +++ b/src/dpdk/lib/librte_eal/common/malloc_heap.c @@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void) for (ms = &mcfg->memseg[0], ms_cnt = 0; (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); ms_cnt++, ms++) { -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * it is not memory to allocate from. - */ - if (ms->ioremap_addr != 0) - continue; -#endif malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c index 3fb2188f..bf6b818c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal.c @@ -69,7 +69,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -238,7 +240,8 @@ rte_eal_config_attach(void) mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), PROT_READ, MAP_SHARED, mem_cfg_fd, 0); if (mem_config == MAP_FAILED) - rte_panic("Cannot mmap memory for rte_config\n"); + rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n", + errno, strerror(errno)); rte_config.mem_config = mem_config; } @@ -263,9 +266,17 @@ rte_eal_config_reattach(void) mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { + if (mem_config != MAP_FAILED) + /* errno is stale, don't use */ + rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]" + " - please use '--base-virtaddr' option\n", + rte_mem_cfg_addr, mem_config); + else + rte_panic("Cannot mmap memory for rte_config! 
error %i (%s)\n", + errno, strerror(errno)); + } close(mem_cfg_fd); - if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) - rte_panic("Cannot mmap memory for rte_config\n"); rte_config.mem_config = mem_config; } @@ -740,6 +751,9 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; @@ -748,9 +762,6 @@ rte_eal_init(int argc, char **argv) thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -789,6 +800,9 @@ rte_eal_init(int argc, char **argv) rte_config_init(); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n"); + if (rte_eal_pci_init() < 0) rte_panic("Cannot init PCI\n"); @@ -797,11 +811,6 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot init VFIO\n"); #endif -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_init() < 0) - rte_panic("Cannot init IVSHMEM\n"); -#endif - if (rte_eal_memory_init() < 0) rte_panic("Cannot init memory\n"); @@ -814,14 +823,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_obj_init() < 0) - rte_panic("Cannot init IVSHMEM objects\n"); -#endif - - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); @@ -841,12 +842,12 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); - if (rte_eal_intr_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_bus_scan()) + rte_panic("Cannot scan the buses for devices\n"); + RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -883,10 +884,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) + rte_panic("Cannot probe devices\n"); + /* Probe & Initialize PCI devices */ if (rte_eal_pci_probe()) rte_panic("Cannot probe PCI\n"); + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + rte_eal_mcfg_complete(); return fctret; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 54ab6253..b5b3f2bd 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -73,9 +73,6 @@ static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ -// TREX_PATCH -int eal_err_read_from_file_is_error = 1; - /** * union for pipe fds. 
*/ @@ -139,7 +136,7 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -186,7 +183,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -229,7 +226,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { /* enable MSI interrupts */ static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -258,7 +255,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) { /* disable MSI interrupts */ static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -281,9 +278,30 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) { return ret; } +static int +get_max_intr(const struct rte_intr_handle *intr_handle) +{ + struct rte_intr_source *src; + + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd != intr_handle->fd) + continue; + + if (!src->intr_handle.max_intr) + src->intr_handle.max_intr = 1; + else if (src->intr_handle.max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + src->intr_handle.max_intr + = RTE_MAX_RXTX_INTR_VEC_ID + 1; + + return src->intr_handle.max_intr; + } + + return -1; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -293,12 +311,15 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; - irq_set->count = intr_handle->max_intr; + ret = get_max_intr(intr_handle); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Invalid number of MSI-X irqs for fd %d\n", + intr_handle->fd); + return -1; + } + + irq_set->count = ret; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; @@ -321,7 +342,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; int len, ret; @@ -346,7 +367,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { #endif static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -370,7 +391,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) { unsigned char 
command_high; @@ -394,7 +415,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle) } static int -uio_intr_disable(struct rte_intr_handle *intr_handle) +uio_intr_disable(const struct rte_intr_handle *intr_handle) { const int value = 0; @@ -408,7 +429,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intr_enable(struct rte_intr_handle *intr_handle) +uio_intr_enable(const struct rte_intr_handle *intr_handle) { const int value = 1; @@ -422,7 +443,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -494,7 +515,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -558,7 +579,7 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -602,7 +623,7 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -712,19 +733,10 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) if (errno == EINTR || errno == EWOULDBLOCK) continue; - // TREX_PATCH. Because of issues with e1000, we want this message to - // have lower priority only if running on e1000 card - if (eal_err_read_from_file_is_error) { - RTE_LOG(ERR, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } else { - RTE_LOG(INFO, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); @@ -1169,7 +1181,7 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) RTE_LOG(ERR, EAL, "can't setup eventfd, error %i (%s)\n", errno, strerror(errno)); - return -1; + return -errno; } intr_handle->efds[i] = fd; } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c deleted file mode 100644 index 67b3caf2..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ /dev/null @@ -1,954 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_internal_cfg.h" -#include "eal_private.h" - -#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 -#define PCI_DEVICE_ID_IVSHMEM 0x1110 - -#define IVSHMEM_MAGIC 0x0BADC0DE - -#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" -#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" - -#define PHYS 0x1 -#define VIRT 0x2 -#define IOREMAP 0x4 -#define FULL (PHYS|VIRT|IOREMAP) - -#define METADATA_SIZE_ALIGNED \ - (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) - -#define CONTAINS(x,y)\ - (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) - -#define DIM(x) (sizeof(x)/sizeof(x[0])) - -struct ivshmem_pci_device { - char path[PATH_MAX]; - phys_addr_t ioremap_addr; -}; - -/* data type to store in config */ -struct ivshmem_segment { - struct rte_ivshmem_metadata_entry entry; - uint64_t align; - char path[PATH_MAX]; -}; -struct ivshmem_shared_config { - struct ivshmem_segment segment[RTE_MAX_MEMSEG]; - uint32_t segment_idx; - struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; - uint32_t pci_devs_idx; -}; -static struct ivshmem_shared_config * ivshmem_config; -static int memseg_idx; -static int pagesz; - -/* Tailq heads to add rings to */ -TAILQ_HEAD(rte_ring_list, rte_tailq_entry); - -/* - * Utility functions - */ - -static int -is_ivshmem_device(struct rte_pci_device * dev) -{ - return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM - && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM; -} - -static void * -map_metadata(int fd, uint64_t len) -{ - size_t metadata_len = sizeof(struct rte_ivshmem_metadata); - size_t aligned_len = METADATA_SIZE_ALIGNED; - - return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, len - aligned_len); -} - -static void -unmap_metadata(void * ptr) -{ - munmap(ptr, sizeof(struct rte_ivshmem_metadata)); -} - -static int -has_ivshmem_metadata(int fd, uint64_t len) -{ - struct rte_ivshmem_metadata metadata; - void * ptr; - - ptr = map_metadata(fd, len); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - return metadata.magic_number == IVSHMEM_MAGIC; -} - -static void -remove_segment(struct ivshmem_segment * ms, int len, int idx) -{ - int i; - - for (i = idx; i < len - 1; i++) - memcpy(&ms[i], 
&ms[i+1], sizeof(struct ivshmem_segment)); - memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); -} - -static int -overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check for overlap in virtual addresses */ - if (start1 >= start2 && start1 < end2) - result |= VIRT; - if (start2 >= start1 && start2 < end1) - result |= VIRT; - - /* check for overlap in physical addresses */ - if (p_start1 >= p_start2 && p_start1 < p_end2) - result |= PHYS; - if (p_start2 >= p_start1 && p_start2 < p_end1) - result |= PHYS; - - /* check for overlap in ioremap addresses */ - if (i_start1 >= i_start2 && i_start1 < i_end2) - result |= IOREMAP; - if (i_start2 >= i_start1 && i_start2 < i_end1) - result |= IOREMAP; - - return result; -} - -static int -adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check if segments are virtually adjacent */ - if (start1 == end2) - result |= VIRT; - if (start2 == end1) - result |= VIRT; - - /* check if segments are physically adjacent */ - if (p_start1 == p_end2) - result |= PHYS; - if (p_start2 == p_end1) - result |= PHYS; - - /* check if segments are ioremap-adjacent */ - if (i_start1 == i_end2) - result |= IOREMAP; - if (i_start2 == i_end1) - result |= IOREMAP; - - return result; -} - -static int -has_adjacent_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) { - /* we're only interested in fully adjacent segments; partially - * adjacent segments can coexist. 
- */ - if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL) - return 1; - } - return 0; -} - -static int -has_overlapping_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) - if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) - return 1; - return 0; -} - -static int -seg_compare(const void * a, const void * b) -{ - const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; - const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; - - /* move unallocated zones to the end */ - if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) - return 0; - if (s1->entry.mz.addr == 0) - return 1; - if (s2->entry.mz.addr == 0) - return -1; - - return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; -} - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG -static void -entry_dump(struct rte_ivshmem_metadata_entry *e) -{ - RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, - RTE_PTR_ADD(e->mz.addr, e->mz.len)); - RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.phys_addr, - e->mz.phys_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.ioremap_addr, - e->mz.ioremap_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); - RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); -} -#endif - - - -/* - * Actual useful code - */ - -/* read through metadata mapped from the IVSHMEM device */ -static int -read_metadata(char * path, int path_len, int fd, uint64_t flen) -{ - struct rte_ivshmem_metadata metadata; - struct rte_ivshmem_metadata_entry * entry; - int idx, i; - void * ptr; - - ptr = map_metadata(fd, flen); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); - - idx = ivshmem_config->segment_idx; - - for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && - idx <= RTE_MAX_MEMSEG; i++) { - - if (idx == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); - return -1; - } - - entry = &metadata.entry[i]; - - /* stop on uninitialized memzone */ - if (entry->mz.len == 0) - break; - - /* copy metadata entry */ - memcpy(&ivshmem_config->segment[idx].entry, entry, - sizeof(struct rte_ivshmem_metadata_entry)); - - /* copy path */ - snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); - - idx++; - } - ivshmem_config->segment_idx = idx; - - return 0; -} - -/* check through each segment and look for adjacent or overlapping ones. 
*/ -static int -cleanup_segments(struct ivshmem_segment * ms, int tbl_len) -{ - struct ivshmem_segment * s, * tmp; - int i, j, concat, seg_adjacent, seg_overlapping; - uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; - - qsort(ms, tbl_len, sizeof(struct ivshmem_segment), - seg_compare); - - while (has_overlapping_segments(ms, tbl_len) || - has_adjacent_segments(ms, tbl_len)) { - - for (i = 0; i < tbl_len; i++) { - s = &ms[i]; - - concat = 0; - - for (j = i + 1; j < tbl_len; j++) { - tmp = &ms[j]; - - /* check if this segment is overlapping with existing segment, - * or is adjacent to existing segment */ - seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); - seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); - - /* check if segments fully overlap or are fully adjacent */ - if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - - start1 = s->entry.mz.addr_64; - start2 = tmp->entry.mz.addr_64; - p_start1 = s->entry.mz.phys_addr; - p_start2 = tmp->entry.mz.phys_addr; - i_start1 = s->entry.mz.ioremap_addr; - i_start2 = tmp->entry.mz.ioremap_addr; - end1 = s->entry.mz.addr_64 + s->entry.mz.len; - end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; - - /* settle for minimum start address and maximum length */ - s->entry.mz.addr_64 = RTE_MIN(start1, start2); - s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); - s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); - s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); - s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; - concat = 1; - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); - entry_dump(&s->entry); - -#endif - } - /* if segments not fully overlap, we have an error condition. - * adjacent segments can coexist. 
- */ - else if (seg_overlapping > 0) { - RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - return -1; - } - if (concat) - break; - } - /* if we concatenated, remove segment at j */ - if (concat) { - remove_segment(ms, tbl_len, j); - tbl_len--; - break; - } - } - } - - return tbl_len; -} - -static int -create_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_CREAT | O_RDWR, 0600); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the file is locked, we have a problem */ - if (flock(fd, LOCK_EX | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); - close(fd); - return -1; - } - - if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { - RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); - return -1; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); - - /* change the exclusive lock we got earlier to a shared lock */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* open shared config file and, if present, map the config. - * having no config file is not an error condition, as we later check if - * ivshmem_config is NULL (if it is, that means nothing was mapped). */ -static int -open_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_RDONLY); - - /* if the file doesn't exist, just return success */ - if (fd < 0 && errno == ENOENT) - return 0; - /* else we have an error condition */ - else if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", - path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the lock *does* succeed, this means it's a - * stray config file, so it should be deleted. - */ - if (flock(fd, LOCK_EX | LOCK_NB) != -1) { - - /* if we can't remove the file, something is wrong */ - if (unlink(path) < 0) { - RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, - strerror(errno)); - return -1; - } - - /* release the lock */ - flock(fd, LOCK_UN); - close(fd); - - /* return success as having a stray config file is equivalent to not - * having config file at all. - */ - return 0; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - /* place a shared lock on config file */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* - * This function does the following: - * - * 1) Builds a table of ivshmem_segments with proper offset alignment - * 2) Cleans up that table so that we don't have any overlapping or adjacent - * memory segments - * 3) Creates memsegs from this table and maps them into memory. 
- */ -static inline int -map_all_segments(void) -{ - struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; - struct ivshmem_pci_device * pci_dev; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - int fd, fd_zero; - unsigned i, j; - struct rte_memzone mz; - struct rte_memseg ms; - void * base_addr; - uint64_t align, len; - phys_addr_t ioremap_addr; - - ioremap_addr = 0; - - memset(ms_tbl, 0, sizeof(ms_tbl)); - memset(&mz, 0, sizeof(struct rte_memzone)); - memset(&ms, 0, sizeof(struct rte_memseg)); - - /* first, build a table of memsegs to map, to avoid failed mmaps due to - * overlaps - */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { - if (i == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Too many segments requested!\n"); - return -1; - } - - seg = &ivshmem_config->segment[i]; - - /* copy segment to table */ - memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); - - /* find ioremap addr */ - for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { - pci_dev = &ivshmem_config->pci_devs[j]; - if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { - ioremap_addr = pci_dev->ioremap_addr; - break; - } - } - if (ioremap_addr == 0) { - RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); - return -1; - } - - /* work out alignments */ - align = seg->entry.mz.addr_64 - - RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); - len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); - - /* save original alignments */ - ms_tbl[i].align = align; - - /* create a memory zone */ - mz.addr_64 = seg->entry.mz.addr_64 - align; - mz.len = len; - mz.hugepage_sz = seg->entry.mz.hugepage_sz; - mz.phys_addr = seg->entry.mz.phys_addr - align; - - /* find true physical address */ - mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; - - ms_tbl[i].entry.offset = seg->entry.offset - align; - - memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); - } - - /* clean up the segments */ - memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); - - if (memseg_idx < 0) - return -1; - - mcfg = rte_eal_get_configuration()->mem_config; - - fd_zero = open("/dev/zero", O_RDWR); - - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); - return -1; - } - - /* create memsegs and put them into DPDK memory */ - for (i = 0; i < (unsigned) memseg_idx; i++) { - - seg = &ms_tbl[i]; - - ms.addr_64 = seg->entry.mz.addr_64; - ms.hugepage_sz = seg->entry.mz.hugepage_sz; - ms.len = seg->entry.mz.len; - ms.nchannel = rte_memory_get_nchannel(); - ms.nrank = rte_memory_get_nrank(); - ms.phys_addr = seg->entry.mz.phys_addr; - ms.ioremap_addr = seg->entry.mz.ioremap_addr; - ms.socket_id = seg->entry.mz.socket_id; - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); - return -1; - } - - fd = open(seg->path, O_RDWR); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, - strerror(errno)); - return -1; - } - - munmap(ms.addr, ms.len); - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, - seg->entry.offset); - - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map segment into memory: " - "expected %p got %p (%s)\n", ms.addr, base_addr, - strerror(errno)); - return -1; - } - - RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " - "offset 0x%" PRIx64 "\n", - ms.addr, ms.len, seg->entry.offset); - - /* put the 
pointers back into their real positions using original - * alignment */ - ms.addr_64 += seg->align; - ms.phys_addr += seg->align; - ms.ioremap_addr += seg->align; - ms.len -= seg->align; - - /* at this point, the rest of DPDK memory is not initialized, so we - * expect memsegs to be empty */ - memcpy(&mcfg->memseg[i], &ms, - sizeof(struct rte_memseg)); - - close(fd); - - RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", - ms.len); - } - - return 0; -} - -/* this happens at a later stage, after general EAL memory initialization */ -int -rte_eal_ivshmem_obj_init(void) -{ - struct rte_ring_list* ring_list = NULL; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - struct rte_memzone * mz; - struct rte_ring * r; - struct rte_tailq_entry *te; - unsigned i, ms, idx; - uint64_t offset; - - /* secondary process would not need any object discovery - it'll all - * already be in shared config */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) - return 0; - - /* check that we have an initialised ring tail queue */ - ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); - if (ring_list == NULL) { - RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); - return -1; - } - - mcfg = rte_eal_get_configuration()->mem_config; - - /* create memzones */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { - - seg = &ivshmem_config->segment[i]; - - /* add memzone */ - if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { - RTE_LOG(ERR, EAL, "No more memory zones available!\n"); - return -1; - } - - idx = mcfg->memzone_cnt; - - RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", - seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); - - memcpy(&mcfg->memzone[idx], &seg->entry.mz, - sizeof(struct rte_memzone)); - - /* find ioremap address */ - for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { - if (ms == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); - return -1; - } - if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { - offset = mcfg->memzone[idx].addr_64 - - mcfg->memseg[ms].addr_64; - mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + - offset; - break; - } - } - - mcfg->memzone_cnt++; - } - - rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); - - /* find rings */ - for (i = 0; i < mcfg->memzone_cnt; i++) { - mz = &mcfg->memzone[i]; - - /* check if memzone has a ring prefix */ - if (strncmp(mz->name, RTE_RING_MZ_PREFIX, - sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) - continue; - - r = (struct rte_ring*) (mz->addr_64); - - te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); - if (te == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); - return -1; - } - - te->data = (void *) r; - - TAILQ_INSERT_TAIL(ring_list, te, next); - - RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); - } - rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - rte_memzone_dump(stdout); - rte_ring_list_dump(stdout); -#endif - - return 0; -} - -/* initialize ivshmem structures */ -int rte_eal_ivshmem_init(void) -{ - struct rte_pci_device * dev; - struct rte_pci_resource * res; - int fd, ret; - char path[PATH_MAX]; - - /* initialize everything to 0 */ - memset(path, 0, sizeof(path)); - ivshmem_config = NULL; - - pagesz = getpagesize(); - - RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); - - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - - if (open_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); 
- return -1; - } - } - else { - - TAILQ_FOREACH(dev, &pci_device_list, next) { - - if (is_ivshmem_device(dev)) { - - /* IVSHMEM memory is always on BAR2 */ - res = &dev->mem_resource[2]; - - /* if we don't have a BAR2 */ - if (res->len == 0) - continue; - - /* construct pci device path */ - snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, - dev->addr.domain, dev->addr.bus, dev->addr.devid, - dev->addr.function); - - /* try to find memseg */ - fd = open(path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", path); - return -1; - } - - /* check if it's a DPDK IVSHMEM device */ - ret = has_ivshmem_metadata(fd, res->len); - - /* is DPDK device */ - if (ret == 1) { - - /* config file creation is deferred until the first - * DPDK device is found. then, it has to be created - * only once. */ - if (ivshmem_config == NULL && - create_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); - close(fd); - return -1; - } - - if (read_metadata(path, sizeof(path), fd, res->len) < 0) { - RTE_LOG(ERR, EAL, "Could not read metadata from" - " device %02x:%02x.%x!\n", dev->addr.bus, - dev->addr.devid, dev->addr.function); - close(fd); - return -1; - } - - if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { - RTE_LOG(WARNING, EAL, - "IVSHMEM PCI device limit exceeded. Increase " - "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " - "your config file.\n"); - break; - } - - RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; - snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, - sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), - "%s", path); - - ivshmem_config->pci_devs_idx++; - } - /* failed to read */ - else if (ret < 0) { - RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", - strerror(errno)); - close(fd); - return -1; - } - /* not a DPDK device */ - else - RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); - - /* close the BAR fd */ - close(fd); - } - } - } - - /* ivshmem_config is not NULL only if config was created and/or mapped */ - if (ivshmem_config) { - if (map_all_segments() < 0) { - RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); - return -1; - } - } - else { - RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! 
\n"); - } - - return 0; -} - -#endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c index d3911004..e3a50aa3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c @@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility) openlog(id, LOG_NDELAY | LOG_PID, facility); - if (rte_eal_common_log_init(log_stream) < 0) - return -1; - - return 0; -} - -/* early logs */ - -/* - * early log function, used before rte_eal_log_init - */ -static ssize_t -early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) -{ - ssize_t ret; - ret = fwrite(buf, size, 1, stdout); - fflush(stdout); - if (ret == 0) - return -1; - return ret; -} - -static cookie_io_functions_t early_log_func = { - .write = early_log_write, -}; -static FILE *early_log_stream; + eal_log_set_default(log_stream); -/* - * init the log library, called by rte_eal_init() to enable early - * logs - */ -int -rte_eal_log_early_init(void) -{ - early_log_stream = fopencookie(NULL, "w+", early_log_func); - if (early_log_stream == NULL) { - printf("Cannot configure early_log_stream\n"); - return -1; - } - rte_openlog_stream(early_log_stream); return 0; } diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c index 41e0a928..a956bb22 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, void *vma_addr = NULL; size_t vma_len = 0; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_SET_USED(vma_len); -#endif - for (i = 0; i < hpi->num_pages[0]; i++) { uint64_t hugepage_sz = hpi->hugepage_sz; if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), hpi->hugedir, - hugepg_tbl[i].file_id); -#else eal_get_hugefile_path(hugepg_tbl[i].filepath, sizeof(hugepg_tbl[i].filepath), hpi->hugedir, hugepg_tbl[i].file_id); -#endif hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; } #ifndef RTE_ARCH_64 @@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, continue; } #endif - -#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS else if (vma_len == 0) { unsigned j, num_pages; @@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (vma_addr == NULL) vma_len = hugepage_sz; } -#endif /* try to create hugepage file */ - fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); + fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600); if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); @@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, return i; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - -/* - * Remaps all hugepages into single file segments - */ -static int -remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - int fd; - unsigned i = 0, j, num_pages, page_idx = 0; - void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; - size_t vma_len = 0; - size_t hugepage_sz = hpi->hugepage_sz; - size_t total_size, offset; - char filepath[MAX_HUGEPAGE_PATH]; - phys_addr_t physaddr; - int socket; - - while (i < hpi->num_pages[0]) { - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G pages and 16G pages, - * just reuse original map address as final map address. 
- */ - if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - i++; - continue; - } -#endif - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. */ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in descending - * order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - socket = hugepg_tbl[i].socket_id; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. */ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - - /* If we can't find a big enough virtual area, work out how many pages - * we are going to get */ - if (vma_addr == NULL) - j = i + 1; - else if (vma_len != num_pages * hugepage_sz) { - num_pages = vma_len / hugepage_sz; - j = i + num_pages; - - } - - hugepg_tbl[page_idx].file_id = page_idx; - eal_get_hugefile_path(filepath, - sizeof(filepath), - hpi->hugedir, - hugepg_tbl[page_idx].file_id); - - /* try to create hugepage file */ - fd = open(filepath, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; - } - - total_size = 0; - for (;i < j; i++) { - - /* unmap current segment */ - if (total_size > 0) - munmap(vma_addr, total_size); - - /* unmap original page */ - munmap(hugepg_tbl[i].orig_va, hugepage_sz); - unlink(hugepg_tbl[i].filepath); - - total_size += hugepage_sz; - - old_addr = vma_addr; - - /* map new, bigger segment, and populate page tables, - * the kernel fills this segment with zeros */ - vma_addr = mmap(vma_addr, total_size, - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); - - if (vma_addr == MAP_FAILED || vma_addr != old_addr) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); - close(fd); - return -1; - } - } - - /* set shared flock on the file. 
*/ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", - __func__, strerror(errno)); - close(fd); - return -1; - } - - snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", - filepath); - - physaddr = rte_mem_virt2phy(vma_addr); - - if (physaddr == RTE_BAD_PHYS_ADDR) - return -1; - - hugepg_tbl[page_idx].final_va = vma_addr; - - hugepg_tbl[page_idx].physaddr = physaddr; - - hugepg_tbl[page_idx].repeated = num_pages; - - hugepg_tbl[page_idx].socket_id = socket; - - close(fd); - - /* verify the memory segment - that is, check that every VA corresponds - * to the physical address we expect to see - */ - for (offset = 0; offset < vma_len; offset += hugepage_sz) { - uint64_t expected_physaddr; - - expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; - page_addr = RTE_PTR_ADD(vma_addr, offset); - physaddr = rte_mem_virt2phy(page_addr); - - if (physaddr != expected_physaddr) { - RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " - "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 - " (expected 0x%" PRIx64 ")\n", - page_addr, offset, physaddr, expected_physaddr); - return -1; - } - } - - page_idx++; - } - - /* zero out the rest */ - memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); - return page_idx; -} -#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ - /* Unmap all hugepages from original mapping */ static int unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) @@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info } return 0; } -#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ /* * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge @@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* if this page was already cleared */ - if (hp->final_va == NULL) - continue; -#endif - /* find a page that matches the criteria */ if ((hp->size == hpi[size].hugepage_sz) && (hp->socket_id == (int) socket)) { @@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, if (pages_found == hpi[size].num_pages[socket]) { uint64_t unmap_len; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - unmap_len = hp->size * hp->repeated; -#else unmap_len = hp->size; -#endif /* get start addr and len of the remaining segment */ munmap(hp->final_va, (size_t) unmap_len); @@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, __func__, hp->filepath, strerror(errno)); return -1; } - } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* else, check how much do we need to map */ - else { - int nr_pg_left = - hpi[size].num_pages[socket] - pages_found; - - /* if we need enough memory to fit into the segment */ - if (hp->repeated <= nr_pg_left) { - pages_found += hp->repeated; - } - /* truncate the segment */ - else { - uint64_t final_size = nr_pg_left * hp->size; - uint64_t seg_size = hp->repeated * hp->size; - - void * unmap_va = RTE_PTR_ADD(hp->final_va, - final_size); - int fd; - - munmap(unmap_va, seg_size - final_size); - - fd = open(hp->filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - if (ftruncate(fd, final_size) < 0) { - RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - close(fd); - - pages_found += nr_pg_left; - hp->repeated = 
nr_pg_left; - } - } -#else - /* else, lock the page and skip */ - else + } else { + /* lock the page and skip */ pages_found++; -#endif + } } /* match page */ } /* foreach page */ @@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void) int i, j, new_memseg; int nr_hugefiles, nr_hugepages = 0; void *addr; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int new_pages_count[MAX_HUGEPAGE_SIZES]; -#endif test_proc_pagemap_readable(); @@ -1260,13 +1030,6 @@ rte_eal_hugepage_init(void) pages_old = hpi->num_pages[0]; pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); if (pages_new < pages_old) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_LOG(ERR, EAL, - "%d not %d hugepages of size %u MB allocated\n", - pages_new, pages_old, - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; -#else RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", pages_new, pages_old, @@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void) hpi->num_pages[0] = pages_new; if (pages_new == 0) continue; -#endif } /* find physical addresses and sockets for each hugepage */ @@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void) qsort(&tmp_hp[hp_offset], hpi->num_pages[0], sizeof(struct hugepage_file), cmp_physaddr); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* remap all hugepages into single file segments */ - new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); - if (new_pages_count[i] < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* we have processed a num of hugepages of this size, so inc offset */ - hp_offset += new_pages_count[i]; -#else /* remap all hugepages */ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != hpi->num_pages[0]) { @@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void) /* we have processed a num of hugepages of this size, so inc offset */ hp_offset += hpi->num_pages[0]; -#endif } huge_recover_sigbus(); @@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void) if (internal_config.memory == 0 && internal_config.force_sockets == 0) internal_config.memory = eal_get_hugepage_mem_size(); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - nr_hugefiles = 0; - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - nr_hugefiles += new_pages_count[i]; - } -#else nr_hugefiles = nr_hugepages; -#endif /* clean out the numbers of pages */ @@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void) for (j = 0; j < nb_hpsizes; j++) { if (tmp_hp[i].size == internal_config.hugepage_info[j].hugepage_sz) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - internal_config.hugepage_info[j].num_pages[socket] += - tmp_hp[i].repeated; -#else internal_config.hugepage_info[j].num_pages[socket]++; -#endif } } } @@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void) free(tmp_hp); tmp_hp = NULL; - /* find earliest free memseg - this is needed because in case of IVSHMEM, - * segments might have already been initialized */ - for (j = 0; j < RTE_MAX_MEMSEG; j++) - if (mcfg->memseg[j].addr == NULL) { - /* move to previous segment and exit loop */ - j--; - break; - } - + /* first memseg index shall be 0 after incrementing it below */ + j = -1; for (i = 0; i < nr_hugefiles; i++) { new_memseg = 0; @@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void) mcfg->memseg[j].phys_addr = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; -#else mcfg->memseg[j].len = hugepage[i].size; -#endif mcfg->memseg[j].socket_id = hugepage[i].socket_id; mcfg->memseg[j].hugepage_sz = 
hugepage[i].size; } @@ -1552,7 +1278,8 @@ rte_eal_hugepage_attach(void) struct hugepage_file *hp = NULL; unsigned num_hp = 0; unsigned i, s = 0; /* s used to track the segment number */ - off_t size; + unsigned max_seg = RTE_MAX_MEMSEG; + off_t size = 0; int fd, fd_zero = -1, fd_hugepage = -1; if (aslr_enabled() > 0) { @@ -1597,15 +1324,6 @@ rte_eal_hugepage_attach(void) if (mcfg->memseg[s].len == 0) break; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) - continue; -#endif - /* * fdzero is mmapped to get a contiguous block of virtual * addresses of the appropriate memseg size. @@ -1615,10 +1333,21 @@ rte_eal_hugepage_attach(void) PROT_READ, MAP_PRIVATE, fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero to requested address [%p]: '%s'\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, strerror(errno)); + max_seg = s; + if (base_addr != MAP_FAILED) { + /* errno is stale, don't use */ + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p], got [%p] - " + "please use '--base-virtaddr' option\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, base_addr); + munmap(base_addr, mcfg->memseg[s].len); + } else { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + } if (aslr_enabled() > 0) { RTE_LOG(ERR, EAL, "It is recommended to " "disable ASLR in the kernel " @@ -1644,16 +1373,6 @@ rte_eal_hugepage_attach(void) void *addr, *base_addr; uintptr_t offset = 0; size_t mapping_size; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) { - s++; - continue; - } -#endif /* * free previously mapped memory so we can map the * hugepages into the space @@ -1672,11 +1391,7 @@ rte_eal_hugepage_attach(void) hp[i].filepath); goto error; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mapping_size = hp[i].size * hp[i].repeated; -#else mapping_size = hp[i].size; -#endif addr = mmap(RTE_PTR_ADD(base_addr, offset), mapping_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); @@ -1701,11 +1416,8 @@ rte_eal_hugepage_attach(void) return 0; error: - s = 0; - while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) { - munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len); - s++; - } + for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++) + munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len); if (hp != NULL && hp != MAP_FAILED) munmap(hp, size); if (fd_zero >= 0) diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c index cd9de7cc..e2fc219b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -54,45 +54,6 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) { @@ -267,8 +228,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. */ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -281,10 +241,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -350,13 +307,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dirname); if (access(filename, R_OK) != 0) { /* if no NUMA support, set default to 0 */ - dev->numa_node = 0; + dev->device.numa_node = 0; } else { if (eal_parse_sysfs_value(filename, &tmp) < 0) { free(dev); return -1; } - dev->numa_node = tmp; + dev->device.numa_node = tmp; } /* parse resources */ @@ -390,6 +347,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&pci_device_list)) { + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } else { struct rte_pci_device *dev2; @@ -402,6 +360,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, if (ret < 0) { TAILQ_INSERT_BEFORE(dev2, dev, next); + rte_eal_device_insert(&dev->device); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -411,18 +370,30 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } return 0; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr); +} + /* * split up a pci address into its constituent parts. 
*/ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -450,10 +421,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -474,8 +445,7 @@ rte_eal_pci_scan(void) struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { @@ -488,13 +458,12 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; snprintf(dirname, sizeof(dirname), "%s/%s", pci_get_sysfs_path(), e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -743,9 +712,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 1786b754..3e4ffb57 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -133,7 +133,7 @@ pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); dev = makedev(major, minor); ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (f == NULL) { + if (ret != 0) { RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", __func__, strerror(errno)); return -1; diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index 3dacbff8..d459bf48 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -82,6 +82,7 @@ struct rte_epoll_event { /** Handle for interrupts. 
*/ struct rte_intr_handle { + RTE_STD_C11 union { int vfio_dev_fd; /**< VFIO device file descriptor */ int uio_cfg_fd; /**< UIO config file descriptor diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 2acdfd9b..09713b0c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -61,6 +61,9 @@ #ifdef __KERNEL__ #include +#define RTE_STD_C11 +#else +#include #endif /** @@ -85,6 +88,7 @@ enum rte_kni_req_id { */ struct rte_kni_request { uint32_t req_id; /**< Request id */ + RTE_STD_C11 union { uint32_t new_mtu; /**< New MTU */ uint8_t if_up; /**< 1: interface up, 0: interface down */ @@ -102,7 +106,7 @@ struct rte_kni_fifo { volatile unsigned read; /**< Next position to be read */ unsigned len; /**< Circular buffer length */ unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ - void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ + void *volatile buffer[]; /**< The buffer contains mbuf pointers */ }; /* @@ -111,7 +115,8 @@ struct rte_kni_fifo { */ struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); - char pad0[10]; + uint64_t buf_physaddr; + char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; uint8_t nb_segs; /**< Number of segments. */ @@ -159,6 +164,7 @@ struct rte_kni_device_info { uint16_t group_id; /**< Group ID */ uint32_t core_id; /**< core ID to bind for kernel thread */ + __extension__ uint8_t force_bind : 1; /**< Flag for kernel thread binding */ /* mbuf size */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h index 647ba3ce..78da08e5 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/compat.h @@ -19,13 +19,25 @@ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -#define sk_sleep(s) (s)->sk_sleep +#define sk_sleep(s) ((s)->sk_sleep) +#else +#define HAVE_SOCKET_WQ +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) +#define HAVE_STATIC_SOCK_MAP_FD +#else +#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) #define HAVE_CHANGE_CARRIER_CB #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN) +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) #define HAVE_IOV_ITER_MSGHDR #endif @@ -35,6 +47,23 @@ #define HAVE_REBUILD_HEADER #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#define HAVE_SK_ALLOC_KERN_PARAM +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) #define HAVE_TRANS_START_HELPER #endif + +/* + * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev() + * For old kernels just backported the commit that enables the macro + * (685343fc3ba6) but still uses old API, it is required to undefine macro to + * select correct version of API, this is safe since KNI doesn't use the value. + * This fix is specific to RedHat/CentOS kernels. 
+ */ +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) +#undef NET_NAME_UNKNOWN +#endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c index b8c9a13f..d558af20 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h index 1aec75ab..185ccdf1 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c index 6095d3b4..220c9a40 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h index b21294ec..55c8a5f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h index 63b228c5..d42c7998 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h index 347cef71..35886e93 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
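The kni/compat.h additions above all follow one idiom: compare LINUX_VERSION_CODE (or RHEL_RELEASE_CODE) against a threshold and either define a HAVE_* feature flag or provide a backport shim, as with ether_addr_copy() for kernels before 3.14. A compilable userspace sketch of that guard pattern; the MY_* macros and the hard-coded 3.10 version are made up for illustration, while the real header keys off <linux/version.h>:

#include <stdio.h>
#include <string.h>

#define MY_KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#define MY_LINUX_VERSION_CODE      MY_KERNEL_VERSION(3, 10, 0) /* pretend 3.10 */

#if MY_LINUX_VERSION_CODE >= MY_KERNEL_VERSION(3, 9, 0)
#define HAVE_CHANGE_CARRIER_CB     /* feature flag, as in the hunk above */
#endif

#if MY_LINUX_VERSION_CODE < MY_KERNEL_VERSION(3, 14, 0)
/* pre-3.14 trees lack ether_addr_copy(); same shim as compat.h, 6 == ETH_ALEN */
#define ether_addr_copy(dst, src)  memcpy((dst), (src), 6)
#endif

int main(void)
{
    unsigned char src[6] = { 0, 1, 2, 3, 4, 5 }, dst[6] = { 0 };

    ether_addr_copy(dst, src);
#ifdef HAVE_CHANGE_CARRIER_CB
    printf("carrier callback available, last MAC byte %u\n", (unsigned)dst[5]);
#else
    printf("no carrier callback, last MAC byte %u\n", (unsigned)dst[5]);
#endif
    return 0;
}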
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c index 1e9f3e6e..7e4c20a9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h index 57b2eb56..b8fa70d0 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c index 4ee59ba9..74319def 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h index 6a1b0f52..3bcdd88c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c index a1700398..51dfae5d 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h index c94b2185..0627f271 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c index 3ef0d98b..bd64429f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h index bbf838c8..64685d9d 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c index 6188d007..1ce59154 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h index fe62785a..17bc53c3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h index d1cf98e2..c1ab60c4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c index 140a2a47..d8a77c45 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h index 5387c5e7..db24fb0b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h index 0e083c54..830ec991 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h index e5554ca3..d077b49e 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c deleted file mode 100644 index c07f9f53..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c index af7e68a5..d7a987d5 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c deleted file mode 100644 index 07a1ae07..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c +++ /dev/null @@ -1,260 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" -#ifdef IGB_HWMON -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_I2C_SUPPORT -static struct i2c_board_info i350_sensor_info = { - I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), -}; -#endif /* HAVE_I2C_SUPPORT */ - -/* hwmon callback functions */ -static ssize_t igb_hwmon_show_location(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - return sprintf(buf, "loc%u\n", - igb_attr->sensor->location); -} - -static ssize_t igb_hwmon_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value; - - /* reset the temp field */ - igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); - - value = igb_attr->sensor->temp; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->caution_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->max_op_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. 
- * @ adapter: pointer to the adapter structure - * @ offset: offset in the eeprom sensor data table - * @ type: type of sensor data to display - * - * For each file we want in hwmon's sysfs interface we need a device_attribute - * This is included in our hwmon_attr struct that contains the references to - * the data structures we need to get the data to display. - */ -static int igb_add_hwmon_attr(struct igb_adapter *adapter, - unsigned int offset, int type) { - int rc; - unsigned int n_attr; - struct hwmon_attr *igb_attr; - - n_attr = adapter->igb_hwmon_buff.n_hwmon; - igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr]; - - switch (type) { - case IGB_HWMON_TYPE_LOC: - igb_attr->dev_attr.show = igb_hwmon_show_location; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_label", offset); - break; - case IGB_HWMON_TYPE_TEMP: - igb_attr->dev_attr.show = igb_hwmon_show_temp; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_input", offset); - break; - case IGB_HWMON_TYPE_CAUTION: - igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_max", offset); - break; - case IGB_HWMON_TYPE_MAX: - igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_crit", offset); - break; - default: - rc = -EPERM; - return rc; - } - - /* These always the same regardless of type */ - igb_attr->sensor = - &adapter->hw.mac.thermal_sensor_data.sensor[offset]; - igb_attr->hw = &adapter->hw; - igb_attr->dev_attr.store = NULL; - igb_attr->dev_attr.attr.mode = S_IRUGO; - igb_attr->dev_attr.attr.name = igb_attr->name; - sysfs_attr_init(&igb_attr->dev_attr.attr); - rc = device_create_file(&adapter->pdev->dev, - &igb_attr->dev_attr); - if (rc == 0) - ++adapter->igb_hwmon_buff.n_hwmon; - - return rc; -} - -static void igb_sysfs_del_adapter(struct igb_adapter *adapter) -{ - int i; - - if (adapter == NULL) - return; - - for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) { - device_remove_file(&adapter->pdev->dev, - &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr); - } - - kfree(adapter->igb_hwmon_buff.hwmon_list); - - if (adapter->igb_hwmon_buff.device) - hwmon_device_unregister(adapter->igb_hwmon_buff.device); -} - -/* called from igb_main.c */ -void igb_sysfs_exit(struct igb_adapter *adapter) -{ - igb_sysfs_del_adapter(adapter); -} - -/* called from igb_main.c */ -int igb_sysfs_init(struct igb_adapter *adapter) -{ - struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff; - unsigned int i; - int n_attrs; - int rc = 0; -#ifdef HAVE_I2C_SUPPORT - struct i2c_client *client = NULL; -#endif /* HAVE_I2C_SUPPORT */ - - /* If this method isn't defined we don't support thermals */ - if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) - goto exit; - - /* Don't create thermal hwmon interface if no sensors present */ - rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); - if (rc) - goto exit; -#ifdef HAVE_I2C_SUPPORT - /* init i2c_client */ - client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); - if (client == NULL) { - dev_info(&adapter->pdev->dev, - "Failed to create new i2c device..\n"); - goto exit; - } - adapter->i2c_client = client; -#endif /* HAVE_I2C_SUPPORT */ - - /* Allocation space for max attributes - * max num sensors * values (loc, temp, max, caution) - */ - n_attrs = E1000_MAX_SENSORS * 4; - igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), - GFP_KERNEL); - if (!igb_hwmon->hwmon_list) { - rc = -ENOMEM; - goto err; - } - - 
igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev); - if (IS_ERR(igb_hwmon->device)) { - rc = PTR_ERR(igb_hwmon->device); - goto err; - } - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - /* Only create hwmon sysfs entries for sensors that have - * meaningful data. - */ - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) - continue; - - /* Bail if any hwmon attr struct fails to initialize */ - rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); - if (rc) - goto err; - } - - goto exit; - -err: - igb_sysfs_del_adapter(adapter); -exit: - return rc; -} -#endif /* IGB_HWMON */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index 96acec58..f4dca5a3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List @@ -76,7 +76,7 @@ static const char igb_driver_string[] = static const char igb_copyright[] = "Copyright (c) 2007-2013 Intel Corporation."; -static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { +const struct pci_device_id igb_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, @@ -195,7 +195,11 @@ static void igb_process_mdd_event(struct igb_adapter *); #ifdef IFLA_VF_MAX static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, +#ifdef HAVE_VF_VLAN_PROTO + int vf, u16 vlan, u8 qos, __be16 vlan_proto); +#else int vf, u16 vlan, u8 qos); +#endif #ifdef HAVE_VF_SPOOFCHK_CONFIGURE static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -1558,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); connsw = E1000_READ_REG(hw, E1000_CONNSW); link = igb_has_link(adapter); + (void) link; /* need to live swap if current media is copper and we have fiber/serdes * to go to. 
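The igb_main.c hunks above wrap an extra __be16 vlan_proto parameter of igb_ndo_set_vf_vlan() in HAVE_VF_VLAN_PROTO, so one function body serves both the old and the new .ndo_set_vf_vlan kernel callback signatures, and (further down) anything other than 802.1Q is rejected with -EPROTONOSUPPORT. A standalone sketch of that one-body-two-prototypes pattern; the macro and function names are hypothetical and byte order is ignored for brevity:

#include <stdio.h>
#include <stdint.h>

#define HAVE_VLAN_PROTO            /* pretend the newer callback signature */
#define MY_ETH_P_8021Q  0x8100     /* 802.1Q ethertype, host order for brevity */

static int set_vf_vlan(int vf, uint16_t vlan, uint8_t qos
#ifdef HAVE_VLAN_PROTO
                       , uint16_t vlan_proto
#endif
                       )
{
    if (vf < 0 || vlan > 4094 || qos > 7)
        return -1;                 /* mirrors the -EINVAL range check */
#ifdef HAVE_VLAN_PROTO
    if (vlan_proto != MY_ETH_P_8021Q)
        return -2;                 /* mirrors the -EPROTONOSUPPORT check */
#endif
    return 0;
}

int main(void)
{
    int rc;

#ifdef HAVE_VLAN_PROTO
    rc = set_vf_vlan(0, 100, 0, MY_ETH_P_8021Q);
#else
    rc = set_vf_vlan(0, 100, 0);
#endif
    printf("%d\n", rc);
    return 0;
}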
@@ -6411,7 +6416,11 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) } static int igb_ndo_set_vf_vlan(struct net_device *netdev, +#ifdef HAVE_VF_VLAN_PROTO + int vf, u16 vlan, u8 qos, __be16 vlan_proto) +#else int vf, u16 vlan, u8 qos) +#endif { int err = 0; struct igb_adapter *adapter = netdev_priv(netdev); @@ -6419,6 +6428,12 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, /* VLAN IDs accepted range 0-4094 */ if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7)) return -EINVAL; + +#ifdef HAVE_VF_VLAN_PROTO + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; +#endif + if (vlan || qos) { err = igb_vlvf_set(adapter, vlan, !!vlan, vf); if (err) @@ -6579,7 +6594,12 @@ static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) if (adapter->vf_data[vf].pf_vlan) igb_ndo_set_vf_vlan(adapter->netdev, vf, adapter->vf_data[vf].pf_vlan, +#ifdef HAVE_VF_VLAN_PROTO + adapter->vf_data[vf].pf_qos, + htons(ETH_P_8021Q)); +#else adapter->vf_data[vf].pf_qos); +#endif else igb_clear_vf_vfta(adapter, vf); #endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c index f79ce7c1..c922ca2f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c deleted file mode 100644 index 66236d29..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c +++ /dev/null @@ -1,363 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" - -#ifdef IGB_PROCFS -#ifndef IGB_HWMON - -#include -#include -#include -#include -#include - -static struct proc_dir_entry *igb_top_dir = NULL; - - -bool igb_thermal_present(struct igb_adapter *adapter) -{ - s32 status; - struct e1000_hw *hw; - - if (adapter == NULL) - return false; - hw = &adapter->hw; - - /* - * Only set I2C bit-bang mode if an external thermal sensor is - * supported on this device. - */ - if (adapter->ets) { - status = e1000_set_i2c_bb(hw); - if (status != E1000_SUCCESS) - return false; - } - - status = hw->mac.ops.init_thermal_sensor_thresh(hw); - if (status != E1000_SUCCESS) - return false; - - return true; -} - - -static int igb_macburn(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.perm_addr[0], - (unsigned int)hw->mac.perm_addr[1], - (unsigned int)hw->mac.perm_addr[2], - (unsigned int)hw->mac.perm_addr[3], - (unsigned int)hw->mac.perm_addr[4], - (unsigned int)hw->mac.perm_addr[5]); -} - -static int igb_macadmn(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.addr[0], - (unsigned int)hw->mac.addr[1], - (unsigned int)hw->mac.addr[2], - (unsigned int)hw->mac.addr[3], - (unsigned int)hw->mac.addr[4], - (unsigned int)hw->mac.addr[5]); -} - -static int igb_numeports(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - int ports; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - ports = 4; - - return snprintf(page, count, "%d\n", ports); -} - -static int igb_porttype(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - return snprintf(page, count, "%d\n", - test_bit(__IGB_DOWN, &adapter->state)); -} - -static int igb_therm_location(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->location); -} - -static int igb_therm_maxopthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, 
"%d\n", - therm_data->sensor_data->max_op_thresh); -} - -static int igb_therm_cautionthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", - therm_data->sensor_data->caution_thresh); -} - -static int igb_therm_temp(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - s32 status; - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - status = e1000_get_thermal_sensor_data(therm_data->hw); - if (status != E1000_SUCCESS) - snprintf(page, count, "error: status %d returned\n", status); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->temp); -} - -struct igb_proc_type{ - char name[32]; - int (*read)(char*, char**, off_t, int, int*, void*); -}; - -struct igb_proc_type igb_proc_entries[] = { - {"numeports", &igb_numeports}, - {"porttype", &igb_porttype}, - {"macburn", &igb_macburn}, - {"macadmn", &igb_macadmn}, - {"", NULL} -}; - -struct igb_proc_type igb_internal_entries[] = { - {"location", &igb_therm_location}, - {"temp", &igb_therm_temp}, - {"cautionthresh", &igb_therm_cautionthresh}, - {"maxopthresh", &igb_therm_maxopthresh}, - {"", NULL} -}; - -void igb_del_proc_entries(struct igb_adapter *adapter) -{ - int index, i; - char buf[16]; /* much larger than the sensor number will ever be */ - - if (igb_top_dir == NULL) - return; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - if (adapter->therm_dir[i] == NULL) - continue; - - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - - remove_proc_entry(igb_internal_entries[index].name, - adapter->therm_dir[i]); - } - snprintf(buf, sizeof(buf), "sensor_%d", i); - remove_proc_entry(buf, adapter->info_dir); - } - - if (adapter->info_dir != NULL) { - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) - break; - remove_proc_entry(igb_proc_entries[index].name, - adapter->info_dir); - } - remove_proc_entry("info", adapter->eth_dir); - } - - if (adapter->eth_dir != NULL) - remove_proc_entry(pci_name(adapter->pdev), igb_top_dir); -} - -/* called from igb_main.c */ -void igb_procfs_exit(struct igb_adapter *adapter) -{ - igb_del_proc_entries(adapter); -} - -int igb_procfs_topdir_init(void) -{ - igb_top_dir = proc_mkdir("driver/igb", NULL); - if (igb_top_dir == NULL) - return -ENOMEM; - - return 0; -} - -void igb_procfs_topdir_exit(void) -{ - remove_proc_entry("driver/igb", NULL); -} - -/* called from igb_main.c */ -int igb_procfs_init(struct igb_adapter *adapter) -{ - int rc = 0; - int i; - int index; - char buf[16]; /* much larger than the sensor number will ever be */ - - adapter->eth_dir = NULL; - adapter->info_dir = NULL; - for (i = 0; i < E1000_MAX_SENSORS; i++) - adapter->therm_dir[i] = NULL; - - if ( igb_top_dir == NULL ) { - rc = -ENOMEM; - goto fail; - } - - adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir); - if (adapter->eth_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - - adapter->info_dir = proc_mkdir("info", adapter->eth_dir); - if (adapter->info_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) { - break; - } - if (!(create_proc_read_entry(igb_proc_entries[index].name, - 0444, - adapter->info_dir, - 
igb_proc_entries[index].read, - adapter))) { - - rc = -ENOMEM; - goto fail; - } - } - if (igb_thermal_present(adapter) == false) - goto exit; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0) - continue; - - snprintf(buf, sizeof(buf), "sensor_%d", i); - adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir); - if (adapter->therm_dir[i] == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - /* - * therm_data struct contains pointer the read func - * will be needing - */ - adapter->therm_data[i].hw = &adapter->hw; - adapter->therm_data[i].sensor_data = - &adapter->hw.mac.thermal_sensor_data.sensor[i]; - - if (!(create_proc_read_entry( - igb_internal_entries[index].name, - 0444, - adapter->therm_dir[i], - igb_internal_entries[index].read, - &adapter->therm_data[i]))) { - rc = -ENOMEM; - goto fail; - } - } - } - goto exit; - -fail: - igb_del_proc_entries(adapter); -exit: - return rc; -} - -#endif /* !IGB_HWMON */ -#endif /* IGB_PROCFS */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c deleted file mode 100644 index 454b70ce..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c +++ /dev/null @@ -1,944 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/****************************************************************************** - Copyright(c) 2011 Richard Cochran for some of the - 82576 and 82580 code -******************************************************************************/ - -#include "igb.h" - -#include -#include -#include -#include - -#define INCVALUE_MASK 0x7fffffff -#define ISGN 0x80000000 - -/* - * The 82580 timesync updates the system timer every 8ns by 8ns, - * and this update value cannot be reprogrammed. - * - * Neither the 82576 nor the 82580 offer registers wide enough to hold - * nanoseconds time values for very long. For the 82580, SYSTIM always - * counts nanoseconds, but the upper 24 bits are not available. The - * frequency is adjusted by changing the 32 bit fractional nanoseconds - * register, TIMINCA. - * - * For the 82576, the SYSTIM register time unit is affect by the - * choice of the 24 bit TININCA:IV (incvalue) field. 
Five bits of this - * field are needed to provide the nominal 16 nanosecond period, - * leaving 19 bits for fractional nanoseconds. - * - * We scale the NIC clock cycle by a large factor so that relatively - * small clock corrections can be added or subtracted at each clock - * tick. The drawbacks of a large factor are a) that the clock - * register overflows more quickly (not such a big deal) and b) that - * the increment per tick has to fit into 24 bits. As a result we - * need to use a shift of 19 so we can fit a value of 16 into the - * TIMINCA register. - * - * - * SYSTIMH SYSTIML - * +--------------+ +---+---+------+ - * 82576 | 32 | | 8 | 5 | 19 | - * +--------------+ +---+---+------+ - * \________ 45 bits _______/ fract - * - * +----------+---+ +--------------+ - * 82580 | 24 | 8 | | 32 | - * +----------+---+ +--------------+ - * reserved \______ 40 bits _____/ - * - * - * The 45 bit 82576 SYSTIM overflows every - * 2^45 * 10^-9 / 3600 = 9.77 hours. - * - * The 40 bit 82580 SYSTIM overflows every - * 2^40 * 10^-9 / 60 = 18.3 minutes. - */ - -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) -#define IGB_PTP_TX_TIMEOUT (HZ * 15) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 - -/* - * SYSTIM read access for the 82576 - */ - -static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for the 82580 - */ - -static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - /* The timestamp latches on lowest register read. For the 82580 - * the lowest register is SYSTIMR instead of SYSTIML. However we only - * need to provide nanosecond resolution, so we just ignore it. - */ - E1000_READ_REG(hw, E1000_SYSTIMR); - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for I210/I211 - */ - -static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - u32 sec, nsec; - - /* The timestamp latches on lowest register read. For I210/I211, the - * lowest register is SYSTIMR. Since we only need to provide nanosecond - * resolution, we can ignore it. - */ - E1000_READ_REG(hw, E1000_SYSTIMR); - nsec = E1000_READ_REG(hw, E1000_SYSTIML); - sec = E1000_READ_REG(hw, E1000_SYSTIMH); - - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -static void igb_ptp_write_i210(struct igb_adapter *adapter, - const struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - - /* - * Writing the SYSTIMR register is not necessary as it only provides - * sub-nanosecond resolution. - */ - E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec); - E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec); -} - -/** - * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. 
- * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. - * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. - **/ -static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) -{ - unsigned long flags; - u64 ns; - - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - spin_lock_irqsave(&adapter->tmreg_lock, flags); - - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); - break; - case e1000_i210: - case e1000_i211: - memset(hwtstamps, 0, sizeof(*hwtstamps)); - /* Upper 32 bits contain s, lower 32 bits contain ns. */ - hwtstamps->hwtstamp = ktime_set(systim >> 32, - systim & 0xFFFFFFFF); - break; - default: - break; - } -} - -/* - * PTP clock operations - */ - -static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 incvalue; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 14; - rate = div_u64(rate, 1953125); - - incvalue = 16 << IGB_82576_TSYNC_SHIFT; - - if (neg_adj) - incvalue -= rate; - else - incvalue += rate; - - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); - - return 0; -} - -static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 inca; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); - - /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x - * as quickly. Account for this by dividing the adjustment by 2.5. 
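A note on the deleted igb_ptp_adjfreq_82576() above: the constant 1953125 is 10^9 / 2^9, so (ppb << 14) / 1953125 equals ppb * 2^23 / 10^9, which is the parts-per-billion correction expressed in the 16 ns, 19-fractional-bit TIMINCA units described in the deleted header comment. A tiny check, using nothing beyond standard C, that the shifted form and the direct form agree:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t ppb = 1000;                                  /* a 1 ppm adjustment */
    uint64_t a = (ppb << 14) / 1953125;                   /* form used by the driver */
    uint64_t b = ppb * (16ull << 19) / 1000000000ull;     /* ppb * 16 ns * 2^19 / 1e9 */

    /* Both print 8: scaling numerator and denominator by 512 cannot change the quotient. */
    printf("%llu %llu\n", (unsigned long long)a, (unsigned long long)b);
    return 0;
}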
- */ - if (hw->mac.type == e1000_i354) { - u32 status = E1000_READ_REG(hw, E1000_STATUS); - - if ((status & E1000_STATUS_2P5_SKU) && - !(status & E1000_STATUS_2P5_SKU_OVER)) { - rate <<= 1; - rate = div_u64(rate, 5); - } - } - - inca = rate & INCVALUE_MASK; - if (neg_adj) - inca |= ISGN; - - E1000_WRITE_REG(hw, E1000_TIMINCA, inca); - - return 0; -} - -static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - s64 now; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - now = timecounter_read(&igb->tc); - now += delta; - timecounter_init(&igb->tc, &igb->cc, now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - struct timespec now, then = ns_to_timespec(delta); - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, &now); - now = timespec_add(now, then); - igb_ptp_write_i210(igb, (const struct timespec *)&now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - u32 remainder; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - ns = timecounter_read(&igb->tc); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; - - return 0; -} - -static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, ts); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - timecounter_init(&igb->tc, &igb->cc, ns); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_write_i210(igb, ts); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) -{ - return -EOPNOTSUPP; -} - -/** - * igb_ptp_tx_work - * @work: pointer to work struct - * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. 
- */ -void igb_ptp_tx_work(struct work_struct *work) -{ - struct igb_adapter *adapter = container_of(work, struct igb_adapter, - ptp_tx_work); - struct e1000_hw *hw = &adapter->hw; - u32 tsynctxctl; - - if (!adapter->ptp_tx_skb) - return; - - if (time_is_before_jiffies(adapter->ptp_tx_start + - IGB_PTP_TX_TIMEOUT)) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - adapter->tx_hwtstamp_timeouts++; - dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang"); - return; - } - - tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - if (tsynctxctl & E1000_TSYNCTXCTL_VALID) - igb_ptp_tx_hwtstamp(adapter); - else - /* reschedule to check later */ - schedule_work(&adapter->ptp_tx_work); -} - -static void igb_ptp_overflow_check(struct work_struct *work) -{ - struct igb_adapter *igb = - container_of(work, struct igb_adapter, ptp_overflow_work.work); - struct timespec ts; - - igb->ptp_caps.gettime(&igb->ptp_caps, &ts); - - pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - - schedule_delayed_work(&igb->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); -} - -/** - * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched - * @adapter: private network adapter structure - * - * This watchdog task is scheduled to detect error case where hardware has - * dropped an Rx packet that was timestamped when the ring is full. The - * particular error is rare but leaves the device in a state unable to timestamp - * any future packets. - */ -void igb_ptp_rx_hang(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_ring *rx_ring; - u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - unsigned long rx_event; - int n; - - if (hw->mac.type != e1000_82576) - return; - - /* If we don't have a valid timestamp in the registers, just update the - * timeout counter and exit - */ - if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) { - adapter->last_rx_ptp_check = jiffies; - return; - } - - /* Determine the most recent watchdog or rx_timestamp event */ - rx_event = adapter->last_rx_ptp_check; - for (n = 0; n < adapter->num_rx_queues; n++) { - rx_ring = adapter->rx_ring[n]; - if (time_after(rx_ring->last_rx_timestamp, rx_event)) - rx_event = rx_ring->last_rx_timestamp; - } - - /* Only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5 * HZ)) { - E1000_READ_REG(hw, E1000_RXSTMPH); - adapter->last_rx_ptp_check = jiffies; - adapter->rx_hwtstamp_cleared++; - dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang"); - } -} - -/** - * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp - * @adapter: Board private structure. - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. 
- */ -void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; -} - -/** - * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp - * @q_vector: Pointer to interrupt specific structure - * @va: Pointer to address containing Rx buffer - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the first buffer of an - * incoming frame. The value is stored in little endian format starting on - * byte 8. - */ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, - unsigned char *va, - struct sk_buff *skb) -{ - __le64 *regval = (__le64 *)va; - - /* - * The timestamp is recorded in little endian format. - * DWORD: 0 1 2 3 - * Field: Reserved Reserved SYSTIML SYSTIMH - */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), - le64_to_cpu(regval[1])); -} - -/** - * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register - * @q_vector: Pointer to interrupt specific structure - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the internal registers - * of the adapter and store it in the skb. - */ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; - - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. - */ - if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; - - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); -} - -/** - * igb_ptp_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". 
- * - **/ -int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * 82576 cannot timestamp all packets, which it needs to do to - * support both V1 Sync and Delay_Req messages - */ - if (hw->mac.type != e1000_82576) { - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - } - /* fall through */ - default: - config.rx_filter = HWTSTAMP_FILTER_NONE; - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. 
- */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - is_l2 = true; - is_l4 = true; - - if ((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) { - regval = E1000_READ_REG(hw, E1000_RXPBS); - regval |= E1000_RXPBS_CFG_TS_EN; - E1000_WRITE_REG(hw, E1000_RXPBS, regval); - } - } - - /* enable/disable TX */ - regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - E1000_WRITE_REG(hw, E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - E1000_WRITE_REG(hw, E1000_ETQF(3), 0); - - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT)); - E1000_WRITE_REG(hw, E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf); - } else { - E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK); - } - E1000_WRITE_FLUSH(hw); - - /* clear TX/RX time stamp registers, just to be sure */ - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval = E1000_READ_REG(hw, E1000_TXSTMPH); - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval = E1000_READ_REG(hw, E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -void igb_ptp_init(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - - switch (hw->mac.type) { - case e1000_82576: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 999999881; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82576; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. 
*/ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82580; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - case e1000_i210: - case e1000_i211: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; - adapter->ptp_caps.gettime = igb_ptp_gettime_i210; - adapter->ptp_caps.settime = igb_ptp_settime_i210; - adapter->ptp_caps.enable = igb_ptp_enable; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - default: - adapter->ptp_clock = NULL; - return; - } - - E1000_WRITE_FLUSH(hw); - - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - /* Initialize the clock and overflow work for devices that need it. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - schedule_delayed_work(&adapter->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); - } - - /* Initialize the time sync interrupts for devices that support it. */ - if (hw->mac.type >= e1000_82580) { - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - } - - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, - &adapter->pdev->dev); - if (IS_ERR(adapter->ptp_clock)) { - adapter->ptp_clock = NULL; - dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { - dev_info(&adapter->pdev->dev, "added PHC on %s\n", - adapter->netdev->name); - adapter->flags |= IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_stop - Disable PTP device and stop the overflow check. - * @adapter: Board private structure. - * - * This function stops the PTP support and cancels the delayed work. - **/ -void igb_ptp_stop(struct igb_adapter *adapter) -{ - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - cancel_delayed_work_sync(&adapter->ptp_overflow_work); - break; - case e1000_i210: - case e1000_i211: - /* No delayed work to cancel. 
*/ - break; - default: - return; - } - - cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - } - - if (adapter->ptp_clock) { - ptp_clock_unregister(adapter->ptp_clock); - dev_info(&adapter->pdev->dev, "removed PHC on %s\n", - adapter->netdev->name); - adapter->flags &= ~IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_reset - Re-enable the adapter for PTP following a reset. - * @adapter: Board private structure. - * - * This function handles the reset work required to re-enable the PTP device. - **/ -void igb_ptp_reset(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - if (!(adapter->flags & IGB_FLAG_PTP)) - return; - - switch (adapter->hw.mac.type) { - case e1000_82576: - /* Dial the nominal frequency. */ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - /* Enable the timer functions and interrupts. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - break; - default: - /* No work to do. */ - return; - } - - /* Re-initialize the timer. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - } -} diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h index 18da64a3..9d49b45e 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c index 015c8952..205da562 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h index e51e7c4e..c6d4c568 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
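[Editor's note, not part of the patch] igb_ptp_init() above registers a PTP hardware clock (the "added PHC on %s" message). A minimal user-space sketch for reading such a clock follows; it assumes the PHC is exposed as /dev/ptp0 and uses the fd-to-clockid convention from the kernel's testptp example (CLOCKFD = 3).

	/* Editorial sketch: read a PTP hardware clock through the dynamic posix-clock API. */
	#include <stdio.h>
	#include <fcntl.h>
	#include <time.h>
	#include <unistd.h>

	#define CLOCKFD 3
	#define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | CLOCKFD)

	int main(void)
	{
		struct timespec ts;
		int fd = open("/dev/ptp0", O_RDONLY);   /* assumed PHC character device */

		if (fd < 0) {
			perror("open /dev/ptp0");
			return 1;
		}
		if (clock_gettime(FD_TO_CLOCKID(fd), &ts) < 0) {
			perror("clock_gettime");
			close(fd);
			return 1;
		}
		printf("PHC time: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		close(fd);
		return 0;
	}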
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c deleted file mode 100644 index bde3a83c..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c +++ /dev/null @@ -1,1482 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "kcompat.h" - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -/* From lib/vsprintf.c */ -#include - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define _kc_ZEROPAD 1 /* pad with zero */ -#define _kc_SIGN 2 /* unsigned/signed long */ -#define _kc_PLUS 4 /* show plus */ -#define _kc_SPACE 8 /* space if plus */ -#define _kc_LEFT 16 /* left justified */ -#define _kc_SPECIAL 32 /* 0x */ -#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits; - const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - int i; - - digits = (type & _kc_LARGE) ? large_digits : small_digits; - if (type & _kc_LEFT) - type &= ~_kc_ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & _kc_ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (type & _kc_SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & _kc_PLUS) { - sign = '+'; - size--; - } else if (type & _kc_SPACE) { - sign = ' '; - size--; - } - } - if (type & _kc_SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { - while(size-->0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - } - if (sign) { - if (buf <= end) - *buf = sign; - ++buf; - } - if (type & _kc_SPECIAL) { - if (base==8) { - if (buf <= end) - *buf = '0'; - ++buf; - } else if (base==16) { - if (buf <= end) - *buf = '0'; - ++buf; - if (buf <= end) - *buf = digits[33]; - ++buf; - } - } - if (!(type & _kc_LEFT)) { - while (size-- > 0) { - if (buf <= end) - *buf = c; - ++buf; - } - } - while (i < precision--) { - if (buf <= end) - *buf = '0'; - ++buf; - } - while (i-- > 0) { - if (buf <= end) - *buf = tmp[i]; - ++buf; - } - while (size-- > 0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - return buf; -} - -int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char *str, *end, c; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - str = buf; - end = buf + size - 1; - - if (end < buf - 1) { - end = ((void *) -1); - size = end - buf + 1; - } - - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str <= end) - *str = *fmt; - ++str; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= _kc_LEFT; goto repeat; - case '+': flags |= _kc_PLUS; goto repeat; - case ' ': flags |= _kc_SPACE; goto repeat; - case '#': flags |= _kc_SPECIAL; goto repeat; - case '0': flags |= _kc_ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= _kc_LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & _kc_LEFT)) { - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - } - c = (unsigned char) va_arg(args, int); - if (str <= end) - *str = c; - ++str; - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = ""; - - len = strnlen(s, precision); - - if (!(flags & _kc_LEFT)) { - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - } - for (i = 0; i < len; ++i) { - if (str <= end) - *str = 
*s; - ++str; ++s; - } - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= _kc_ZEROPAD; - } - str = number(str, end, - (unsigned long) va_arg(args, void *), - 16, field_width, precision, flags); - continue; - - - case 'n': - /* FIXME: - * What does C99 say about the overflow case here? */ - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - if (str <= end) - *str = '%'; - ++str; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= _kc_LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= _kc_SIGN; - case 'u': - break; - - default: - if (str <= end) - *str = '%'; - ++str; - if (*fmt) { - if (str <= end) - *str = *fmt; - ++str; - } else { - --fmt; - } - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & _kc_SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & _kc_SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & _kc_SIGN) - num = (signed int) num; - } - str = number(str, end, num, base, - field_width, precision, flags); - } - if (str <= end) - *str = '\0'; - else if (size > 0) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; - /* the trailing null byte doesn't count towards the total - * ++str; - */ - return str-buf; -} - -int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) 
-{ - va_list args; - int i; - - va_start(args, fmt); - i = _kc_vsnprintf(buf,size,fmt,args); - va_end(args); - return i; -} -#endif /* < 2.4.8 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#if defined(CONFIG_HIGHMEM) - -#ifndef PCI_DRAM_OFFSET -#define PCI_DRAM_OFFSET 0 -#endif - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return (((u64) (page - mem_map) << PAGE_SHIFT) + offset + - PCI_DRAM_OFFSET); -} - -#else /* CONFIG_HIGHMEM */ - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return pci_map_single(dev, (void *)page_address(page) + offset, size, - direction); -} - -#endif /* CONFIG_HIGHMEM */ - -void -_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, - int direction) -{ - return pci_unmap_single(dev, dma_addr, size, direction); -} - -#endif /* 2.4.13 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - -/**************************************/ -/* PCI DRIVER API */ - -int -_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) -{ - if (!pci_dma_supported(dev, mask)) - return -EIO; - dev->dma_mask = mask; - return 0; -} - -int -_kc_pci_request_regions(struct pci_dev *dev, char *res_name) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) { - if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } - } - return 0; -} - -void -_kc_pci_release_regions(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) - release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - - else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) - release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - } -} - -/**************************************/ -/* NETWORK DRIVER API */ - -struct net_device * -_kc_alloc_etherdev(int sizeof_priv) -{ - struct net_device *dev; - int alloc_size; - - alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; - dev = kzalloc(alloc_size, GFP_KERNEL); - if (!dev) - return NULL; - - if (sizeof_priv) - dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); - dev->name[0] = '\0'; - ether_setup(dev); - - return dev; -} - -int -_kc_is_valid_ether_addr(u8 *addr) -{ - const char zaddr[6] = { 0, }; - - return !(addr[0] & 1) && memcmp(addr, zaddr, 6); -} - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -int -_kc_pci_set_power_state(struct pci_dev *dev, int state) -{ - return 0; -} - -int -_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) -{ - return 0; -} - -#endif /* 2.4.6 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -void 
_kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, - int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -/* - * Original Copyright: - * find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + ffs(tmp); -} - -size_t _kc_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - -#ifndef do_div -#if BITS_PER_LONG == 32 -uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base) -{ - uint64_t rem = *n; - uint64_t b = base; - uint64_t res, d = 1; - uint32_t high = rem >> 32; - - /* Reduce the thing a bit first */ - res = 0; - if (high >= base) { - high /= base; - res = (uint64_t) high << 32; - rem -= (uint64_t) (high*base) << 32; - } - - while ((int64_t)b > 0 && b < rem) { - b = b+b; - d = d+d; - } - - do { - if (rem >= b) { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - *n = res; - return rem; -} -#endif /* BITS_PER_LONG == 32 */ -#endif /* do_div */ -#endif /* 2.6.0 => 2.4.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i = vsnprintf(buf, size, fmt, args); - va_end(args); - return (i >= size) ? 
(size - 1) : i; -} -#endif /* < 2.6.4 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) -DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; -#endif /* < 2.6.10 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) -char *_kc_kstrdup(const char *s, unsigned int gfp) -{ - size_t len; - char *buf; - - if (!s) - return NULL; - - len = strlen(s) + 1; - buf = kmalloc(len, gfp); - if (buf) - memcpy(buf, s, len); - return buf; -} -#endif /* < 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) -void *_kc_kzalloc(size_t size, int flags) -{ - void *ret = kmalloc(size, flags); - if (ret) - memset(ret, 0, size); - return ret; -} -#endif /* <= 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) -int _kc_skb_pad(struct sk_buff *skb, int pad) -{ - int ntail; - - /* If the skbuff is non linear tailroom is always zero.. */ - if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return 0; - } - - ntail = skb->data_len + pad - (skb->end - skb->tail); - if (likely(skb_cloned(skb) || ntail > 0)) { - if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); - goto free_skb; - } - -#ifdef MAX_SKB_FRAGS - if (skb_is_nonlinear(skb) && - !__pskb_pull_tail(skb, skb->data_len)) - goto free_skb; - -#endif - memset(skb->data + skb->len, 0, pad); - return 0; - -free_skb: - kfree_skb(skb); - return -ENOMEM; -} - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) -int _kc_pci_save_state(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct adapter_struct *adapter = netdev_priv(netdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset, pcie_link_status; - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) - /* no ->dev for 2.4 kernels */ - WARN_ON(pdev->dev.driver_data == NULL); -#endif - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset) { - if (!pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - } - pci_config_space_ich8lan(); -#ifdef HAVE_PCI_ERS - if (adapter->config_space == NULL) -#else - WARN_ON(adapter->config_space != NULL); -#endif - adapter->config_space = kmalloc(size, GFP_KERNEL); - if (!adapter->config_space) { - printk(KERN_ERR "Out of memory in pci_save_state\n"); - return -ENOMEM; - } - for (i = 0; i < (size / 4); i++) - pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); - return 0; -} - -void _kc_pci_restore_state(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct adapter_struct *adapter = netdev_priv(netdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset; - u16 pcie_link_status; - - if (adapter->config_space != NULL) { - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset && - !pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - - pci_config_space_ich8lan(); - for (i = 0; i < (size / 4); i++) - pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); -#ifndef HAVE_PCI_ERS - kfree(adapter->config_space); - adapter->config_space = NULL; -#endif - } -} -#endif /* 
!(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -void _kc_free_netdev(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - - if (adapter->config_space != NULL) - kfree(adapter->config_space); -#ifdef CONFIG_SYSFS - if (netdev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)netdev - netdev->padded); - } else { - BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); - netdev->reg_state = NETREG_RELEASED; - class_device_put(&netdev->class_dev); - } -#else - kfree((char *)netdev - netdev->padded); -#endif -} -#endif - -void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) -{ - void *p; - - p = kzalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; -} -#endif /* <= 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev) -{ - return ((struct adapter_struct *)netdev_priv(netdev))->pdev; -} -#endif /* < 2.6.21 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -/* hexdump code taken from lib/hexdump.c */ -static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, unsigned char *linebuf, - size_t linebuflen, bool ascii) -{ - const u8 *ptr = buf; - u8 ch; - int j, lx = 0; - int ascii_column; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - if (!len) - goto nil; - if (len > rowsize) /* limit to one line at a time */ - len = rowsize; - if ((len % groupsize) != 0) /* no mixed size output */ - groupsize = 1; - - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } - - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } - - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } - - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { - ch = ptr[j]; - linebuf[lx++] = hex_asc(ch >> 4); - linebuf[lx++] = hex_asc(ch & 0x0f); - linebuf[lx++] = ' '; - } - if (j) - lx--; - - ascii_column = 3 * rowsize + 2; - break; - } - if (!ascii) - goto nil; - - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) - linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? 
ptr[j] - : '.'; -nil: - linebuf[lx++] = '\0'; -} - -void _kc_print_hex_dump(const char *level, - const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) -{ - const u8 *ptr = buf; - int i, linelen, remaining = len; - unsigned char linebuf[200]; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - for (i = 0; i < len; i += rowsize) { - linelen = min(remaining, rowsize); - remaining -= rowsize; - _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); - - switch (prefix_type) { - case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); - break; - case DUMP_PREFIX_OFFSET: - printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); - break; - default: - printk("%s%s%s\n", level, prefix_str, linebuf); - break; - } - } -} - -#ifdef HAVE_I2C_SUPPORT -struct i2c_client * -_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) -{ - struct i2c_client *client; - int status; - - client = kzalloc(sizeof *client, GFP_KERNEL); - if (!client) - return NULL; - - client->adapter = adap; - - client->dev.platform_data = info->platform_data; - - client->flags = info->flags; - client->addr = info->addr; - - strlcpy(client->name, info->type, sizeof(client->name)); - - /* Check for address business */ - status = i2c_check_addr(adap, client->addr); - if (status) - goto out_err; - - client->dev.parent = &client->adapter->dev; - client->dev.bus = &i2c_bus_type; - - status = i2c_attach_client(client); - if (status) - goto out_err; - - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", - client->name, dev_name(&client->dev)); - - return client; - -out_err: - dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x " - "(%d)\n", client->name, client->addr, status); - kfree(client); - return NULL; -} -#endif /* HAVE_I2C_SUPPORT */ -#endif /* < 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifdef NAPI -struct net_device *napi_to_poll_dev(const struct napi_struct *napi) -{ - struct adapter_q_vector *q_vector = container_of(napi, - struct adapter_q_vector, - napi); - return &q_vector->poll_dev; -} - -int __kc_adapter_clean(struct net_device *netdev, int *budget) -{ - int work_done; - int work_to_do = min(*budget, netdev->quota); - /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ - struct napi_struct *napi = netdev->priv; - work_done = napi->poll(napi, work_to_do); - *budget -= work_done; - netdev->quota -= work_done; - return (work_done >= work_to_do) ? 
1 : 0; -} -#endif /* NAPI */ -#endif /* <= 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) -{ - struct pci_dev *parent = pdev->bus->self; - u16 link_state; - int pos; - - if (!parent) - return; - - pos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); - link_state &= ~state; - pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); - } -} -#endif /* < 2.6.26 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -#ifdef HAVE_TX_MQ -void _kc_netif_tx_stop_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_stop_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_stop_subqueue(netdev, i); -} -void _kc_netif_tx_wake_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_wake_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_wake_subqueue(netdev, i); -} -void _kc_netif_tx_start_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_start_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_start_subqueue(netdev, i); -} -#endif /* HAVE_TX_MQ */ - -#ifndef __WARN_printf -void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) -{ - va_list args; - - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - - dump_stack(); -} -#endif /* __WARN_printf */ -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) - -int -_kc_pci_prepare_to_sleep(struct pci_dev *dev) -{ - pci_power_t target_state; - int error; - - target_state = pci_choose_state(dev, PMSG_SUSPEND); - - pci_enable_wake(dev, target_state, true); - - error = pci_set_power_state(dev, target_state); - - if (error) - pci_enable_wake(dev, target_state, false); - - return error; -} - -int -_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) -{ - int err; - - err = pci_enable_wake(dev, PCI_D3cold, enable); - if (err) - goto out; - - err = pci_enable_wake(dev, PCI_D3hot, enable); - -out: - return err; -} -#endif /* < 2.6.28 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) -static void __kc_pci_set_master(struct pci_dev *pdev, bool enable) -{ - u16 old_cmd, cmd; - - pci_read_config_word(pdev, PCI_COMMAND, &old_cmd); - if (enable) - cmd = old_cmd | PCI_COMMAND_MASTER; - else - cmd = old_cmd & ~PCI_COMMAND_MASTER; - if (cmd != old_cmd) { - dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n", - enable ? 
"enabling" : "disabling"); - pci_write_config_word(pdev, PCI_COMMAND, cmd); - } -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ) - pdev->is_busmaster = enable; -#endif -} - -void _kc_pci_clear_master(struct pci_dev *dev) -{ - __kc_pci_set_master(dev, false); -} -#endif /* < 2.6.29 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) -#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -int _kc_pci_num_vf(struct pci_dev *dev) -{ - int num_vf = 0; -#ifdef CONFIG_PCI_IOV - struct pci_dev *vfdev; - - /* loop through all ethernet devices starting at PF dev */ - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL); - while (vfdev) { - if (vfdev->is_virtfn && vfdev->physfn == dev) - num_vf++; - - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev); - } - -#endif - return num_vf; -} -#endif /* RHEL_RELEASE_CODE */ -#endif /* < 2.6.34 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) -#ifdef HAVE_TX_MQ -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) -{ - unsigned int real_num = dev->real_num_tx_queues; - struct Qdisc *qdisc; - int i; - - if (unlikely(txq > dev->num_tx_queues)) - ; - else if (txq > real_num) - dev->real_num_tx_queues = txq; - else if ( txq < real_num) { - dev->real_num_tx_queues = txq; - for (i = txq; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; - if (qdisc) { - spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); - spin_unlock_bh(qdisc_lock(qdisc)); - } - } - } -} -#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ -#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ -#endif /* HAVE_TX_MQ */ - -ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, - const void __user *from, size_t count) -{ - loff_t pos = *ppos; - size_t res; - - if (pos < 0) - return -EINVAL; - if (pos >= available || !count) - return 0; - if (count > available - pos) - count = available - pos; - res = copy_from_user(to + pos, from, count); - if (res == count) - return -EFAULT; - count -= res; - *ppos = pos + count; - return count; -} - -#endif /* < 2.6.35 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -static const u32 _kc_flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); - -u32 _kc_ethtool_op_get_flags(struct net_device *dev) -{ - return dev->features & _kc_flags_dup_features; -} - -int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) -{ - if (data & ~supported) - return -EINVAL; - - dev->features = ((dev->features & ~_kc_flags_dup_features) | - (data & _kc_flags_dup_features)); - return 0; -} -#endif /* < 2.6.36 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) - - - -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ -#endif /* < 2.6.39 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size, unsigned int truesize) -{ - skb_fill_page_desc(skb, i, page, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += truesize; -} - -int _kc_simple_open(struct inode *inode, 
struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - - return 0; -} - -#endif /* < 3.4.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) -#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) -static inline int __kc_pcie_cap_version(struct pci_dev *dev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); - return reg16 & PCI_EXP_FLAGS_VERS; -} - -static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev) -{ - return true; -} - -static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_ENDPOINT || - type == PCI_EXP_TYPE_LEG_END; -} - -static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - int pos; - u16 pcie_flags_reg; - - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - (type == PCI_EXP_TYPE_DOWNSTREAM && - pcie_flags_reg & PCI_EXP_FLAGS_SLOT); -} - -static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev) -{ - int type = pci_pcie_type(dev); - - return __kc_pcie_cap_version(dev) > 1 || - type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_RC_EC; -} - -static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos) -{ - if (!pci_is_pcie(dev)) - return false; - - switch (pos) { - case PCI_EXP_FLAGS_TYPE: - return true; - case PCI_EXP_DEVCAP: - case PCI_EXP_DEVCTL: - case PCI_EXP_DEVSTA: - return __kc_pcie_cap_has_devctl(dev); - case PCI_EXP_LNKCAP: - case PCI_EXP_LNKCTL: - case PCI_EXP_LNKSTA: - return __kc_pcie_cap_has_lnkctl(dev); - case PCI_EXP_SLTCAP: - case PCI_EXP_SLTCTL: - case PCI_EXP_SLTSTA: - return __kc_pcie_cap_has_sltctl(dev); - case PCI_EXP_RTCTL: - case PCI_EXP_RTCAP: - case PCI_EXP_RTSTA: - return __kc_pcie_cap_has_rtctl(dev); - case PCI_EXP_DEVCAP2: - case PCI_EXP_DEVCTL2: - case PCI_EXP_LNKCAP2: - case PCI_EXP_LNKCTL2: - case PCI_EXP_LNKSTA2: - return __kc_pcie_cap_version(dev) > 1; - default: - return false; - } -} - -/* - * Note that these accessor functions are only for the "PCI Express - * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the - * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.) - */ -int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val) -{ - int ret; - - *val = 0; - if (pos & 1) - return -EINVAL; - - if (__kc_pcie_capability_reg_implemented(dev, pos)) { - ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val); - /* - * Reset *val to 0 if pci_read_config_word() fails, it may - * have been written as 0xFFFF if hardware error happens - * during pci_read_config_word(). - */ - if (ret) - *val = 0; - return ret; - } - - /* - * For Functions that do not implement the Slot Capabilities, - * Slot Status, and Slot Control registers, these spaces must - * be hardwired to 0b, with the exception of the Presence Detect - * State bit in the Slot Status register of Downstream Ports, - * which must be hardwired to 1b. 
(PCIe Base Spec 3.0, sec 7.8) - */ - if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA && - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) { - *val = PCI_EXP_SLTSTA_PDS; - } - - return 0; -} - -int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) -{ - if (pos & 1) - return -EINVAL; - - if (!__kc_pcie_capability_reg_implemented(dev, pos)) - return 0; - - return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); -} - -int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set) -{ - int ret; - u16 val; - - ret = __kc_pcie_capability_read_word(dev, pos, &val); - if (!ret) { - val &= ~clear; - val |= set; - ret = __kc_pcie_capability_write_word(dev, pos, val); - } - - return ret; -} -#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */ -#endif /* < 3.7.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) -#endif /* 3.9.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - unsigned int vfs_assigned = 0; -#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED - int pos; - struct pci_dev *vfdev; - unsigned short dev_id; - - /* only search if we are a PF */ - if (!dev->is_physfn) - return 0; - - /* find SR-IOV capability */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return 0; - - /* - * determine the device ID for the VFs, the vendor ID will be the - * same as the PF so there is no need to check for that one - */ - pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); - - /* loop through all the VFs to see if we own any that are assigned */ - vfdev = pci_get_device(dev->vendor, dev_id, NULL); - while (vfdev) { - /* - * It is considered assigned if it is a virtual function with - * our dev as the physical function and the assigned bit is set - */ - if (vfdev->is_virtfn && (vfdev->physfn == dev) && - (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) - vfs_assigned++; - - vfdev = pci_get_device(dev->vendor, dev_id, vfdev); - } - -#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ - return vfs_assigned; -} - -#endif /* CONFIG_PCI_IOV */ -#endif /* 3.10.0 */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index e2cf71e0..84826b26 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
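[Editor's note, not part of the patch] The kcompat.h hunks below gate features on LINUX_VERSION_CODE (and, for backported distro kernels, RHEL_RELEASE_CODE), for example defining HAVE_VF_VLAN_PROTO on >= 4.9 and mapping the renamed vlan_tx_tag_* helpers to skb_vlan_tag_*. A minimal sketch of that gating pattern is shown here; it is a header fragment for a kernel-module build only.

	/* Editorial sketch: typical kcompat-style version gating. */
	#include <linux/version.h>

	/* The vlan_tx_tag_* helpers were renamed in 4.0; keep old call sites compiling. */
	#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
	#define vlan_tx_tag_get		skb_vlan_tag_get
	#define vlan_tx_tag_present	skb_vlan_tag_present
	#endif

	/* ndo_set_vf_vlan() gained a vlan_proto argument in 4.9; advertise it as a flag. */
	#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
	#define HAVE_VF_VLAN_PROTO
	#endif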
Contact Information: e1000-devel Mailing List @@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) #define HAVE_NDO_DFLT_BRIDGE_ADD_MASK -#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) #define HAVE_NDO_FDB_ADD_VID #endif /* !RHEL 7.2 */ #endif /* >= 3.19.0 */ @@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) /* vlan_tx_xx functions got renamed to skb_vlan */ #define vlan_tx_tag_get skb_vlan_tag_get #define vlan_tx_tag_present skb_vlan_tag_present -#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) #define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS #endif /* !RHEL 7.2 */ #endif /* 4.0.0 */ -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) )) /* ndo_bridge_getlink adds new nlflags parameter */ #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS #endif /* >= 4.1.0 */ @@ -3915,4 +3916,21 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) /* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL #endif /* >= 4.2.0 */ + +/* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) + * For older kernels backported this commit, need to use renamed functions. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#endif + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) ) +#define HAVE_VF_VLAN_PROTO +#endif /* >= 4.9.0 */ + #endif /* _KCOMPAT_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c deleted file mode 100644 index e1a89388..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c +++ /dev/null @@ -1,1171 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* - * net/core/ethtool.c - Ethtool ioctl handler - * Copyright (c) 2003 Matthew Wilcox - * - * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. - * - * It's GPL, stupid. - * - * Modification by sfeldma@pobox.com to work as backward compat - * solution for pre-ethtool_ops kernels. - * - copied struct ethtool_ops from ethtool.h - * - defined SET_ETHTOOL_OPS - * - put in some #ifndef NETIF_F_xxx wrappers - * - changes refs to dev->ethtool_ops to ethtool_ops - * - changed dev_ethtool to ethtool_ioctl - * - remove EXPORT_SYMBOL()s - * - added _kc_ prefix in built-in ethtool_op_xxx ops. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "kcompat.h" - -#undef SUPPORTED_10000baseT_Full -#define SUPPORTED_10000baseT_Full (1 << 12) -#undef ADVERTISED_10000baseT_Full -#define ADVERTISED_10000baseT_Full (1 << 12) -#undef SPEED_10000 -#define SPEED_10000 10000 - -#undef ethtool_ops -#define ethtool_ops _kc_ethtool_ops - -struct _kc_ethtool_ops { - int (*get_settings)(struct net_device *, struct ethtool_cmd *); - int (*set_settings)(struct net_device *, struct ethtool_cmd *); - void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); - int (*get_regs_len)(struct net_device *); - void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); - void (*get_wol)(struct net_device *, struct ethtool_wolinfo *); - int (*set_wol)(struct net_device *, struct ethtool_wolinfo *); - u32 (*get_msglevel)(struct net_device *); - void (*set_msglevel)(struct net_device *, u32); - int (*nway_reset)(struct net_device *); - u32 (*get_link)(struct net_device *); - int (*get_eeprom_len)(struct net_device *); - int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *); - int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *); - void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *); - int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); - void (*get_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - int (*set_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - u32 (*get_rx_csum)(struct net_device *); - int (*set_rx_csum)(struct net_device *, u32); - u32 (*get_tx_csum)(struct net_device *); - int (*set_tx_csum)(struct net_device *, u32); - u32 (*get_sg)(struct net_device *); - int (*set_sg)(struct net_device *, u32); - u32 (*get_tso)(struct net_device *); - int (*set_tso)(struct net_device *, u32); - int (*self_test_count)(struct net_device *); - void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); - void (*get_strings)(struct net_device *, u32 stringset, u8 *); - int (*phys_id)(struct net_device *, u32); - int (*get_stats_count)(struct net_device *); - void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, - u64 *); -} *ethtool_ops = NULL; - -#undef SET_ETHTOOL_OPS -#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops)) - -/* - * Some useful ethtool_ops methods that are device independent. If we find that - * all drivers want to do the same thing here, we can turn these into dev_() - * function calls. 
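[Editor's note, not part of the patch] The removed kcompat_ethtool.c re-implements the SIOCETHTOOL command dispatch for kernels that predate ethtool_ops. As a hedged illustration, the sketch below is the user-space side of one such command, ETHTOOL_GLINK, which the ethtool_get_link() handler further down services; "eth0" is a placeholder interface name.

	/* Editorial sketch: query link state through the ethtool ioctl interface. */
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	int main(void)
	{
		struct ethtool_value ev = { .cmd = ETHTOOL_GLINK };
		struct ifreq ifr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return 1;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);  /* placeholder interface */
		ifr.ifr_data = (char *)&ev;

		if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
			perror("SIOCETHTOOL");
			close(fd);
			return 1;
		}
		printf("link is %s\n", ev.data ? "up" : "down");
		close(fd);
		return 0;
	}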
- */ - -#undef ethtool_op_get_link -#define ethtool_op_get_link _kc_ethtool_op_get_link -u32 _kc_ethtool_op_get_link(struct net_device *dev) -{ - return netif_carrier_ok(dev) ? 1 : 0; -} - -#undef ethtool_op_get_tx_csum -#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum -u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev) -{ -#ifdef NETIF_F_IP_CSUM - return (dev->features & NETIF_F_IP_CSUM) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_tx_csum -#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum -int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_IP_CSUM - if (data) -#ifdef NETIF_F_IPV6_CSUM - dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - else - dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); -#else - dev->features |= NETIF_F_IP_CSUM; - else - dev->features &= ~NETIF_F_IP_CSUM; -#endif -#endif - - return 0; -} - -#undef ethtool_op_get_sg -#define ethtool_op_get_sg _kc_ethtool_op_get_sg -u32 _kc_ethtool_op_get_sg(struct net_device *dev) -{ -#ifdef NETIF_F_SG - return (dev->features & NETIF_F_SG) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_sg -#define ethtool_op_set_sg _kc_ethtool_op_set_sg -int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_SG - if (data) - dev->features |= NETIF_F_SG; - else - dev->features &= ~NETIF_F_SG; -#endif - - return 0; -} - -#undef ethtool_op_get_tso -#define ethtool_op_get_tso _kc_ethtool_op_get_tso -u32 _kc_ethtool_op_get_tso(struct net_device *dev) -{ -#ifdef NETIF_F_TSO - return (dev->features & NETIF_F_TSO) != 0; -#else - return 0; -#endif -} - -#undef ethtool_op_set_tso -#define ethtool_op_set_tso _kc_ethtool_op_set_tso -int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data) -{ -#ifdef NETIF_F_TSO - if (data) - dev->features |= NETIF_F_TSO; - else - dev->features &= ~NETIF_F_TSO; -#endif - - return 0; -} - -/* Handlers for each ethtool command */ - -static int ethtool_get_settings(struct net_device *dev, void *useraddr) -{ - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int err; - - if (!ethtool_ops->get_settings) - return -EOPNOTSUPP; - - err = ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return err; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - return 0; -} - -static int ethtool_set_settings(struct net_device *dev, void *useraddr) -{ - struct ethtool_cmd cmd; - - if (!ethtool_ops->set_settings) - return -EOPNOTSUPP; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - return ethtool_ops->set_settings(dev, &cmd); -} - -static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) -{ - struct ethtool_drvinfo info; - struct ethtool_ops *ops = ethtool_ops; - - if (!ops->get_drvinfo) - return -EOPNOTSUPP; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GDRVINFO; - ops->get_drvinfo(dev, &info); - - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); - if (ops->get_eeprom_len) - info.eedump_len = ops->get_eeprom_len(dev); - - if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -static int ethtool_get_regs(struct net_device *dev, char *useraddr) -{ - struct ethtool_regs regs; - struct ethtool_ops *ops = ethtool_ops; - void *regbuf; - int reglen, ret; - - if (!ops->get_regs || !ops->get_regs_len) - return -EOPNOTSUPP; - - if (copy_from_user(®s, useraddr, 
sizeof(regs))) - return -EFAULT; - - reglen = ops->get_regs_len(dev); - if (regs.len > reglen) - regs.len = reglen; - - regbuf = kmalloc(reglen, GFP_USER); - if (!regbuf) - return -ENOMEM; - - ops->get_regs(dev, ®s, regbuf); - - ret = -EFAULT; - if (copy_to_user(useraddr, ®s, sizeof(regs))) - goto out; - useraddr += offsetof(struct ethtool_regs, data); - if (copy_to_user(useraddr, regbuf, reglen)) - goto out; - ret = 0; - -out: - kfree(regbuf); - return ret; -} - -static int ethtool_get_wol(struct net_device *dev, char *useraddr) -{ - struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; - - if (!ethtool_ops->get_wol) - return -EOPNOTSUPP; - - ethtool_ops->get_wol(dev, &wol); - - if (copy_to_user(useraddr, &wol, sizeof(wol))) - return -EFAULT; - return 0; -} - -static int ethtool_set_wol(struct net_device *dev, char *useraddr) -{ - struct ethtool_wolinfo wol; - - if (!ethtool_ops->set_wol) - return -EOPNOTSUPP; - - if (copy_from_user(&wol, useraddr, sizeof(wol))) - return -EFAULT; - - return ethtool_ops->set_wol(dev, &wol); -} - -static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - -static int ethtool_nway_reset(struct net_device *dev) -{ - if (!ethtool_ops->nway_reset) - return -EOPNOTSUPP; - - return ethtool_ops->nway_reset(dev); -} - -static int ethtool_get_link(struct net_device *dev, void *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_get_eeprom(struct net_device *dev, void *useraddr) -{ - struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->get_eeprom || !ops->get_eeprom_len) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(eeprom.len, GFP_USER); - if (!data) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->get_eeprom(dev, &eeprom, data); - if (ret) - goto out; - - ret = -EFAULT; - if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) - goto out; - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) -{ - struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->set_eeprom || !ops->get_eeprom_len) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return 
-EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(eeprom.len, GFP_USER); - if (!data) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->set_eeprom(dev, &eeprom, data); - if (ret) - goto out; - - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - ret = -EFAULT; - -out: - kfree(data); - return ret; -} - -static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) -{ - struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; - - if (!ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - ethtool_ops->get_coalesce(dev, &coalesce); - - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - return 0; -} - -static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) -{ - struct ethtool_coalesce coalesce; - - if (!ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) - return -EFAULT; - - return ethtool_ops->set_coalesce(dev, &coalesce); -} - -static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; - - if (!ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - ethtool_ops->get_ringparam(dev, &ringparam); - - if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_ringparam ringparam; - - if (!ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) - return -EFAULT; - - return ethtool_ops->set_ringparam(dev, &ringparam); -} - -static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; - - if (!ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - ethtool_ops->get_pauseparam(dev, &pauseparam); - - if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) -{ - struct ethtool_pauseparam pauseparam; - - if (!ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) - return -EFAULT; - - return ethtool_ops->set_pauseparam(dev, &pauseparam); -} - -static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if 
(!ethtool_ops->set_tx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_tx_csum(dev, edata.data); -} - -static int ethtool_get_sg(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_sg(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_sg(dev, edata.data); -} - -static int ethtool_get_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_tso) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_tso(dev, edata.data); -} - -static int ethtool_self_test(struct net_device *dev, char *useraddr) -{ - struct ethtool_test test; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if (!ops->self_test || !ops->self_test_count) - return -EOPNOTSUPP; - - if (copy_from_user(&test, useraddr, sizeof(test))) - return -EFAULT; - - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->self_test(dev, &test, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &test, sizeof(test))) - goto out; - useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_get_strings(struct net_device *dev, void *useraddr) -{ - struct ethtool_gstrings gstrings; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->get_strings) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; - } - - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - ops->get_strings(dev, gstrings.string_set, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_phys_id(struct net_device *dev, void *useraddr) -{ - struct ethtool_value id; - - if (!ethtool_ops->phys_id) - return -EOPNOTSUPP; - - if (copy_from_user(&id, useraddr, sizeof(id))) - return -EFAULT; - - return ethtool_ops->phys_id(dev, id.data); -} - -static int ethtool_get_stats(struct net_device *dev, void *useraddr) -{ - struct ethtool_stats stats; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if 
(!ops->get_ethtool_stats || !ops->get_stats_count) - return -EOPNOTSUPP; - - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->get_ethtool_stats(dev, &stats, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -/* The main entry point in this file. Called from net/core/dev.c */ - -#define ETHTOOL_OPS_COMPAT -int ethtool_ioctl(struct ifreq *ifr) -{ - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); - void *useraddr = (void *) ifr->ifr_data; - u32 ethcmd; - - /* - * XXX: This can be pushed down into the ethtool_* handlers that - * need it. Keep existing behavior for the moment. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (!dev || !netif_device_present(dev)) - return -ENODEV; - - if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) - return -EFAULT; - - switch (ethcmd) { - case ETHTOOL_GSET: - return ethtool_get_settings(dev, useraddr); - case ETHTOOL_SSET: - return ethtool_set_settings(dev, useraddr); - case ETHTOOL_GDRVINFO: - return ethtool_get_drvinfo(dev, useraddr); - case ETHTOOL_GREGS: - return ethtool_get_regs(dev, useraddr); - case ETHTOOL_GWOL: - return ethtool_get_wol(dev, useraddr); - case ETHTOOL_SWOL: - return ethtool_set_wol(dev, useraddr); - case ETHTOOL_GMSGLVL: - return ethtool_get_msglevel(dev, useraddr); - case ETHTOOL_SMSGLVL: - return ethtool_set_msglevel(dev, useraddr); - case ETHTOOL_NWAY_RST: - return ethtool_nway_reset(dev); - case ETHTOOL_GLINK: - return ethtool_get_link(dev, useraddr); - case ETHTOOL_GEEPROM: - return ethtool_get_eeprom(dev, useraddr); - case ETHTOOL_SEEPROM: - return ethtool_set_eeprom(dev, useraddr); - case ETHTOOL_GCOALESCE: - return ethtool_get_coalesce(dev, useraddr); - case ETHTOOL_SCOALESCE: - return ethtool_set_coalesce(dev, useraddr); - case ETHTOOL_GRINGPARAM: - return ethtool_get_ringparam(dev, useraddr); - case ETHTOOL_SRINGPARAM: - return ethtool_set_ringparam(dev, useraddr); - case ETHTOOL_GPAUSEPARAM: - return ethtool_get_pauseparam(dev, useraddr); - case ETHTOOL_SPAUSEPARAM: - return ethtool_set_pauseparam(dev, useraddr); - case ETHTOOL_GRXCSUM: - return ethtool_get_rx_csum(dev, useraddr); - case ETHTOOL_SRXCSUM: - return ethtool_set_rx_csum(dev, useraddr); - case ETHTOOL_GTXCSUM: - return ethtool_get_tx_csum(dev, useraddr); - case ETHTOOL_STXCSUM: - return ethtool_set_tx_csum(dev, useraddr); - case ETHTOOL_GSG: - return ethtool_get_sg(dev, useraddr); - case ETHTOOL_SSG: - return ethtool_set_sg(dev, useraddr); - case ETHTOOL_GTSO: - return ethtool_get_tso(dev, useraddr); - case ETHTOOL_STSO: - return ethtool_set_tso(dev, useraddr); - case ETHTOOL_TEST: - return ethtool_self_test(dev, useraddr); - case ETHTOOL_GSTRINGS: - return ethtool_get_strings(dev, useraddr); - case ETHTOOL_PHYS_ID: - return ethtool_phys_id(dev, useraddr); - case ETHTOOL_GSTATS: - return ethtool_get_stats(dev, useraddr); - default: - return -EOPNOTSUPP; - } - - return -EOPNOTSUPP; -} - -#define mii_if_info _kc_mii_if_info -struct _kc_mii_if_info { - int phy_id; - int advertising; - int phy_id_mask; - int reg_num_mask; - - unsigned int full_duplex : 1; /* is full duplex? */ - unsigned int force_media : 1; /* is autoneg. disabled?
*/ - - struct net_device *dev; - int (*mdio_read) (struct net_device *dev, int phy_id, int location); - void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val); -}; - -struct ethtool_cmd; -struct mii_ioctl_data; - -#undef mii_link_ok -#define mii_link_ok _kc_mii_link_ok -#undef mii_nway_restart -#define mii_nway_restart _kc_mii_nway_restart -#undef mii_ethtool_gset -#define mii_ethtool_gset _kc_mii_ethtool_gset -#undef mii_ethtool_sset -#define mii_ethtool_sset _kc_mii_ethtool_sset -#undef mii_check_link -#define mii_check_link _kc_mii_check_link -extern int _kc_mii_link_ok (struct mii_if_info *mii); -extern int _kc_mii_nway_restart (struct mii_if_info *mii); -extern int _kc_mii_ethtool_gset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern int _kc_mii_ethtool_sset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern void _kc_mii_check_link (struct mii_if_info *mii); -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) -#undef generic_mii_ioctl -#define generic_mii_ioctl _kc_generic_mii_ioctl -extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, - struct mii_ioctl_data *mii_data, int cmd, - unsigned int *duplex_changed); -#endif /* > 2.4.6 */ - - -struct _kc_pci_dev_ext { - struct pci_dev *dev; - void *pci_drvdata; - struct pci_driver *driver; -}; - -struct _kc_net_dev_ext { - struct net_device *dev; - unsigned int carrier; -}; - - -/**************************************/ -/* mii support */ - -int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) -{ - struct net_device *dev = mii->dev; - u32 advert, bmcr, lpa, nego; - - ecmd->supported = - (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); - - /* only supports twisted-pair */ - ecmd->port = PORT_MII; - - /* only supports internal transceiver */ - ecmd->transceiver = XCVR_INTERNAL; - - /* this isn't fully supported at higher layers */ - ecmd->phy_address = mii->phy_id; - - ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII; - advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); - if (advert & ADVERTISE_10HALF) - ecmd->advertising |= ADVERTISED_10baseT_Half; - if (advert & ADVERTISE_10FULL) - ecmd->advertising |= ADVERTISED_10baseT_Full; - if (advert & ADVERTISE_100HALF) - ecmd->advertising |= ADVERTISED_100baseT_Half; - if (advert & ADVERTISE_100FULL) - ecmd->advertising |= ADVERTISED_100baseT_Full; - - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA); - if (bmcr & BMCR_ANENABLE) { - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->autoneg = AUTONEG_ENABLE; - - nego = mii_nway_result(advert & lpa); - if (nego == LPA_100FULL || nego == LPA_100HALF) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - if (nego == LPA_100FULL || nego == LPA_10FULL) { - ecmd->duplex = DUPLEX_FULL; - mii->full_duplex = 1; - } else { - ecmd->duplex = DUPLEX_HALF; - mii->full_duplex = 0; - } - } else { - ecmd->autoneg = AUTONEG_DISABLE; - - ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? 
DUPLEX_FULL : DUPLEX_HALF; - } - - /* ignore maxtxpkt, maxrxpkt for now */ - - return 0; -} - -int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) -{ - struct net_device *dev = mii->dev; - - if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100) - return -EINVAL; - if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL) - return -EINVAL; - if (ecmd->port != PORT_MII) - return -EINVAL; - if (ecmd->transceiver != XCVR_INTERNAL) - return -EINVAL; - if (ecmd->phy_address != mii->phy_id) - return -EINVAL; - if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE) - return -EINVAL; - - /* ignore supported, maxtxpkt, maxrxpkt */ - - if (ecmd->autoneg == AUTONEG_ENABLE) { - u32 bmcr, advert, tmp; - - if ((ecmd->advertising & (ADVERTISED_10baseT_Half | - ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full)) == 0) - return -EINVAL; - - /* advertise only what has been requested */ - advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); - tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); - if (ADVERTISED_10baseT_Half) - tmp |= ADVERTISE_10HALF; - if (ADVERTISED_10baseT_Full) - tmp |= ADVERTISE_10FULL; - if (ADVERTISED_100baseT_Half) - tmp |= ADVERTISE_100HALF; - if (ADVERTISED_100baseT_Full) - tmp |= ADVERTISE_100FULL; - if (advert != tmp) { - mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); - mii->advertising = tmp; - } - - /* turn on autonegotiation, and force a renegotiate */ - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); - mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr); - - mii->force_media = 0; - } else { - u32 bmcr, tmp; - - /* turn off auto negotiation, set speed and duplexity */ - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX); - if (ecmd->speed == SPEED_100) - tmp |= BMCR_SPEED100; - if (ecmd->duplex == DUPLEX_FULL) { - tmp |= BMCR_FULLDPLX; - mii->full_duplex = 1; - } else - mii->full_duplex = 0; - if (bmcr != tmp) - mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); - - mii->force_media = 1; - } - return 0; -} - -int _kc_mii_link_ok (struct mii_if_info *mii) -{ - /* first, a dummy read, needed to latch some MII phys */ - mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR); - if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS) - return 1; - return 0; -} - -int _kc_mii_nway_restart (struct mii_if_info *mii) -{ - int bmcr; - int r = -EINVAL; - - /* if autoneg is off, it's an error */ - bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR); - - if (bmcr & BMCR_ANENABLE) { - bmcr |= BMCR_ANRESTART; - mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr); - r = 0; - } - - return r; -} - -void _kc_mii_check_link (struct mii_if_info *mii) -{ - int cur_link = mii_link_ok(mii); - int prev_link = netif_carrier_ok(mii->dev); - - if (cur_link && !prev_link) - netif_carrier_on(mii->dev); - else if (prev_link && !cur_link) - netif_carrier_off(mii->dev); -} - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) -int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, - struct mii_ioctl_data *mii_data, int cmd, - unsigned int *duplex_chg_out) -{ - int rc = 0; - unsigned int duplex_changed = 0; - - if (duplex_chg_out) - *duplex_chg_out = 0; - - mii_data->phy_id &= mii_if->phy_id_mask; - mii_data->reg_num &= mii_if->reg_num_mask; - - switch(cmd) { - case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ - case SIOCGMIIPHY: - mii_data->phy_id = mii_if->phy_id; - /* fall through */ - - 
case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */ - case SIOCGMIIREG: - mii_data->val_out = - mii_if->mdio_read(mii_if->dev, mii_data->phy_id, - mii_data->reg_num); - break; - - case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */ - case SIOCSMIIREG: { - u16 val = mii_data->val_in; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (mii_data->phy_id == mii_if->phy_id) { - switch(mii_data->reg_num) { - case MII_BMCR: { - unsigned int new_duplex = 0; - if (val & (BMCR_RESET|BMCR_ANENABLE)) - mii_if->force_media = 0; - else - mii_if->force_media = 1; - if (mii_if->force_media && - (val & BMCR_FULLDPLX)) - new_duplex = 1; - if (mii_if->full_duplex != new_duplex) { - duplex_changed = 1; - mii_if->full_duplex = new_duplex; - } - break; - } - case MII_ADVERTISE: - mii_if->advertising = val; - break; - default: - /* do nothing */ - break; - } - } - - mii_if->mdio_write(mii_if->dev, mii_data->phy_id, - mii_data->reg_num, val); - break; - } - - default: - rc = -EOPNOTSUPP; - break; - } - - if ((rc == 0) && (duplex_chg_out) && (duplex_changed)) - *duplex_chg_out = 1; - - return rc; -} -#endif /* > 2.4.6 */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h index 222c2c71..59415469 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c index 24015844..e17b7f18 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h index c6abb020..00a584f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c index c6f4130d..30de47eb 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h index 02be92ab..41024400 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c index ef7ce629..f00fe796 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h index a6ab30d2..98b74000 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c index 93659ca0..88b33fa0 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h index 9bd6f534..6ae5926f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h index a6690451..5e6f9ac9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c index 11472bd3..bc3cb2f4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h index cad28622..48f7dcfc 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c index 92fc9fc7..d26016c9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List @@ -86,7 +86,7 @@ const char ixgbe_driver_version[] = DRV_VERSION; * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, * Class, Class Mask, private data (not used) } */ -DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = { +const struct pci_device_id ixgbe_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)}, diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h index 124f00de..5ced84f8 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h index d161600b..c6f8e21f 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c index e3f5275e..234fa632 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h index bbe5a9e3..5ae171ac 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h deleted file mode 100644 index 5e3559fd..00000000 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -#ifndef _IXGBE_SRIOV_H_ -#define _IXGBE_SRIOV_H_ - -int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, - int entries, u16 *hash_list, u32 vf); -void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf); -void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe); -void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_msg_task(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, - int vf, unsigned char *mac_addr); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); -void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE -int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); -#endif -int ixgbe_ndo_get_vf_config(struct net_device *netdev, - int vf, struct ifla_vf_info *ivi); -#endif -void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); -#ifdef CONFIG_PCI_IOV -int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); -#endif -int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); -#endif /* IFLA_VF_MAX */ -void ixgbe_dump_registers(struct ixgbe_adapter *adapter); - -/* - * These are defined in ixgbe_type.h on behalf of the VF driver - * but we need them here unwrapped for the PF driver. - */ -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_X540_VF 0x1515 - -#endif /* _IXGBE_SRIOV_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h index 6b21c879..bda61fa4 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c index b99d9e84..2affe242 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h index 77e8952d..38bcc87b 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c index 5f2523ed..d84c7ccb 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h index bf27579b..4c7a6408 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List @@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) #endif /* >= 3.16.0 */ +/* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) + * For older kernels backported this commit, need to use renamed functions. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)) +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#endif + #endif /* _KCOMPAT_H_ */ diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h index a0e5cb6b..58cbadd3 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -25,6 +25,11 @@ #ifndef _KNI_DEV_H_ #define _KNI_DEV_H_ +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -39,10 +44,11 @@ #include #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ +#define MBUF_BURST_SZ 32 + /** * A structure describing the private information for a kni device. 
*/ - struct kni_dev { /* kni list */ struct list_head list; @@ -50,7 +56,7 @@ struct kni_dev { struct net_device_stats stats; int status; uint16_t group_id; /* Group ID of a group of KNI devices */ - unsigned core_id; /* Core ID to bind */ + uint32_t core_id; /* Core ID to bind */ char name[RTE_KNI_NAMESIZE]; /* Network device name */ struct task_struct *pthread; @@ -84,38 +90,36 @@ struct kni_dev { /* response queue */ void *resp_q; - void * sync_kva; + void *sync_kva; void *sync_va; void *mbuf_kva; void *mbuf_va; /* mbuf size */ - unsigned mbuf_size; + uint32_t mbuf_size; /* synchro for request processing */ unsigned long synchro; #ifdef RTE_KNI_VHOST - struct kni_vhost_queue* vhost_queue; + struct kni_vhost_queue *vhost_queue; + volatile enum { BE_STOP = 0x1, BE_START = 0x2, BE_FINISH = 0x4, - }vq_status; + } vq_status; #endif + /* buffers */ + void *pa[MBUF_BURST_SZ]; + void *va[MBUF_BURST_SZ]; + void *alloc_pa[MBUF_BURST_SZ]; + void *alloc_va[MBUF_BURST_SZ]; }; -#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args) -#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args) -#ifdef RTE_KNI_KO_DEBUG - #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args) -#else - #define KNI_DBG(args...) -#endif - #ifdef RTE_KNI_VHOST -unsigned int +uint32_t kni_poll(struct file *file, struct socket *sock, poll_table * wait); int kni_chk_vhost_rx(struct kni_dev *kni); int kni_vhost_init(struct kni_dev *kni); @@ -127,23 +131,22 @@ struct kni_vhost_queue { int vnet_hdr_sz; struct kni_dev *kni; int sockfd; - unsigned int flags; - struct sk_buff* cache; - struct rte_kni_fifo* fifo; + uint32_t flags; + struct sk_buff *cache; + struct rte_kni_fifo *fifo; }; #endif -#ifdef RTE_KNI_VHOST_DEBUG_RX - #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args) -#else - #define KNI_DBG_RX(args...) -#endif +void kni_net_rx(struct kni_dev *kni); +void kni_net_init(struct net_device *dev); +void kni_net_config_lo_mode(char *lo_str); +void kni_net_poll_resp(struct kni_dev *kni); +void kni_set_ethtool_ops(struct net_device *netdev); -#ifdef RTE_KNI_VHOST_DEBUG_TX - #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args) -#else - #define KNI_DBG_TX(args...) 
-#endif +int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void ixgbe_kni_remove(struct pci_dev *pdev); +int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void igb_kni_remove(struct pci_dev *pdev); #endif diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c index 06b6d463..0c88589c 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c @@ -31,6 +31,7 @@ static int kni_check_if_running(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + if (priv->lad_dev) return 0; else @@ -41,6 +42,7 @@ static void kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); } @@ -48,6 +50,7 @@ static int kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); } @@ -55,6 +58,7 @@ static int kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); } @@ -62,6 +66,7 @@ static void kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); } @@ -69,6 +74,7 @@ static int kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); } @@ -76,6 +82,7 @@ static int kni_nway_reset(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); } @@ -83,6 +90,7 @@ static int kni_get_eeprom_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); } @@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, bytes); } @@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, bytes); } @@ -108,6 +118,7 @@ static void kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); } @@ -115,6 +126,7 @@ static int kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); } @@ -122,6 +134,7 @@ static void kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); } @@ -129,6 +142,7 @@ static int kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, pause); } @@ -137,6 +151,7 @@ static u32 kni_get_msglevel(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return 
priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); } @@ -144,6 +159,7 @@ static void kni_set_msglevel(struct net_device *dev, u32 data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); } @@ -151,6 +167,7 @@ static int kni_get_regs_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); } @@ -158,6 +175,7 @@ static void kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); } @@ -165,6 +183,7 @@ static void kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, data); } @@ -173,6 +192,7 @@ static int kni_get_sset_count(struct net_device *dev, int sset) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); } @@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, data); } struct ethtool_ops kni_ethtool_ops = { - .begin = kni_check_if_running, + .begin = kni_check_if_running, .get_drvinfo = kni_get_drvinfo, .get_settings = kni_get_settings, .set_settings = kni_set_settings, .get_regs_len = kni_get_regs_len, - .get_regs = kni_get_regs, - .get_wol = kni_get_wol, - .set_wol = kni_set_wol, - .nway_reset = kni_nway_reset, - .get_link = ethtool_op_get_link, + .get_regs = kni_get_regs, + .get_wol = kni_get_wol, + .set_wol = kni_set_wol, + .nway_reset = kni_nway_reset, + .get_link = ethtool_op_get_link, .get_eeprom_len = kni_get_eeprom_len, - .get_eeprom = kni_get_eeprom, - .set_eeprom = kni_set_eeprom, + .get_eeprom = kni_get_eeprom, + .set_eeprom = kni_set_eeprom, .get_ringparam = kni_get_ringparam, .set_ringparam = kni_set_ringparam, .get_pauseparam = kni_get_pauseparam, @@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = { .set_msglevel = kni_set_msglevel, .get_strings = kni_get_strings, .get_sset_count = kni_get_sset_count, - .get_ethtool_stats = kni_get_ethtool_stats, + .get_ethtool_stats = kni_get_ethtool_stats, }; void diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h index 3ea750e2..025ec1c9 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -30,13 +30,13 @@ /** * Adds num elements into the fifo. Return the number actually written */ -static inline unsigned -kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned fifo_write = fifo->write; - unsigned fifo_read = fifo->read; - unsigned new_write = fifo_write; + uint32_t i = 0; + uint32_t fifo_write = fifo->write; + uint32_t fifo_read = fifo->read; + uint32_t new_write = fifo_write; for (i = 0; i < num; i++) { new_write = (new_write + 1) & (fifo->len - 1); @@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get up to num elements from the fifo. 
Return the number actully read */ -static inline unsigned -kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned new_read = fifo->read; - unsigned fifo_write = fifo->write; + uint32_t i = 0; + uint32_t new_read = fifo->read; + uint32_t fifo_write = fifo->write; for (i = 0; i < num; i++) { if (new_read == fifo_write) @@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get the num of elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_count(struct rte_kni_fifo *fifo) { - return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1); + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); } /** * Get the num of available elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_free_count(struct rte_kni_fifo *fifo) { return (fifo->read - fifo->write - 1) & (fifo->len - 1); @@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo) * Initializes the kni fifo structure */ static inline void -kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size) { fifo->write = 0; fifo->read = 0; diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c index 59d15ca6..33b61f2a 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices"); #define KNI_MAX_DEVICES 32 -extern void kni_net_rx(struct kni_dev *kni); -extern void kni_net_init(struct net_device *dev); -extern void kni_net_config_lo_mode(char *lo_str); -extern void kni_net_poll_resp(struct kni_dev *kni); -extern void kni_set_ethtool_ops(struct net_device *netdev); - -extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void ixgbe_kni_remove(struct pci_dev *pdev); -extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void igb_kni_remove(struct pci_dev *pdev); - -static int kni_open(struct inode *inode, struct file *file); -static int kni_release(struct inode *inode, struct file *file); -static int kni_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_dev_remove(struct kni_dev *dev); - -static int __init kni_parse_kthread_mode(void); - -/* KNI processing for single kernel thread mode */ -static int kni_thread_single(void *unused); -/* KNI processing for multiple kernel thread mode */ -static int kni_thread_multiple(void *param); - -static struct file_operations kni_fops = { - .owner = THIS_MODULE, - .open = kni_open, - .release = kni_release, - .unlocked_ioctl = (void *)kni_ioctl, - .compat_ioctl = (void *)kni_compat_ioctl, -}; - -static struct miscdevice kni_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = KNI_DEVICE, - .fops = &kni_fops, -}; +extern const struct pci_device_id ixgbe_pci_tbl[]; +extern const struct pci_device_id igb_pci_tbl[]; /* loopback mode */ -static char *lo_mode = NULL; +static char *lo_mode; /* Kernel thread mode */ -static char *kthread_mode = NULL; -static unsigned multiple_kthread_on = 0; +static char *kthread_mode; +static uint32_t 
multiple_kthread_on; #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ @@ -100,20 +64,24 @@ static int kni_net_id; struct kni_net { unsigned long device_in_use; /* device in use flag */ + struct mutex kni_kthread_lock; struct task_struct *kni_kthread; struct rw_semaphore kni_list_lock; struct list_head kni_list_head; }; -static int __net_init kni_init_net(struct net *net) +static int __net_init +kni_init_net(struct net *net) { #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); + + memset(knet, 0, sizeof(*knet)); #else struct kni_net *knet; int ret; - knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL); + knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); if (!knet) { ret = -ENOMEM; return ret; @@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net) /* Clear the bit of device in use */ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + mutex_init(&knet->kni_kthread_lock); + init_rwsem(&knet->kni_list_lock); INIT_LIST_HEAD(&knet->kni_list_head); @@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net) #endif } -static void __net_exit kni_exit_net(struct net *net) +static void __net_exit +kni_exit_net(struct net *net) { -#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS - struct kni_net *knet = net_generic(net, kni_net_id); + struct kni_net *knet __maybe_unused; + + knet = net_generic(net, kni_net_id); + mutex_destroy(&knet->kni_kthread_lock); +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS kfree(knet); #endif } @@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = { #endif }; -static int __init -kni_init(void) +static int +kni_thread_single(void *data) { - int rc; - - KNI_PRINT("######## DPDK kni module loading ########\n"); - - if (kni_parse_kthread_mode() < 0) { - KNI_ERR("Invalid parameter for kthread_mode\n"); - return -EINVAL; - } + struct kni_net *knet = data; + int j; + struct kni_dev *dev; -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - rc = register_pernet_subsys(&kni_net_ops); + while (!kthread_should_stop()) { + down_read(&knet->kni_list_lock); + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { + list_for_each_entry(dev, &knet->kni_list_head, list) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } + } + up_read(&knet->kni_list_lock); +#ifdef RTE_KNI_PREEMPT_DEFAULT + /* reschedule out for a while */ + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); #endif - if (rc) - return -EPERM; - - rc = misc_register(&kni_misc); - if (rc != 0) { - KNI_ERR("Misc registration failed\n"); - goto out; } - /* Configure the lo mode according to the input parameter */ - kni_net_config_lo_mode(lo_mode); - - KNI_PRINT("######## DPDK kni module loaded ########\n"); - return 0; - -out: -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); -#else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif - return rc; } -static void __exit -kni_exit(void) +static int +kni_thread_multiple(void *param) { - misc_deregister(&kni_misc); -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); + int j; + struct kni_dev *dev = (struct kni_dev *)param; + + while (!kthread_should_stop()) { + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + kni_net_rx(dev); #endif - KNI_PRINT("####### DPDK kni module 
unloaded #######\n"); -} - -static int __init -kni_parse_kthread_mode(void) -{ - if (!kthread_mode) - return 0; - - if (strcmp(kthread_mode, "single") == 0) - return 0; - else if (strcmp(kthread_mode, "multiple") == 0) - multiple_kthread_on = 1; - else - return -1; + kni_net_poll_resp(dev); + } +#ifdef RTE_KNI_PREEMPT_DEFAULT + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } return 0; } @@ -235,21 +193,31 @@ kni_open(struct inode *inode, struct file *file) if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) return -EBUSY; - /* Create kernel thread for single mode */ - if (multiple_kthread_on == 0) { - KNI_PRINT("Single kernel thread for all KNI devices\n"); - /* Create kernel thread for RX */ - knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet, - "kni_single"); - if (IS_ERR(knet->kni_kthread)) { - KNI_ERR("Unable to create kernel threaed\n"); - return PTR_ERR(knet->kni_kthread); - } - } else - KNI_PRINT("Multiple kernel thread mode enabled\n"); - file->private_data = get_net(net); - KNI_PRINT("/dev/kni opened\n"); + pr_debug("/dev/kni opened\n"); + + return 0; +} + +static int +kni_dev_remove(struct kni_dev *dev) +{ + if (!dev) + return -ENODEV; + +#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL + if (dev->pci_dev) { + if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) + ixgbe_kni_remove(dev->pci_dev); + else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) + igb_kni_remove(dev->pci_dev); + } +#endif + + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } return 0; } @@ -263,9 +231,13 @@ kni_release(struct inode *inode, struct file *file) /* Stop kernel thread for single mode */ if (multiple_kthread_on == 0) { + mutex_lock(&knet->kni_kthread_lock); /* Stop kernel thread */ - kthread_stop(knet->kni_kthread); - knet->kni_kthread = NULL; + if (knet->kni_kthread != NULL) { + kthread_stop(knet->kni_kthread); + knet->kni_kthread = NULL; + } + mutex_unlock(&knet->kni_kthread_lock); } down_write(&knet->kni_list_lock); @@ -288,121 +260,78 @@ kni_release(struct inode *inode, struct file *file) clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); put_net(net); - KNI_PRINT("/dev/kni closed\n"); + pr_debug("/dev/kni closed\n"); return 0; } static int -kni_thread_single(void *data) +kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) { - struct kni_net *knet = data; - int j; - struct kni_dev *dev; + if (!kni || !dev) + return -1; - while (!kthread_should_stop()) { - down_read(&knet->kni_list_lock); - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { - list_for_each_entry(dev, &knet->kni_list_head, list) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); - } - } - up_read(&knet->kni_list_lock); -#ifdef RTE_KNI_PREEMPT_DEFAULT - /* reschedule out for a while */ - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif + /* Check if network name has been used */ + if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { + pr_err("KNI name %s duplicated\n", dev->name); + return -1; } return 0; } static int -kni_thread_multiple(void *param) +kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) { - int j; - struct kni_dev *dev = (struct kni_dev *)param; - - while (!kthread_should_stop()) { - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); + /** + * Create a new kernel thread for multiple 
mode, set its core affinity, + * and finally wake it up. + */ + if (multiple_kthread_on) { + kni->pthread = kthread_create(kni_thread_multiple, + (void *)kni, "kni_%s", kni->name); + if (IS_ERR(kni->pthread)) { + kni_dev_remove(kni); + return -ECANCELED; } -#ifdef RTE_KNI_PREEMPT_DEFAULT - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif - } - - return 0; -} - -static int -kni_dev_remove(struct kni_dev *dev) -{ - if (!dev) - return -ENODEV; - - switch (dev->device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include - igb_kni_remove(dev->pci_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev): - #include - ixgbe_kni_remove(dev->pci_dev); - break; - default: - break; - } - - if (dev->net_dev) { - unregister_netdev(dev->net_dev); - free_netdev(dev->net_dev); - } - return 0; -} + if (force_bind) + kthread_bind(kni->pthread, kni->core_id); + wake_up_process(kni->pthread); + } else { + mutex_lock(&knet->kni_kthread_lock); + + if (knet->kni_kthread == NULL) { + knet->kni_kthread = kthread_create(kni_thread_single, + (void *)knet, "kni_single"); + if (IS_ERR(knet->kni_kthread)) { + mutex_unlock(&knet->kni_kthread_lock); + kni_dev_remove(kni); + return -ECANCELED; + } -static int -kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) -{ - if (!kni || !dev) - return -1; + if (force_bind) + kthread_bind(knet->kni_kthread, kni->core_id); + wake_up_process(knet->kni_kthread); + } - /* Check if network name has been used */ - if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { - KNI_ERR("KNI name %s duplicated\n", dev->name); - return -1; + mutex_unlock(&knet->kni_kthread_lock); } return 0; } static int -kni_ioctl_create(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_create(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret; struct rte_kni_device_info dev_info; - struct pci_dev *pci = NULL; - struct pci_dev *found_pci = NULL; struct net_device *net_dev = NULL; - struct net_device *lad_dev = NULL; struct kni_dev *kni, *dev, *n; - printk(KERN_INFO "KNI: Creating kni...\n"); + pr_info("Creating kni...\n"); /* Check the buffer size, to avoid warning */ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) return -EINVAL; @@ -410,17 +339,21 @@ kni_ioctl_create(struct net *net, /* Copy kni info from user space */ ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_create"); + pr_err("copy_from_user in kni_ioctl_create"); return -EIO; } + /* Check if name is zero-ended */ + if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { + pr_err("kni.name not zero-terminated"); + return -EINVAL; + } + /** - * Check if the cpu core id is valid for binding, - * for multiple kernel thread mode. + * Check if the cpu core id is valid for binding. 
*/ - if (multiple_kthread_on && dev_info.force_bind && - !cpu_online(dev_info.core_id)) { - KNI_ERR("cpu %u is not online\n", dev_info.core_id); + if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { + pr_err("cpu %u is not online\n", dev_info.core_id); return -EINVAL; } @@ -435,12 +368,12 @@ kni_ioctl_create(struct net *net, up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, -#ifdef NET_NAME_UNKNOWN - NET_NAME_UNKNOWN, +#ifdef NET_NAME_USER + NET_NAME_USER, #endif kni_net_init); if (net_dev == NULL) { - KNI_ERR("error allocating device \"%s\"\n", dev_info.name); + pr_err("error allocating device \"%s\"\n", dev_info.name); return -EBUSY; } @@ -464,44 +397,43 @@ kni_ioctl_create(struct net *net, kni->sync_va = dev_info.sync_va; kni->sync_kva = phys_to_virt(dev_info.sync_phys); - kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys); - kni->mbuf_va = dev_info.mbuf_va; - #ifdef RTE_KNI_VHOST kni->vhost_queue = NULL; kni->vq_status = BE_STOP; #endif kni->mbuf_size = dev_info.mbuf_size; - KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", + pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", (unsigned long long) dev_info.tx_phys, kni->tx_q); - KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", + pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", (unsigned long long) dev_info.rx_phys, kni->rx_q); - KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", + pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", (unsigned long long) dev_info.alloc_phys, kni->alloc_q); - KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n", + pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", (unsigned long long) dev_info.free_phys, kni->free_q); - KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n", + pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", (unsigned long long) dev_info.req_phys, kni->req_q); - KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", + pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", (unsigned long long) dev_info.resp_phys, kni->resp_q); - KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n", - (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva); - KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va); - KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); + pr_debug("mbuf_size: %u\n", kni->mbuf_size); - KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n", + pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n", dev_info.bus, dev_info.devid, dev_info.function, dev_info.vendor_id, dev_info.device_id); +#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL + struct pci_dev *found_pci = NULL; + struct net_device *lad_dev = NULL; + struct pci_dev *pci = NULL; + pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */ while (pci) { - KNI_PRINT("pci_bus: %02x:%02x:%02x \n", + pr_debug("pci_bus: %02x:%02x:%02x\n", pci->bus->number, PCI_SLOT(pci->devfn), PCI_FUNC(pci->devfn)); @@ -510,28 +442,21 @@ kni_ioctl_create(struct net *net, (PCI_SLOT(pci->devfn) == dev_info.devid) && (PCI_FUNC(pci->devfn) == dev_info.function)) { found_pci = pci; - switch (dev_info.device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include - ret = igb_kni_probe(found_pci, &lad_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \ - case (dev): - #include + + if (pci_match_id(ixgbe_pci_tbl, found_pci)) ret = ixgbe_kni_probe(found_pci, &lad_dev); - break; - default: + else if (pci_match_id(igb_pci_tbl, found_pci)) + ret = igb_kni_probe(found_pci, &lad_dev); + else ret = -1; - break; - } - KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n", + pr_debug("PCI found: 
pci=0x%p, lad_dev=0x%p\n", pci, lad_dev); if (ret == 0) { kni->lad_dev = lad_dev; kni_set_ethtool_ops(kni->net_dev); } else { - KNI_ERR("Device not supported by ethtool"); + pr_err("Device not supported by ethtool"); kni->lad_dev = NULL; } @@ -544,9 +469,10 @@ kni_ioctl_create(struct net *net, } if (pci) pci_dev_put(pci); +#endif if (kni->lad_dev) - memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); + ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); else /* * Generate random mac address. eth_random_addr() is the newer @@ -556,9 +482,11 @@ kni_ioctl_create(struct net *net, ret = register_netdev(net_dev); if (ret) { - KNI_ERR("error %i registering device \"%s\"\n", + pr_err("error %i registering device \"%s\"\n", ret, dev_info.name); + kni->net_dev = NULL; kni_dev_remove(kni); + free_netdev(net_dev); return -ENODEV; } @@ -566,22 +494,9 @@ kni_ioctl_create(struct net *net, kni_vhost_init(kni); #endif - /** - * Create a new kernel thread for multiple mode, set its core affinity, - * and finally wake it up. - */ - if (multiple_kthread_on) { - kni->pthread = kthread_create(kni_thread_multiple, - (void *)kni, - "kni_%s", kni->name); - if (IS_ERR(kni->pthread)) { - kni_dev_remove(kni); - return -ECANCELED; - } - if (dev_info.force_bind) - kthread_bind(kni->pthread, kni->core_id); - wake_up_process(kni->pthread); - } + ret = kni_run_thread(knet, kni, dev_info.force_bind); + if (ret != 0) + return ret; down_write(&knet->kni_list_lock); list_add(&kni->list, &knet->kni_list_head); @@ -591,8 +506,8 @@ kni_ioctl_create(struct net *net, } static int -kni_ioctl_release(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_release(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret = -EINVAL; @@ -600,11 +515,11 @@ kni_ioctl_release(struct net *net, struct rte_kni_device_info dev_info; if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) - return -EINVAL; + return -EINVAL; ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_release"); + pr_err("copy_from_user in kni_ioctl_release"); return -EIO; } @@ -631,21 +546,19 @@ kni_ioctl_release(struct net *net, break; } up_write(&knet->kni_list_lock); - printk(KERN_INFO "KNI: %s release kni named %s\n", + pr_info("%s release kni named %s\n", (ret == 0 ? 
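kni_ioctl_create() and kni_ioctl_release() above are driven from userspace by librte_kni through plain open()/ioctl() calls on the misc device. A rough sketch of that control path follows; the struct layout and ioctl number are simplified stand-ins, the real definitions live in exec-env/rte_kni_common.h and must match the kernel module exactly:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Simplified stand-in: the real struct also carries queue physical
 * addresses, mbuf layout, PCI ids, core_id and force_bind. */
struct kni_dev_info_stub {
        char name[32];
        unsigned int core_id;
        unsigned char force_bind;
};
#define KNI_CREATE_STUB _IOWR(0, 2, struct kni_dev_info_stub)

int main(void)
{
        struct kni_dev_info_stub info;
        int fd = open("/dev/kni", O_RDWR);

        if (fd < 0) {
                perror("open /dev/kni");
                return 1;
        }

        memset(&info, 0, sizeof(info));
        snprintf(info.name, sizeof(info.name), "vEth0");
        /* The module validates the name, checks cpu_online() when
         * force_bind is set, registers the netdev and starts the kthread. */
        if (ioctl(fd, KNI_CREATE_STUB, &info) < 0)
                perror("KNI create");

        close(fd);
        return 0;
}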
"Successfully" : "Unsuccessfully"), dev_info.name); return ret; } static int -kni_ioctl(struct inode *inode, - unsigned int ioctl_num, - unsigned long ioctl_param) +kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { int ret = -EINVAL; struct net *net = current->nsproxy->net_ns; - KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); + pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); /* * Switch according to the ioctl called @@ -661,7 +574,7 @@ kni_ioctl(struct inode *inode, ret = kni_ioctl_release(net, ioctl_num, ioctl_param); break; default: - KNI_DBG("IOCTL default\n"); + pr_debug("IOCTL default\n"); break; } @@ -669,16 +582,99 @@ kni_ioctl(struct inode *inode, } static int -kni_compat_ioctl(struct inode *inode, - unsigned int ioctl_num, +kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } +static const struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, + .unlocked_ioctl = (void *)kni_ioctl, + .compat_ioctl = (void *)kni_compat_ioctl, +}; + +static struct miscdevice kni_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = KNI_DEVICE, + .fops = &kni_fops, +}; + +static int __init +kni_parse_kthread_mode(void) +{ + if (!kthread_mode) + return 0; + + if (strcmp(kthread_mode, "single") == 0) + return 0; + else if (strcmp(kthread_mode, "multiple") == 0) + multiple_kthread_on = 1; + else + return -1; + + return 0; +} + +static int __init +kni_init(void) +{ + int rc; + + if (kni_parse_kthread_mode() < 0) { + pr_err("Invalid parameter for kthread_mode\n"); + return -EINVAL; + } + + if (multiple_kthread_on == 0) + pr_debug("Single kernel thread for all KNI devices\n"); + else + pr_debug("Multiple kernel thread mode enabled\n"); + +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + rc = register_pernet_subsys(&kni_net_ops); +#else + rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif + if (rc) + return -EPERM; + + rc = misc_register(&kni_misc); + if (rc != 0) { + pr_err("Misc registration failed\n"); + goto out; + } + + /* Configure the lo mode according to the input parameter */ + kni_net_config_lo_mode(lo_mode); + + return 0; + +out: +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif + return rc; +} + +static void __exit +kni_exit(void) +{ + misc_deregister(&kni_misc); +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif +} + module_init(kni_init); module_exit(kni_exit); diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c index fc82193a..4ac99cfe 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c @@ -44,23 +44,103 @@ #define WD_TIMEOUT 5 /*jiffies */ -#define MBUF_BURST_SZ 32 - #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ /* typedef for rx function */ typedef void (*kni_net_rx_t)(struct kni_dev *kni); -static int kni_net_tx(struct sk_buff *skb, struct net_device *dev); static void kni_net_rx_normal(struct kni_dev *kni); -static void kni_net_rx_lo_fifo(struct kni_dev *kni); -static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni); -static int kni_net_process_request(struct 
kni_dev *kni, - struct rte_kni_request *req); /* kni rx function pointer, with default to normal rx */ static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; +/* physical address to kernel virtual address */ +static void * +pa2kva(void *pa) +{ + return phys_to_virt((unsigned long)pa); +} + +/* physical address to virtual address */ +static void * +pa2va(void *pa, struct rte_kni_mbuf *m) +{ + void *va; + + va = (void *)((unsigned long)pa + + (unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr); + return va; +} + +/* mbuf data kernel virtual address from mbuf kernel virtual address */ +static void * +kva2data_kva(struct rte_kni_mbuf *m) +{ + return phys_to_virt(m->buf_physaddr + m->data_off); +} + +/* virtual address to physical address */ +static void * +va2pa(void *va, struct rte_kni_mbuf *m) +{ + void *pa; + + pa = (void *)((unsigned long)va - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); + return pa; +} + +/* + * It can be called to process the request. + */ +static int +kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) +{ + int ret = -1; + void *resp_va; + uint32_t num; + int ret_val; + + if (!kni || !req) { + pr_err("No kni instance or request\n"); + return -EINVAL; + } + + mutex_lock(&kni->sync_lock); + + /* Construct data */ + memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); + num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); + if (num < 1) { + pr_err("Cannot send to req_q\n"); + ret = -EBUSY; + goto fail; + } + + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { + ret = -ETIME; + goto fail; + } + num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); + if (num != 1 || resp_va != kni->sync_va) { + /* This should never happen */ + pr_err("No data in resp_q\n"); + ret = -ENODATA; + goto fail; + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); + ret = 0; + +fail: + mutex_unlock(&kni->sync_lock); + return ret; +} + /* * Open and close */ @@ -115,19 +195,113 @@ kni_net_config(struct net_device *dev, struct ifmap *map) return 0; } +/* + * Transmit a packet (called by the kernel) + */ +#ifdef RTE_KNI_VHOST +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#else +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + int len = 0; + uint32_t ret; + struct kni_dev *kni = netdev_priv(dev); + struct rte_kni_mbuf *pkt_kva = NULL; + void *pkt_pa = NULL; + void *pkt_va = NULL; + + /* save the timestamp */ +#ifdef HAVE_TRANS_START_HELPER + netif_trans_update(dev); +#else + dev->trans_start = jiffies; +#endif + + /* Check if the length of skb is less than mbuf size */ + if (skb->len > kni->mbuf_size) + goto drop; + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. 
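The pa2kva()/pa2va()/va2pa() helpers added above replace the old global mbuf_va/mbuf_kva offsets: the module now derives the application's virtual address from each mbuf's own buf_addr/buf_physaddr pair, and uses phys_to_virt() for its own mapping. A minimal userspace sketch of just the VA/PA delta arithmetic, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the few rte_kni_mbuf fields the helpers rely on. */
struct mbuf_stub {
        uint64_t buf_addr;              /* application VA of the data buffer */
        uint64_t buf_physaddr;          /* physical address of the same buffer */
};

/* pa2va(): physical address -> application VA, via the per-mbuf delta. */
static uint64_t pa2va_stub(uint64_t pa, const struct mbuf_stub *m)
{
        return pa + (m->buf_addr - m->buf_physaddr);
}

/* va2pa(): the inverse mapping, used when following m->next segment chains. */
static uint64_t va2pa_stub(uint64_t va, const struct mbuf_stub *m)
{
        return va - (m->buf_addr - m->buf_physaddr);
}

int main(void)
{
        struct mbuf_stub m = { .buf_addr = 0x7f0000001000ULL,
                               .buf_physaddr = 0x000000101000ULL };
        uint64_t pa = 0x0000001020c0ULL;        /* example mbuf PA from a FIFO */
        uint64_t va = pa2va_stub(pa, &m);

        printf("pa=%#llx -> va=%#llx -> pa=%#llx\n",
               (unsigned long long)pa, (unsigned long long)va,
               (unsigned long long)va2pa_stub(va, &m));
        return 0;
}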
+ */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = pa2kva(pkt_pa); + data_kva = kva2data_kva(pkt_kva); + pkt_va = pa2va(pkt_pa, pkt_kva); + + len = skb->len; + memcpy(data_kva, skb->data, len); + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, &pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + pr_err("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + pr_err("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return NETDEV_TX_OK; + +drop: + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#endif + /* * RX: normal working mode */ static void kni_net_rx_normal(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rx, num_fq; + uint32_t i, num_rx, num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Calculate the number of entries to dequeue from rx_q */ - num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ); + num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ); /* Burst dequeue from rx_q */ - num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx); + num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx); if (num_rx == 0) return; /* Transfer received packets to netif */ for (i = 0; i < num_rx; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va - + kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); if (!skb) { - KNI_ERR("Out of mem, dropping pkts\n"); /* Update statistics */ kni->stats.rx_dropped++; continue; @@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni) if (!kva->next) break; - kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num_rx); + ret = kni_fifo_put(kni->free_q, kni->va, num_rx); if (ret != num_rx) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); } /* @@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni) static void kni_net_rx_lo_fifo(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num, num_rq, num_tq, num_aq, num_fq; + uint32_t i, num, num_rq, num_tq, num_aq, num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct rte_kni_mbuf *alloc_kva; - struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ]; void *alloc_data_kva; /* Get the number of entries in rx_q */ @@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) num = min(num_rq, num_tq); num = min(num, num_aq); num = 
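kni_net_tx() above zero-pads anything shorter than ETH_ZLEN (60 bytes, the minimum Ethernet frame length excluding the FCS) before handing the mbuf to the application. A small sketch of that padding step in isolation:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ZLEN 60     /* minimum frame length excluding FCS */

/* Copy a frame into a large-enough buffer, zero-padding runts to ETH_ZLEN.
 * Returns the length actually written. */
static size_t copy_and_pad(uint8_t *dst, const uint8_t *src, size_t len)
{
        memcpy(dst, src, len);
        if (len < ETH_ZLEN) {
                memset(dst + len, 0, ETH_ZLEN - len);
                len = ETH_ZLEN;
        }
        return len;
}

int main(void)
{
        uint8_t frame[64] = { 0xff, 0xff, 0xff };       /* pretend 42-byte ARP */
        uint8_t out[2048];

        printf("wrote %zu bytes\n", copy_and_pad(out, frame, 42));
        return 0;
}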
min(num, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Failing should not happen */ /* Dequeue entries from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num); + ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num); if (ret) { num = ret; /* Copy mbufs */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; - - alloc_kva = (void *)alloc_va[i] - kni->mbuf_va + - kni->mbuf_kva; - alloc_data_kva = alloc_kva->buf_addr + - alloc_kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); + + alloc_kva = pa2kva(kni->alloc_pa[i]); + alloc_data_kva = kva2data_kva(alloc_kva); + kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva); + memcpy(alloc_data_kva, data_kva, len); alloc_kva->pkt_len = len; alloc_kva->data_len = len; @@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) } /* Burst enqueue mbufs into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num); + ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into tx_q\n"); + pr_err("Fail to enqueue mbufs into tx_q\n"); } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num); + ret = kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); /** * Update statistic, and enqueue/dequeue failure is impossible, @@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rq, num_fq, num; + uint32_t i, num_rq, num_fq, num; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Calculate the number of entries to dequeue from rx_q */ num = min(num_rq, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue mbufs from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Copy mbufs to sk buffer and then call tx interface */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); - if (skb == NULL) - KNI_ERR("Out of mem, dropping pkts\n"); - else { + if (skb) { /* Align IP on 16B boundary */ skb_reserve(skb, 2); memcpy(skb_put(skb, len), data_kva, len); @@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Simulate real usage, allocate/copy skb twice */ skb = dev_alloc_skb(len + 2); if (skb == NULL) { - KNI_ERR("Out of mem, dropping pkts\n"); 
kni->stats.rx_dropped++; continue; } @@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) if (!kva->next) break; - kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) } /* enqueue all the mbufs from rx_q into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)&va, num); + ret = kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); } /* rx interface */ @@ -404,115 +565,19 @@ kni_net_rx(struct kni_dev *kni) (*kni_net_rx_func)(kni); } -/* - * Transmit a packet (called by the kernel) - */ -#ifdef RTE_KNI_VHOST -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#else -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - int len = 0; - unsigned ret; - struct kni_dev *kni = netdev_priv(dev); - struct rte_kni_mbuf *pkt_kva = NULL; - struct rte_kni_mbuf *pkt_va = NULL; - - /* save the timestamp */ -#ifdef HAVE_TRANS_START_HELPER - netif_trans_update(dev); -#else - dev->trans_start = jiffies; -#endif - - /* Check if the length of skb is less than mbuf size */ - if (skb->len > kni->mbuf_size) - goto drop; - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. - */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; - data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va - + kni->mbuf_kva; - - len = skb->len; - memcpy(data_kva, skb->data, len); - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return NETDEV_TX_OK; - -drop: - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#endif - /* * Deal with a transmit timeout. 
*/ static void -kni_net_tx_timeout (struct net_device *dev) +kni_net_tx_timeout(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies, - jiffies - dev->trans_start); + pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev_trans_start(dev)); kni->stats.tx_errors++; netif_wake_queue(dev); - return; } /* @@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev) static int kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - KNI_DBG("kni_net_ioctl %d\n", - ((struct kni_dev *)netdev_priv(dev))->group_id); + pr_debug("kni_net_ioctl group:%d cmd:%d\n", + ((struct kni_dev *)netdev_priv(dev))->group_id, cmd); return 0; } @@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) struct rte_kni_request req; struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu); memset(&req, 0, sizeof(req)); req.req_id = RTE_KNI_REQ_CHANGE_MTU; @@ -561,55 +626,6 @@ kni_net_poll_resp(struct kni_dev *kni) wake_up_interruptible(&kni->wq); } -/* - * It can be called to process the request. - */ -static int -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) -{ - int ret = -1; - void *resp_va; - unsigned num; - int ret_val; - - if (!kni || !req) { - KNI_ERR("No kni instance or request\n"); - return -EINVAL; - } - - mutex_lock(&kni->sync_lock); - - /* Construct data */ - memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); - num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); - if (num < 1) { - KNI_ERR("Cannot send to req_q\n"); - ret = -EBUSY; - goto fail; - } - - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { - ret = -ETIME; - goto fail; - } - num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); - if (num != 1 || resp_va != kni->sync_va) { - /* This should never happen */ - KNI_ERR("No data in resp_q\n"); - ret = -ENODATA; - goto fail; - } - - memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); - ret = 0; - -fail: - mutex_unlock(&kni->sync_lock); - return ret; -} - /* * Return statistics to the caller */ @@ -617,6 +633,7 @@ static struct net_device_stats * kni_net_stats(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); + return &kni->stats; } @@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev) static int kni_net_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, - const void *saddr, unsigned int len) + const void *saddr, uint32_t len) { struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); @@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev, return dev->hard_header_len; } - /* * Re-fill the eth header */ @@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb) * * Returns 0 on success, negative on failure **/ -static int kni_net_set_mac(struct net_device *netdev, void *p) +static int +kni_net_set_mac(struct net_device *netdev, void *p) { struct sockaddr *addr = p; + if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); @@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p) } #ifdef HAVE_CHANGE_CARRIER_CB -static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) +static int +kni_net_change_carrier(struct 
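kni_net_change_mtu() above reuses the synchronous request path shown earlier: the request is serialized into the shared sync buffer, the single sync_va token is pushed on req_q, and the caller sleeps up to 3*HZ until userspace echoes that same token back on resp_q. A condensed, lock-free sketch of the token round trip, with one-slot queues standing in for the KNI FIFOs:

#include <stdio.h>
#include <string.h>

struct one_slot_q { void *item; int full; };

static int q_put(struct one_slot_q *q, void *item)
{
        if (q->full)
                return 0;
        q->item = item;
        q->full = 1;
        return 1;
}

static int q_get(struct one_slot_q *q, void **item)
{
        if (!q->full)
                return 0;
        *item = q->item;
        q->full = 0;
        return 1;
}

int main(void)
{
        char sync_buf[64];              /* stands in for kni->sync_kva/sync_va */
        struct one_slot_q req_q = {0}, resp_q = {0};
        void *token = NULL;

        /* kernel side: serialize the request, post the token */
        strcpy(sync_buf, "CHANGE_MTU:1500");
        q_put(&req_q, sync_buf);

        /* user side: consume the request, write the response in place,
         * echo the same token back */
        q_get(&req_q, &token);
        strcpy(token, "OK");
        q_put(&resp_q, token);

        /* kernel side: the response only counts if the token matches */
        q_get(&resp_q, &token);
        printf("%s\n", token == (void *)sync_buf ? (char *)token : "bogus token");
        return 0;
}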
net_device *dev, bool new_carrier) { if (new_carrier) netif_carrier_on(dev); @@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_init\n"); - init_waitqueue_head(&kni->wq); mutex_init(&kni->sync_lock); @@ -726,18 +743,18 @@ void kni_net_config_lo_mode(char *lo_str) { if (!lo_str) { - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); return; } if (!strcmp(lo_str, "lo_mode_none")) - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); else if (!strcmp(lo_str, "lo_mode_fifo")) { - KNI_PRINT("loopback mode=lo_mode_fifo enabled"); + pr_debug("loopback mode=lo_mode_fifo enabled"); kni_net_rx_func = kni_net_rx_lo_fifo; } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { - KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled"); + pr_debug("loopback mode=lo_mode_fifo_skb enabled"); kni_net_rx_func = kni_net_rx_lo_fifo_skb; } else - KNI_PRINT("Incognizant parameter, loopback disabled"); + pr_debug("Incognizant parameter, loopback disabled"); } diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c index a3ca8499..f54c34b1 100644 --- a/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c +++ b/src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "compat.h" #include "kni_dev.h" @@ -39,21 +40,12 @@ #define RX_BURST_SZ 4 -extern void put_unused_fd(unsigned int fd); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) -extern struct file* -sock_alloc_file(struct socket *sock, - int flags, const char *dname); - -extern int get_unused_fd_flags(unsigned flags); - -extern void fd_install(unsigned int fd, struct file *file); - +#ifdef HAVE_STATIC_SOCK_MAP_FD static int kni_sock_map_fd(struct socket *sock) { struct file *file; int fd = get_unused_fd_flags(0); + if (fd < 0) return fd; @@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock) fd_install(fd, file); return fd; } -#else -#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif static struct proto kni_raw_proto = { @@ -77,13 +67,13 @@ static struct proto kni_raw_proto = { static inline int kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { struct rte_kni_mbuf *pkt_kva = NULL; struct rte_kni_mbuf *pkt_va = NULL; int ret; - KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n", + pr_debug("tx offset=%d, len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, (int)m->msg_iter.iov->iov_len); #else @@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; data_kva = pkt_kva->buf_addr + pkt_kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + - kni->mbuf_va + kni->mbuf_kva; #ifdef HAVE_IOV_ITER_MSGHDR copy_from_iter(data_kva, len, &m->msg_iter); @@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); if (unlikely(ret != 1)) { /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + pr_err("Fail to enqueue mbuf into tx_q\n"); goto drop; } } else { /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + pr_err("Fail to dequeue mbuf from alloc_q\n"); goto drop; } @@ -153,12 +143,12 @@ drop: static inline int kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { uint32_t pkt_len; struct rte_kni_mbuf *kva; struct rte_kni_mbuf 
*va; - void * data_kva; + void *data_kva; struct sk_buff *skb; struct kni_vhost_queue *q = kni->vhost_queue; @@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, if (unlikely(skb == NULL)) return 0; - kva = (struct rte_kni_mbuf*)skb->data; + kva = (struct rte_kni_mbuf *)skb->data; /* free skb to cache */ skb->data = NULL; - if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into rx cache fifo\n"); + pr_err("Fail to enqueue entries into rx cache fifo\n"); pkt_len = kva->data_len; if (unlikely(pkt_len > len)) goto drop; - KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", + pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); #else @@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, kni->stats.rx_packets++; /* enqueue mbufs into free_q */ - va = (void*)kva - kni->mbuf_kva + kni->mbuf_va; - if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1))) + va = (void *)kva - kni->mbuf_kva + kni->mbuf_va; + if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); - KNI_DBG_RX("receive done %d\n", pkt_len); + pr_debug("receive done %d\n", pkt_len); return pkt_len; @@ -221,29 +211,25 @@ drop: return 0; } -static unsigned int -kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) +static uint32_t +kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int mask = 0; + uint32_t mask = 0; if (unlikely(q == NULL || q->kni == NULL)) return POLLERR; kni = q->kni; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n", +#ifdef HAVE_SOCKET_WQ + pr_debug("start kni_poll on group %d, wq 0x%16llx\n", kni->group_id, (uint64_t)sock->wq); -#else - KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n", - kni->group_id, (uint64_t)&sock->wait); -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) poll_wait(file, &sock->wq->wait, wait); #else + pr_debug("start kni_poll on group %d, wait at 0x%16llx\n", + kni->group_id, (uint64_t)&sock->wait); poll_wait(file, &sock->wait, wait); #endif @@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) if (sock_writeable(&q->sk) || #ifdef SOCKWQ_ASYNC_NOSPACE - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #else - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #endif - sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; return mask; @@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, struct rte_kni_mbuf *kva; kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; - (skb)->data = (unsigned char*)kva; + (skb)->data = (unsigned char *)kva; (skb)->len = kva->data_len; skb_queue_tail(&q->sk.sk_receive_queue, skb); } @@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, struct sk_buff **skb, struct rte_kni_mbuf **va) { int i; + for (i = 0; i < 
RX_BURST_SZ; skb++, va++, i++) kni_vhost_enqueue(kni, q, *skb, *va); } @@ -287,9 +275,9 @@ int kni_chk_vhost_rx(struct kni_dev *kni) { struct kni_vhost_queue *q = kni->vhost_queue; - unsigned nb_in, nb_mbuf, nb_skb; - const unsigned BURST_MASK = RX_BURST_SZ - 1; - unsigned nb_burst, nb_backlog, i; + uint32_t nb_in, nb_mbuf, nb_skb; + const uint32_t BURST_MASK = RX_BURST_SZ - 1; + uint32_t nb_burst, nb_backlog, i; struct sk_buff *skb[RX_BURST_SZ]; struct rte_kni_mbuf *va[RX_BURST_SZ]; @@ -305,20 +293,18 @@ kni_chk_vhost_rx(struct kni_dev *kni) nb_mbuf = kni_fifo_count(kni->rx_q); nb_in = min(nb_mbuf, nb_skb); - nb_in = min(nb_in, (unsigned)RX_BURST_SZ); + nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ); nb_burst = (nb_in & ~BURST_MASK); nb_backlog = (nb_in & BURST_MASK); /* enqueue skb_queue per BURST_SIZE bulk */ - if (0 != nb_burst) { - if (unlikely(RX_BURST_SZ != kni_fifo_get( - kni->rx_q, (void **)&va, - RX_BURST_SZ))) + if (nb_burst != 0) { + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; - if (unlikely(RX_BURST_SZ != kni_fifo_get( - q->fifo, (void **)&skb, - RX_BURST_SZ))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; kni_vhost_enqueue_burst(kni, q, skb, va); @@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni) /* all leftover, do one by one */ for (i = 0; i < nb_backlog; ++i) { - if (unlikely(1 != kni_fifo_get( - kni->rx_q,(void **)&va, 1))) + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1)) goto except; - if (unlikely(1 != kni_fifo_get( - q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1)) goto except; kni_vhost_enqueue(kni, q, *skb, *va); @@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); - KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", + pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", nb_mbuf, nb_skb, nb_in); } @@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) except: /* Failing should not happen */ - KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n"); + pr_err("Fail to enqueue fifo, it shouldn't happen\n"); BUG_ON(1); return 0; @@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock, if (unlikely(q == NULL || q->kni == NULL)) return 0; - KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", + pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", #ifdef HAVE_IOV_ITER_MSGHDR len, q->flags, (int)m->msg_iter.iov->iov_len); #else @@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock, #ifdef RTE_KNI_VHOST_VNET_HDR_EN if (likely(q->flags & IFF_VNET_HDR)) { vnet_hdr_len = q->vnet_hdr_sz; - if ((len -= vnet_hdr_len) < 0) + len -= vnet_hdr_len; + if (len < 0) return -EINVAL; } #endif - if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni, - m, vnet_hdr_len, len)))) + pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len); + if (unlikely(pkt_len == 0)) return 0; #ifdef RTE_KNI_VHOST_VNET_HDR_EN @@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock, #endif /* HAVE_IOV_ITER_MSGHDR */ return -EFAULT; #endif /* RTE_KNI_VHOST_VNET_HDR_EN */ - KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", + pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", (unsigned long)len, q->flags, pkt_len); return pkt_len + vnet_hdr_len; @@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock, /* dummy tap like ioctl */ static int 
-kni_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) +kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; - unsigned int __user *up = argp; + uint32_t __user *up = argp; struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int u; + uint32_t u; int __user *sp = argp; int s; int ret; - KNI_DBG("tap ioctl cmd 0x%08x\n", cmd); + pr_debug("tap ioctl cmd 0x%08x\n", cmd); switch (cmd) { case TUNSETIFF: - KNI_DBG("TUNSETIFF\n"); + pr_debug("TUNSETIFF\n"); /* ignore the name, just look at flags */ if (get_user(u, &ifr->ifr_flags)) return -EFAULT; @@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return ret; case TUNGETIFF: - KNI_DBG("TUNGETIFF\n"); + pr_debug("TUNGETIFF\n"); rcu_read_lock_bh(); kni = rcu_dereference_bh(q->kni); if (kni) @@ -491,14 +475,14 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return -ENOLINK; ret = 0; - if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) || - put_user(q->flags, &ifr->ifr_flags)) + if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) + || put_user(q->flags, &ifr->ifr_flags)) ret = -EFAULT; dev_put(kni->net_dev); return ret; case TUNGETFEATURES: - KNI_DBG("TUNGETFEATURES\n"); + pr_debug("TUNGETFEATURES\n"); u = IFF_TAP | IFF_NO_PI; #ifdef RTE_KNI_VHOST_VNET_HDR_EN u |= IFF_VNET_HDR; @@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return 0; case TUNSETSNDBUF: - KNI_DBG("TUNSETSNDBUF\n"); + pr_debug("TUNSETSNDBUF\n"); if (get_user(u, up)) return -EFAULT; @@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, s = q->vnet_hdr_sz; if (put_user(s, sp)) return -EFAULT; - KNI_DBG("TUNGETVNETHDRSZ %d\n", s); + pr_debug("TUNGETVNETHDRSZ %d\n", s); return 0; case TUNSETVNETHDRSZ: @@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, if (s < (int)sizeof(struct virtio_net_hdr)) return -EINVAL; - KNI_DBG("TUNSETVNETHDRSZ %d\n", s); + pr_debug("TUNSETVNETHDRSZ %d\n", s); q->vnet_hdr_sz = s; return 0; case TUNSETOFFLOAD: - KNI_DBG("TUNSETOFFLOAD %lx\n", arg); + pr_debug("TUNSETOFFLOAD %lx\n", arg); #ifdef RTE_KNI_VHOST_VNET_HDR_EN /* not support any offload yet */ if (!(q->flags & IFF_VNET_HDR)) @@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, #endif default: - KNI_DBG("NOT SUPPORT\n"); + pr_debug("NOT SUPPORT\n"); return -EINVAL; } } static int -kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd, +kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } #define KNI_VHOST_WAIT_WQ_SAFE() \ -do { \ +do { \ while ((BE_FINISH | BE_STOP) == kni->vq_status) \ - msleep(1); \ -}while(0) \ + msleep(1); \ +} while (0) \ static int @@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock) if (q == NULL) return 0; - if (NULL != (kni = q->kni)) { + kni = q->kni; + if (kni != NULL) { kni->vq_status = BE_STOP; KNI_VHOST_WAIT_WQ_SAFE(); kni->vhost_queue = NULL; @@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock) sock_put(&q->sk); - KNI_DBG("dummy sock release done\n"); + pr_debug("dummy sock release done\n"); return 0; } int -kni_sock_getname (struct socket *sock, - struct sockaddr *addr, - int *sockaddr_len, int peer) +kni_sock_getname(struct socket *sock, struct sockaddr *addr, + int 
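kni_sock_ioctl() above answers the same TUN* requests a kernel tap does, which is what lets a vhost backend treat the KNI socket (exposed through the sock_fd sysfs attribute) like a tap fd. The sketch below exercises the same requests against an ordinary /dev/net/tun fd rather than KNI itself, so it is runnable on any Linux box with CAP_NET_ADMIN; against KNI the fd would come from sysfs instead of open():

#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        struct ifreq ifr;
        int sz, fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0) {
                perror("open /dev/net/tun");
                return 1;
        }

        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;    /* same flags KNI reports */
        snprintf(ifr.ifr_name, IFNAMSIZ, "tap-demo");
        if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
                perror("TUNSETIFF");
                return 1;
        }

        /* query the virtio-net header size, as a vhost backend would */
        if (ioctl(fd, TUNGETVNETHDRSZ, &sz) == 0)
                printf("vnet hdr size: %d\n", sz);

        close(fd);
        return 0;
}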
*sockaddr_len, int peer) { - KNI_DBG("dummy sock getname\n"); - ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET; + pr_debug("dummy sock getname\n"); + ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET; return 0; } @@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk) /* make sure there's no packet in buffer */ while (skb_dequeue(&sk->sk_receive_queue) != NULL) - ; + ; mb(); @@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni) if (kni->vhost_queue != NULL) return -1; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#ifdef HAVE_SK_ALLOC_KERN_PARAM q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &kni_raw_proto, 0); #else @@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni) } /* cache init */ - q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), - GFP_KERNEL); + q->cache = kzalloc( + RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), + GFP_KERNEL); if (!q->cache) goto free_fd; @@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni) for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { elem = &q->cache[i]; - kni_fifo_put(fifo, (void**)&elem, 1); + kni_fifo_put(fifo, (void **)&elem, 1); } q->fifo = fifo; @@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni) kni->vq_status = BE_START; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx," - "sk->sk_wq=0x%16llx", +#ifdef HAVE_SOCKET_WQ + pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx", q->sockfd, (uint64_t)q->sock->wq, (uint64_t)q->sk.sk_wq); #else - KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx," - "sk->sk_sleep=0x%16llx", + pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx", q->sockfd, (uint64_t)&q->sock->wait, (uint64_t)q->sk.sk_sleep); #endif @@ -768,7 +751,7 @@ free_sock: q->sock = NULL; free_sk: - sk_free((struct sock*)q); + sk_free((struct sock *)q); return err; } @@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr, struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); int sockfd = -1; + if (kni->vhost_queue != NULL) sockfd = kni->vhost_queue->sockfd; return snprintf(buf, 10, "%d\n", sockfd); @@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr, { struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); + return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 
0 : 1)); } @@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr, unsigned long en; int err = 0; - if (0 != kstrtoul(buf, 0, &en)) + if (kstrtoul(buf, 0, &en) != 0) return -EINVAL; if (en) @@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); static struct attribute *dev_attrs[] = { &dev_attr_sock_fd.attr, &dev_attr_sock_en.attr, - NULL, + NULL, }; static const struct attribute_group dev_attr_grp = { @@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni) /* dettach from kni */ q->kni = NULL; - KNI_DBG("release backend done\n"); + pr_debug("release backend done\n"); return 0; } @@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni) kni->vq_status = BE_STOP; - KNI_DBG("kni_vhost_init done\n"); + pr_debug("kni_vhost_init done\n"); return 0; } diff --git a/src/dpdk/lib/librte_ether/rte_dev_info.h b/src/dpdk/lib/librte_ether/rte_dev_info.h index 574683d3..aab6d1a6 100644 --- a/src/dpdk/lib/librte_ether/rte_dev_info.h +++ b/src/dpdk/lib/librte_ether/rte_dev_info.h @@ -34,6 +34,8 @@ #ifndef _RTE_DEV_INFO_H_ #define _RTE_DEV_INFO_H_ +#include + /* * Placeholder for accessing device registers */ diff --git a/src/dpdk/lib/librte_ether/rte_eth_ctrl.h b/src/dpdk/lib/librte_ether/rte_eth_ctrl.h index 563e80f8..83869042 100644 --- a/src/dpdk/lib/librte_ether/rte_eth_ctrl.h +++ b/src/dpdk/lib/librte_ether/rte_eth_ctrl.h @@ -34,6 +34,10 @@ #ifndef _RTE_ETH_CTRL_H_ #define _RTE_ETH_CTRL_H_ +#include +#include +#include "rte_ether.h" + /** * @file * @@ -95,6 +99,7 @@ enum rte_filter_type { RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_HASH, RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_GENERIC, RTE_ETH_FILTER_MAX }; @@ -420,8 +425,6 @@ struct rte_eth_l2_flow { struct rte_eth_ipv4_flow { uint32_t src_ip; /**< IPv4 source address in big endian. */ uint32_t dst_ip; /**< IPv4 destination address in big endian. */ - // TREX_PATCH (ip_id) - uint16_t ip_id; /**< IPv4 IP ID to match */ uint8_t tos; /**< Type of service to match. */ uint8_t ttl; /**< Time to live to match. */ uint8_t proto; /**< Protocol, next header in big endian. */ @@ -464,8 +467,6 @@ struct rte_eth_ipv6_flow { uint8_t tc; /**< Traffic class to match. */ uint8_t proto; /**< Protocol, next header to match. */ uint8_t hop_limits; /**< Hop limits to match. 
*/ - // TREX_PATCH (flow_label) - uint32_t flow_label; /** #include #include -#include #include #include #include @@ -72,6 +71,7 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; static struct rte_eth_dev_data *rte_eth_dev_data; +static uint8_t eth_dev_last_created_port; static uint8_t nb_ports; /* spinlock for eth device callbacks */ @@ -189,8 +189,23 @@ rte_eth_dev_find_free_port(void) return RTE_MAX_ETHPORTS; } +static struct rte_eth_dev * +eth_dev_get(uint8_t port_id) +{ + struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; + + eth_dev->data = &rte_eth_dev_data[port_id]; + eth_dev->attached = DEV_ATTACHED; + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + eth_dev_last_created_port = port_id; + nb_ports++; + + return eth_dev; +} + struct rte_eth_dev * -rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) +rte_eth_dev_allocate(const char *name) { uint8_t port_id; struct rte_eth_dev *eth_dev; @@ -210,28 +225,44 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) return NULL; } - eth_dev = &rte_eth_devices[port_id]; - eth_dev->data = &rte_eth_dev_data[port_id]; + memset(&rte_eth_dev_data[port_id], 0, sizeof(struct rte_eth_dev_data)); + eth_dev = eth_dev_get(port_id); snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); eth_dev->data->port_id = port_id; - eth_dev->attached = DEV_ATTACHED; - eth_dev->dev_type = type; - nb_ports++; + eth_dev->data->mtu = ETHER_MTU; + return eth_dev; } -static int -rte_eth_dev_create_unique_device_name(char *name, size_t size, - struct rte_pci_device *pci_dev) +/* + * Attach to a port already registered by the primary process, which + * makes sure that the same device would have the same port id both + * in the primary and secondary process. 
+ */ +static struct rte_eth_dev * +eth_dev_attach_secondary(const char *name) { - int ret; + uint8_t i; + struct rte_eth_dev *eth_dev; - ret = snprintf(name, size, "%d:%d.%d", - pci_dev->addr.bus, pci_dev->addr.devid, - pci_dev->addr.function); - if (ret < 0) - return ret; - return 0; + if (rte_eth_dev_data == NULL) + rte_eth_dev_data_alloc(); + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (strcmp(rte_eth_dev_data[i].name, name) == 0) + break; + } + if (i == RTE_MAX_ETHPORTS) { + RTE_PMD_DEBUG_TRACE( + "device %s is not driven by the primary process\n", + name); + return NULL; + } + + eth_dev = eth_dev_get(i); + RTE_ASSERT(eth_dev->data->port_id == i); + + return eth_dev; } int @@ -245,9 +276,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) return 0; } -static int -rte_eth_dev_init(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) { struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -257,40 +288,43 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, eth_drv = (struct eth_driver *)pci_drv; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); - - eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); - if (eth_dev == NULL) - return -ENOMEM; + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(ethdev_name); + if (eth_dev == NULL) + return -ENOMEM; + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", eth_drv->dev_private_size, RTE_CACHE_LINE_SIZE); if (eth_dev->data->dev_private == NULL) rte_panic("Cannot allocate memzone for private port data\n"); + } else { + eth_dev = eth_dev_attach_secondary(ethdev_name); + if (eth_dev == NULL) { + /* + * if we failed to attach a device, it means the + * device is skipped in primary process, due to + * some errors. If so, we return a positive value, + * to let EAL skip it for the secondary process + * as well. + */ + return 1; + } } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; + eth_dev->intr_handle = &pci_dev->intr_handle; eth_dev->driver = eth_drv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - - /* init user callbacks */ - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - /* - * Set the default MTU. 
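eth_dev_attach_secondary() above keeps port numbering consistent across processes by looking the device up by name in the shared rte_eth_dev_data[] array instead of allocating a fresh slot. The lookup itself is just a name scan over a fixed-size table; a stripped-down sketch with stand-in sizes and names:

#include <stdio.h>
#include <string.h>

#define MAX_PORTS 32
#define NAME_LEN  64

/* Stand-in for the name field of the shared per-port data array,
 * already populated by the primary process. */
static char port_names[MAX_PORTS][NAME_LEN] = {
        [0] = "0000:03:00.0",
        [1] = "0000:03:00.1",
};

/* Return the existing slot for 'name', or -1 if the primary process
 * never probed such a device. */
static int find_port_by_name(const char *name)
{
        int i;

        for (i = 0; i < MAX_PORTS; i++)
                if (strcmp(port_names[i], name) == 0)
                        return i;
        return -1;
}

int main(void)
{
        printf("%d\n", find_port_by_name("0000:03:00.1"));     /* 1 */
        printf("%d\n", find_port_by_name("0000:05:00.0"));     /* -1 */
        return 0;
}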
- */ - eth_dev->data->mtu = ETHER_MTU; /* Invoke PMD device initialization function */ diag = (*eth_drv->eth_dev_init)(eth_dev); if (diag == 0) return 0; - RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n", - pci_drv->name, + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n", + pci_drv->driver.name, (unsigned) pci_dev->id.vendor_id, (unsigned) pci_dev->id.device_id); if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -299,8 +333,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, return diag; } -static int -rte_eth_dev_uninit(struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev) { const struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -310,9 +344,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) if (pci_dev == NULL) return -EINVAL; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); eth_dev = rte_eth_dev_allocated(ethdev_name); if (eth_dev == NULL) @@ -333,35 +366,13 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(eth_dev->data->dev_private); - eth_dev->pci_dev = NULL; + eth_dev->device = NULL; eth_dev->driver = NULL; eth_dev->data = NULL; return 0; } -/** - * Register an Ethernet [Poll Mode] driver. - * - * Function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver. - * Invokes the rte_eal_pci_register() function to register the *pci_drv* - * structure embedded in the *eth_drv* structure, after having stored the - * address of the rte_eth_dev_init() function in the *devinit* field of - * the *pci_drv* structure. - * During the PCI probing phase, the rte_eth_dev_init() function is - * invoked for each PCI [Ethernet device] matching the embedded PCI - * identifiers provided by the driver. 
- */ -void -rte_eth_driver_register(struct eth_driver *eth_drv) -{ - eth_drv->pci_drv.devinit = rte_eth_dev_init; - eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; - rte_eal_pci_register(ð_drv->pci_drv); -} - int rte_eth_dev_is_valid_port(uint8_t port_id) { @@ -385,27 +396,6 @@ rte_eth_dev_count(void) return nb_ports; } -static enum rte_eth_dev_type -rte_eth_dev_get_device_type(uint8_t port_id) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN); - return rte_eth_devices[port_id].dev_type; -} - -static int -rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *addr = rte_eth_devices[port_id].pci_dev->addr; - return 0; -} - int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) { @@ -435,6 +425,9 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -EINVAL; } + if (!nb_ports) + return -ENODEV; + *port_id = RTE_MAX_ETHPORTS; for (i = 0; i < RTE_MAX_ETHPORTS; i++) { @@ -450,35 +443,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -ENODEV; } -/* TREX_PATCH removed "static" */ -int -rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id) -{ - int i; - struct rte_pci_device *pci_dev = NULL; - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *port_id = RTE_MAX_ETHPORTS; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - - pci_dev = rte_eth_devices[i].pci_dev; - - if (pci_dev && - !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) { - - *port_id = i; - - return 0; - } - } - return -ENODEV; -} - static int rte_eth_dev_is_detachable(uint8_t port_id) { @@ -504,127 +468,49 @@ rte_eth_dev_is_detachable(uint8_t port_id) return 1; } -/* attach the new physical device, then store port_id of the device */ -static int -rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id) +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint8_t *port_id) { - /* re-construct pci_device_list */ - if (rte_eal_pci_scan()) - goto err; - /* Invoke probe func of the driver can handle the new device. 
*/ - if (rte_eal_pci_probe_one(addr)) - goto err; + int ret = -1; + int current = rte_eth_dev_count(); + char *name = NULL; + char *args = NULL; - if (rte_eth_dev_get_port_by_addr(addr, port_id)) + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; goto err; + } - return 0; -err: - return -1; -} - -/* detach the new physical device, then store pci_addr of the device */ -static int -rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr) -{ - struct rte_pci_addr freed_addr; - struct rte_pci_addr vp; - - /* get pci address by port id */ - if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr)) + /* parse devargs, then retrieve device name and args */ + if (rte_eal_parse_devargs_str(devargs, &name, &args)) goto err; - /* Zeroed pci addr means the port comes from virtual device */ - vp.domain = vp.bus = vp.devid = vp.function = 0; - if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0) + ret = rte_eal_dev_attach(name, args); + if (ret < 0) goto err; - /* invoke devuninit func of the pci driver, - * also remove the device from pci_device_list */ - if (rte_eal_pci_detach(&freed_addr)) + /* no point looking at the port count if no port exists */ + if (!rte_eth_dev_count()) { + RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name); + ret = -1; goto err; + } - *addr = freed_addr; - return 0; -err: - return -1; -} - -/* attach the new virtual device, then store port_id of the device */ -static int -rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id) -{ - char *name = NULL, *args = NULL; - int ret = -1; - - /* parse vdevargs, then retrieve device name and args */ - if (rte_eal_parse_devargs_str(vdevargs, &name, &args)) - goto end; - - /* walk around dev_driver_list to find the driver of the device, - * then invoke probe function of the driver. - * rte_eal_vdev_init() updates port_id allocated after - * initialization. + /* if nothing happened, there is a bug here, since some driver told us + * it did attach a device, but did not create a port. 
*/ - if (rte_eal_vdev_init(name, args)) - goto end; - - if (rte_eth_dev_get_port_by_name(name, port_id)) - goto end; - - ret = 0; -end: - free(name); - free(args); - - return ret; -} - -/* detach the new virtual device, then store the name of the device */ -static int -rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname) -{ - char name[RTE_ETH_NAME_MAX_LEN]; - - /* get device name by port id */ - if (rte_eth_dev_get_name_by_port(port_id, name)) - goto err; - /* walk around dev_driver_list to find the driver of the device, - * then invoke uninit function of the driver */ - if (rte_eal_vdev_uninit(name)) - goto err; - - strncpy(vdevname, name, sizeof(name)); - return 0; -err: - return -1; -} - -/* attach the new device, then store port_id of the device */ -int -rte_eth_dev_attach(const char *devargs, uint8_t *port_id) -{ - struct rte_pci_addr addr; - int ret = -1; - - if ((devargs == NULL) || (port_id == NULL)) { - ret = -EINVAL; + if (current == rte_eth_dev_count()) { + ret = -1; goto err; } - if (eal_parse_pci_DomBDF(devargs, &addr) == 0) { - ret = rte_eth_dev_attach_pdev(&addr, port_id); - if (ret < 0) - goto err; - } else { - ret = rte_eth_dev_attach_vdev(devargs, port_id); - if (ret < 0) - goto err; - } + *port_id = eth_dev_last_created_port; + ret = 0; - return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + free(name); + free(args); return ret; } @@ -632,7 +518,6 @@ err: int rte_eth_dev_detach(uint8_t port_id, char *name) { - struct rte_pci_addr addr; int ret = -1; if (name == NULL) { @@ -640,33 +525,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name) goto err; } - /* check whether the driver supports detach feature, or not */ + /* FIXME: move this to eal, once device flags are relocated there */ if (rte_eth_dev_is_detachable(port_id)) goto err; - if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) { - ret = rte_eth_dev_get_addr_by_port(port_id, &addr); - if (ret < 0) - goto err; - - ret = rte_eth_dev_detach_pdev(port_id, &addr); - if (ret < 0) - goto err; - - snprintf(name, RTE_ETH_NAME_MAX_LEN, - "%04x:%02x:%02x.%d", - addr.domain, addr.bus, - addr.devid, addr.function); - } else { - ret = rte_eth_dev_detach_vdev(port_id, name); - if (ret < 0) - goto err; - } + snprintf(name, sizeof(rte_eth_devices[port_id].data->name), + "%s", rte_eth_devices[port_id].data->name); + ret = rte_eal_dev_detach(name); + if (ret < 0) + goto err; return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); return ret; } @@ -712,6 +583,9 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->rx_queue_release)(rxq[i]); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; } dev->data->nb_rx_queues = nb_queues; return 0; @@ -863,6 +737,9 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) for (i = nb_queues; i < old_nb_queues; i++) (*dev->dev_ops->tx_queue_release)(txq[i]); + + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; } dev->data->nb_tx_queues = nb_queues; return 0; @@ -1033,39 +910,61 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return 0; } +void +_rte_eth_dev_reset(struct rte_eth_dev *dev) +{ + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow reset\n", + dev->data->port_id); + return; + } + + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + + memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf)); +} + static void 
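The rewritten attach/detach pair above no longer special-cases PCI versus virtual devices: rte_eth_dev_attach() hands the devargs to rte_eal_dev_attach() and reports eth_dev_last_created_port, while rte_eth_dev_detach() passes the port's own name to rte_eal_dev_detach(). A short application-side sketch of the intended usage; it assumes rte_eal_init() has already run, and the pcap devargs string is only an example:

#include <stdint.h>
#include <stdio.h>
#include <rte_ethdev.h>

/* Hot-plug a port from a devargs string, use it, then detach it again.
 * A PCI address string such as "0000:05:00.0" works the same way. */
static void hotplug_demo(const char *devargs)
{
        uint8_t port_id;
        char name[RTE_ETH_NAME_MAX_LEN];

        if (rte_eth_dev_attach(devargs, &port_id) != 0) {
                printf("attach of '%s' failed\n", devargs);
                return;
        }
        printf("'%s' is now port %u\n", devargs, port_id);

        /* ... configure queues, start the port, run traffic ... */

        if (rte_eth_dev_detach(port_id, name) == 0)
                printf("detached '%s'\n", name);
}

/* e.g. hotplug_demo("net_pcap0,iface=eth0"); */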
rte_eth_dev_config_restore(uint8_t port_id) { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; - struct ether_addr addr; + struct ether_addr *addr; uint16_t i; uint32_t pool = 0; + uint64_t pool_mask; dev = &rte_eth_devices[port_id]; rte_eth_dev_info_get(port_id, &dev_info); - if (RTE_ETH_DEV_SRIOV(dev).active) - pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx; - - /* replay MAC address configuration */ - for (i = 0; i < dev_info.max_mac_addrs; i++) { - addr = dev->data->mac_addrs[i]; - - /* skip zero address */ - if (is_zero_ether_addr(&addr)) - continue; - - /* add address to the hardware */ - if (*dev->dev_ops->mac_addr_add && - (dev->data->mac_pool_sel[i] & (1ULL << pool))) - (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool); - else { - RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n", - port_id); - /* exit the loop but not return an error */ - break; + /* replay MAC address configuration including default MAC */ + addr = &dev->data->mac_addrs[0]; + if (*dev->dev_ops->mac_addr_set != NULL) + (*dev->dev_ops->mac_addr_set)(dev, addr); + else if (*dev->dev_ops->mac_addr_add != NULL) + (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); + + if (*dev->dev_ops->mac_addr_add != NULL) { + for (i = 1; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(addr)) + continue; + + pool = 0; + pool_mask = dev->data->mac_pool_sel[i]; + + do { + if (pool_mask & 1ULL) + (*dev->dev_ops->mac_addr_add)(dev, + addr, i, pool); + pool_mask >>= 1; + pool++; + } while (pool_mask); } } @@ -1191,6 +1090,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, uint32_t mbp_buf_size; struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **rxq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1249,6 +1149,14 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, return -EINVAL; } + rxq = dev->data->rx_queues; + if (rxq[rx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, + -ENOTSUP); + (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); + rxq[rx_queue_id] = NULL; + } + if (rx_conf == NULL) rx_conf = &dev_info.default_rxconf; @@ -1270,6 +1178,7 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; + void **txq; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1302,6 +1211,14 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, return -EINVAL; } + txq = dev->data->tx_queues; + if (txq[tx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, + -ENOTSUP); + (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); + txq[tx_queue_id] = NULL; + } + if (tx_conf == NULL) tx_conf = &dev_info.default_txconf; @@ -1480,54 +1397,6 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) } } -// TREX_PATCH -// return in stats, statistics starting from start, for len counters. -int -rte_eth_fdir_stats_get(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - i40e_trex_fdir_stats_get(dev, stats, start, len); - - return 0; -} - -// TREX_PATCH -// zero statistics counters, starting from start, for len counters. 
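/*
 * Illustrative sketch by the editor, not part of the upstream diff: the
 * reworked rte_eth_dev_config_restore() above replays each secondary MAC
 * address into every VMDq pool whose bit is set in mac_pool_sel[i]. The
 * bitmask walk, isolated with a hypothetical add_mac() callback:
 */
static void
replay_mac_pools(const struct ether_addr *addr, uint32_t index,
		 uint64_t pool_mask,
		 void (*add_mac)(const struct ether_addr *addr,
				 uint32_t index, uint32_t pool))
{
	uint32_t pool = 0;

	/* Visit every pool selected for this MAC table entry. */
	do {
		if (pool_mask & 1ULL)
			add_mac(addr, index, pool);
		pool_mask >>= 1;
		pool++;
	} while (pool_mask);
}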
-int -rte_eth_fdir_stats_reset(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - i40e_trex_fdir_stats_reset(dev, stats, start, len); - - return 0; -} - -// TREX_PATCH -int -rte_eth_get_fw_ver(int port_id, uint32_t *version) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - // Only xl710 support this - return i40e_trex_get_fw_ver(dev, version); -} - int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) { @@ -1572,8 +1441,10 @@ get_xstats_count(uint8_t port_id) } else count = 0; count += RTE_NB_STATS; - count += dev->data->nb_rx_queues * RTE_NB_RXQ_STATS; - count += dev->data->nb_tx_queues * RTE_NB_TXQ_STATS; + count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * + RTE_NB_RXQ_STATS; + count += RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) * + RTE_NB_TXQ_STATS; return count; } @@ -1587,6 +1458,7 @@ rte_eth_xstats_get_names(uint8_t port_id, int cnt_expected_entries; int cnt_driver_entries; uint32_t idx, id_queue; + uint16_t num_q; cnt_expected_entries = get_xstats_count(port_id); if (xstats_names == NULL || cnt_expected_entries < 0 || @@ -1603,7 +1475,8 @@ rte_eth_xstats_get_names(uint8_t port_id, "%s", rte_stats_strings[idx].name); cnt_used_entries++; } - for (id_queue = 0; id_queue < dev->data->nb_rx_queues; id_queue++) { + num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { snprintf(xstats_names[cnt_used_entries].name, sizeof(xstats_names[0].name), @@ -1613,7 +1486,8 @@ rte_eth_xstats_get_names(uint8_t port_id, } } - for (id_queue = 0; id_queue < dev->data->nb_tx_queues; id_queue++) { + num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { snprintf(xstats_names[cnt_used_entries].name, sizeof(xstats_names[0].name), @@ -1649,14 +1523,18 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, unsigned count = 0, i, q; signed xcount = 0; uint64_t val, *stats_ptr; + uint16_t nb_rxqs, nb_txqs; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + /* Return generic statistics */ - count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) + - (dev->data->nb_tx_queues * RTE_NB_TXQ_STATS); + count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + + (nb_txqs * RTE_NB_TXQ_STATS); /* implemented by the driver */ if (dev->dev_ops->xstats_get != NULL) { @@ -1687,7 +1565,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } /* per-rxq stats */ - for (q = 0; q < dev->data->nb_rx_queues; q++) { + for (q = 0; q < nb_rxqs; q++) { for (i = 0; i < RTE_NB_RXQ_STATS; i++) { stats_ptr = RTE_PTR_ADD(ð_stats, rte_rxq_stats_strings[i].offset + @@ -1698,7 +1576,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } /* per-txq stats */ - for (q = 0; q < dev->data->nb_tx_queues; q++) { + for (q = 0; q < nb_txqs; q++) { for (i = 0; i < RTE_NB_TXQ_STATS; i++) { stats_ptr = RTE_PTR_ADD(ð_stats, rte_txq_stats_strings[i].offset + @@ -1708,8 +1586,11 @@ 
rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, } } - for (i = 0; i < count + xcount; i++) + for (i = 0; i < count; i++) xstats[i].id = i; + /* add an offset to driver-specific stats */ + for ( ; i < count + xcount; i++) + xstats[i].id += count; return count + xcount; } @@ -1766,6 +1647,18 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, STAT_QMAP_RX); } +int +rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); + return (*dev->dev_ops->fw_version_get)(dev, fw_version, fw_size); +} + void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) { @@ -1785,7 +1678,6 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = dev->pci_dev; dev_info->driver_name = dev->data->drv_name; dev_info->nb_rx_queues = dev->data->nb_rx_queues; dev_info->nb_tx_queues = dev->data->nb_tx_queues; @@ -2354,32 +2246,6 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) return 0; } -int -rte_eth_dev_set_vf_rxmode(uint8_t port_id, uint16_t vf, - uint16_t rx_mode, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf); - return -EINVAL; - } - - if (rx_mode == 0) { - RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n"); - return -EINVAL; - } - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on); -} /* * Returns index into MAC address array of addr. 
Use 00:00:00:00:00:00 to find @@ -2469,76 +2335,6 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); } -int -rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP); - return (*dev->dev_ops->set_vf_rx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on) -{ - uint16_t num_vfs; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - - num_vfs = dev_info.max_vfs; - if (vf > num_vfs) { - RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP); - return (*dev->dev_ops->set_vf_tx)(dev, vf, on); -} - -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id, - uint64_t vf_mask, uint8_t vlan_on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - if (vlan_id > ETHER_MAX_VLAN_ID) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n", - vlan_id); - return -EINVAL; - } - - if (vf_mask == 0) { - RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n"); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP); - return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id, - vf_mask, vlan_on); -} - int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate) { @@ -2569,45 +2365,12 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate); } -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate, - uint64_t q_msk) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_link link; - - if (q_msk == 0) - return 0; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - link = dev->data->dev_link; - - if (vf > dev_info.max_vfs) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: " - "invalid vf id=%d\n", port_id, vf); - return -EINVAL; - } - - if (tx_rate > link.link_speed) { - RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, " - "bigger than link speed= %d\n", - tx_rate, link.link_speed); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP); - return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk); -} - int rte_eth_mirror_rule_set(uint8_t port_id, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); if (mirror_conf->rule_type == 0) { @@ -2643,7 +2406,7 @@ rte_eth_mirror_rule_set(uint8_t port_id, int rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) { - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev *dev; 
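	/*
	 * Editor's note, not part of the upstream change: here (and in
	 * rte_eth_mirror_rule_set() above) the initializer on *dev* is
	 * dropped so that RTE_ETH_VALID_PORTID_OR_ERR_RET() validates
	 * port_id before rte_eth_devices[port_id] is dereferenced; the
	 * lookup happens only after the check, matching the pattern used
	 * by the other ethdev entry points in this file.
	 */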
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -2678,14 +2441,15 @@ rte_eth_dev_callback_register(uint8_t port_id, } /* create a new callback. */ - if (user_cb == NULL) + if (user_cb == NULL) { user_cb = rte_zmalloc("INTR_USER_CALLBACK", sizeof(struct rte_eth_dev_callback), 0); - if (user_cb != NULL) { - user_cb->cb_fn = cb_fn; - user_cb->cb_arg = cb_arg; - user_cb->event = event; - TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + } } rte_spinlock_unlock(&rte_eth_dev_cb_lock); @@ -2737,7 +2501,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id, void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event) + enum rte_eth_event_type event, void *cb_arg) { struct rte_eth_dev_callback *cb_lst; struct rte_eth_dev_callback dev_cb; @@ -2748,6 +2512,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, continue; dev_cb = *cb_lst; cb_lst->active = 1; + if (cb_arg != NULL) + dev_cb.cb_arg = (void *) cb_arg; + rte_spinlock_unlock(&rte_eth_dev_cb_lock); dev_cb.cb_fn(dev->data->port_id, dev_cb.event, dev_cb.cb_arg); @@ -2769,7 +2536,13 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); dev = &rte_eth_devices[port_id]; - intr_handle = &dev->pci_dev->intr_handle; + + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -2797,7 +2570,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, + dev->data->drv_name, ring_name, dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); @@ -2829,7 +2602,12 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, return -EINVAL; } - intr_handle = &dev->pci_dev->intr_handle; + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; if (!intr_handle->intr_vec) { RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); return -EPERM; @@ -3431,15 +3209,15 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de return; } + eth_dev->intr_handle = &pci_dev->intr_handle; + eth_dev->data->dev_flags = 0; if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE) - eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; eth_dev->data->kdrv = pci_dev->kdrv; - eth_dev->data->numa_node = pci_dev->numa_node; - eth_dev->data->drv_name = pci_dev->driver->name; + eth_dev->data->numa_node = pci_dev->device.numa_node; + eth_dev->data->drv_name = pci_dev->driver->driver.name; } int diff --git a/src/dpdk/lib/librte_ether/rte_ethdev.h b/src/dpdk/lib/librte_ether/rte_ethdev.h index 5339d3be..c17bbda8 100644 --- a/src/dpdk/lib/librte_ether/rte_ethdev.h +++ b/src/dpdk/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -190,6 +191,9 @@ struct rte_mbuf; /** * A structure used to retrieve statistics for an Ethernet port. 
+ * Not all statistics fields in struct rte_eth_stats are supported + * by any type of network interface card (NIC). If any statistics + * field is not supported, its value is 0. */ struct rte_eth_stats { uint64_t ipackets; /**< Total number of successfully received packets. */ @@ -198,7 +202,7 @@ struct rte_eth_stats { uint64_t obytes; /**< Total number of successfully transmitted bytes. */ uint64_t imissed; /**< Total of RX packets dropped by the HW, - * because there are no available mbufs (i.e. RX queues are full). + * because there are no available buffer (i.e. RX queues are full). */ uint64_t ierrors; /**< Total number of erroneous received packets. */ uint64_t oerrors; /**< Total number of failed transmitted packets. */ @@ -255,6 +259,7 @@ struct rte_eth_stats { /** * A structure used to retrieve link-level information of an Ethernet port. */ +__extension__ struct rte_eth_link { uint32_t link_speed; /**< ETH_SPEED_NUM_ */ uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ @@ -346,6 +351,7 @@ struct rte_eth_rxmode { enum rte_eth_rx_mq_mode mq_mode; uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + __extension__ uint16_t header_split : 1, /**< Header Split enable. */ hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ hw_vlan_filter : 1, /**< VLAN filter enable. */ @@ -645,6 +651,7 @@ struct rte_eth_txmode { /* For i40e specifically */ uint16_t pvid; + __extension__ uint8_t hw_vlan_reject_tagged : 1, /**< If set, reject sending out tagged pkts */ hw_vlan_reject_untagged : 1, @@ -696,6 +703,29 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** + * Max allowed number of segments per whole packet. + * + * - For TSO packet this is the total number of data descriptors allowed + * by device. + * + * @see nb_mtu_seg_max + */ + uint16_t nb_seg_max; + + /** + * Max number of segments per one MTU. + * + * - For non-TSO packet, this is the maximum allowed number of segments + * in a single transmit packet. + * + * - For TSO packet each segment within the TSO may span up to this + * value. + * + * @see nb_seg_max + */ + uint16_t nb_mtu_seg_max; }; /** @@ -767,8 +797,6 @@ struct rte_fdir_conf { struct rte_eth_fdir_masks mask; struct rte_eth_fdir_flex_conf flex_conf; /**< Flex payload configuration. */ - // TREX_PATCH - uint8_t flexbytes_offset; }; /** @@ -853,6 +881,7 @@ struct rte_eth_conf { #define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 #define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 #define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 +#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 /** * TX offload capabilities of a device. @@ -866,6 +895,11 @@ struct rte_eth_conf { #define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 #define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 +#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. 
*/ +#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 /** * Ethernet device information @@ -930,23 +964,26 @@ struct rte_eth_txq_info { /** * An Ethernet device extended statistic structure * - * This structure is used by ethdev->eth_xstats_get() to provide - * statistics that are not provided in the generic rte_eth_stats + * This structure is used by rte_eth_xstats_get() to provide + * statistics that are not provided in the generic *rte_eth_stats* * structure. + * It maps a name id, corresponding to an index in the array returned + * by rte_eth_xstats_get_names(), to a statistic value. */ struct rte_eth_xstat { - uint64_t id; - uint64_t value; + uint64_t id; /**< The index in xstats name array. */ + uint64_t value; /**< The statistic counter value. */ }; /** - * A name-key lookup element for extended statistics. + * A name element for extended statistics. * - * This structure is used to map between names and ID numbers - * for extended ethernet statistics. + * An array of this structure is returned by rte_eth_xstats_get_names(). + * It lists the names of extended statistics for a PMD. The *rte_eth_xstat* + * structure references these names by their array index. */ struct rte_eth_xstat_name { - char name[RTE_ETH_XSTATS_NAME_SIZE]; + char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ }; #define ETH_DCB_NUM_TCS 8 @@ -1142,6 +1179,10 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); /**< @internal Check DD bit of specific RX descriptor */ +typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +/**< @internal Get firmware information of an Ethernet device. */ + typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); @@ -1183,6 +1224,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1241,39 +1287,11 @@ typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, uint8_t on); /**< @internal Set all Unicast Hash bitmap */ -typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t rx_mode, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Set a VF receive mode */ - -typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint8_t on); -/**< @internal Enable or disable a VF transmit */ - -typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev, - uint16_t vlan, - uint64_t vf_mask, - uint8_t vlan_on); -/**< @internal Set VF VLAN pool filter */ - typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t tx_rate); /**< @internal Set queue TX rate */ -typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev, - uint16_t vf, - uint16_t tx_rate, - uint64_t q_msk); -/**< @internal Set VF TX rate */ - typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, @@ -1423,11 +1441,18 @@ struct eth_dev_ops { eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ eth_dev_close_t dev_close; /**< Close device. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ - eth_link_update_t link_update; /**< Get device link state. */ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ + mtu_set_t mtu_set; /**< Set MTU. */ + eth_stats_get_t stats_get; /**< Get generic device statistics. */ eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ @@ -1436,109 +1461,93 @@ struct eth_dev_ops { /**< Get names of extended statistics. */ eth_queue_stats_mapping_set_t queue_stats_mapping_set; /**< Configure per queue stat counter mapping. */ + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ + eth_fw_version_get_t fw_version_get; /**< Get firmware version. */ eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; - /**< Get packet types supported and identified by device*/ - mtu_set_t mtu_set; /**< Set MTU. */ - vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ - vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ + /**< Get packet types supported and identified by device. */ + + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. 
*/ vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ - vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion */ - eth_queue_start_t rx_queue_start;/**< Start RX for a queue.*/ - eth_queue_stop_t rx_queue_stop;/**< Stop RX for a queue.*/ - eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/ - eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/ - eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/ - eth_queue_release_t rx_queue_release;/**< Release RX queue.*/ - eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */ - eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */ - /**< Enable Rx queue interrupt. */ - eth_rx_enable_intr_t rx_queue_intr_enable; - /**< Disable Rx queue interrupt.*/ - eth_rx_disable_intr_t rx_queue_intr_disable; - eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue.*/ - eth_queue_release_t tx_queue_release;/**< Release TX queue.*/ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ + + eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ + eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ + eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ + eth_queue_release_t rx_queue_release; /**< Release RX queue. */ + eth_rx_queue_count_t rx_queue_count;/**< Get Rx queue count. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ + eth_queue_release_t tx_queue_release; /**< Release TX queue. */ + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ - priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control.*/ - eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */ - eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address */ - eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address */ - eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array */ - eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap */ - eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule.*/ - eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule.*/ - eth_set_vf_rx_mode_t set_vf_rx_mode; /**< Set VF RX mode */ - eth_set_vf_rx_t set_vf_rx; /**< enable/disable a VF receive */ - eth_set_vf_tx_t set_vf_tx; /**< enable/disable a VF transmit */ - eth_set_vf_vlan_filter_t set_vf_vlan_filter; /**< Set VF VLAN filter */ - /** Add UDP tunnel port. */ - eth_udp_tunnel_port_add_t udp_tunnel_port_add; - /** Del UDP tunnel port. */ - eth_udp_tunnel_port_del_t udp_tunnel_port_del; - eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */ - eth_set_vf_rate_limit_t set_vf_rate_limit; /**< Set VF rate limit */ - /** Update redirection table. */ - reta_update_t reta_update; - /** Query redirection table. 
*/ - reta_query_t reta_query; - - eth_get_reg_t get_reg; - /**< Get registers */ - eth_get_eeprom_length_t get_eeprom_length; - /**< Get eeprom length */ - eth_get_eeprom_t get_eeprom; - /**< Get eeprom data */ - eth_set_eeprom_t set_eeprom; - /**< Set eeprom */ - /* bypass control */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */ + + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ + + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. */ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ + + eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Config ether type of l2 tunnel. */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Enable/disable l2 tunnel offload functions. */ + + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ + + rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ + rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ + reta_update_t reta_update; /** Update redirection table. */ + reta_query_t reta_query; /** Query redirection table. */ + + eth_get_reg_t get_reg; /**< Get registers. */ + eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ + eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ + eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ + + /* bypass control */ #ifdef RTE_NIC_BYPASS - bypass_init_t bypass_init; - bypass_state_set_t bypass_state_set; - bypass_state_show_t bypass_state_show; - bypass_event_set_t bypass_event_set; - bypass_event_show_t bypass_event_show; - bypass_wd_timeout_set_t bypass_wd_timeout_set; - bypass_wd_timeout_show_t bypass_wd_timeout_show; - bypass_ver_show_t bypass_ver_show; - bypass_wd_reset_t bypass_wd_reset; + bypass_init_t bypass_init; + bypass_state_set_t bypass_state_set; + bypass_state_show_t bypass_state_show; + bypass_event_set_t bypass_event_set; + bypass_event_show_t bypass_event_show; + bypass_wd_timeout_set_t bypass_wd_timeout_set; + bypass_wd_timeout_show_t bypass_wd_timeout_show; + bypass_ver_show_t bypass_ver_show; + bypass_wd_reset_t bypass_wd_reset; #endif - /** Configure RSS hash protocols. */ - rss_hash_update_t rss_hash_update; - /** Get current RSS hash configuration. */ - rss_hash_conf_get_t rss_hash_conf_get; - eth_filter_ctrl_t filter_ctrl; - /**< common filter control. */ - eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */ - eth_rxq_info_get_t rxq_info_get; - /**< retrieve RX queue information. */ - eth_txq_info_get_t txq_info_get; - /**< retrieve TX queue information. */ + eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ + + eth_get_dcb_info get_dcb_info; /** Get DCB information. */ + + eth_timesync_enable_t timesync_enable; /** Turn IEEE1588/802.1AS timestamping on. */ - eth_timesync_enable_t timesync_enable; + eth_timesync_disable_t timesync_disable; /** Turn IEEE1588/802.1AS timestamping off. */ - eth_timesync_disable_t timesync_disable; - /** Read the IEEE1588/802.1AS RX timestamp. */ eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; - /** Read the IEEE1588/802.1AS TX timestamp. */ + /** Read the IEEE1588/802.1AS RX timestamp. 
*/ eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; - - /** Get DCB information */ - eth_get_dcb_info get_dcb_info; - /** Adjust the device clock.*/ - eth_timesync_adjust_time timesync_adjust_time; - /** Get the device clock time. */ - eth_timesync_read_time timesync_read_time; - /** Set the device clock time. */ - eth_timesync_write_time timesync_write_time; - /** Config ether type of l2 tunnel */ - eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; - /** Enable/disable l2 tunnel offload functions */ - eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ + eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ + eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ }; /** @@ -1604,17 +1613,6 @@ struct rte_eth_rxtx_callback { void *param; }; -/** - * The eth device type. - */ -enum rte_eth_dev_type { - RTE_ETH_DEV_UNKNOWN, /**< unknown device type */ - RTE_ETH_DEV_PCI, - /**< Physical function and Virtual function of PCI devices */ - RTE_ETH_DEV_VIRTUAL, /**< non hardware device */ - RTE_ETH_DEV_MAX /**< max value of this enum */ -}; - /** * @internal * The generic data structure associated with each ethernet device. @@ -1628,10 +1626,12 @@ enum rte_eth_dev_type { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ - struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + struct rte_device *device; /**< Backing device */ + struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ /** User application callbacks for NIC interrupts */ struct rte_eth_dev_cb_list link_intr_cbs; /** @@ -1645,7 +1645,6 @@ struct rte_eth_dev { */ struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; uint8_t attached; /**< Flag indicating the port is attached */ - enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */ } __rte_cache_aligned; struct rte_eth_dev_sriov { @@ -1693,6 +1692,7 @@ struct rte_eth_dev_data { struct ether_addr* hash_mac_addrs; /** Device Ethernet MAC addresses of hash filtering. */ uint8_t port_id; /**< Device [external] port identifier. */ + __extension__ uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ @@ -1758,8 +1758,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name); * @return * - Slot in the rte_dev_devices array for a new device; */ -struct rte_eth_dev *rte_eth_dev_allocate(const char *name, - enum rte_eth_dev_type type); +struct rte_eth_dev *rte_eth_dev_allocate(const char *name); /** * @internal @@ -1778,7 +1777,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); * @param devargs * A pointer to a strings array describing the new device * to be attached. The strings should be a pci address like - * '0000:01:00.0' or virtual device name like 'eth_pcap0'. + * '0000:01:00.0' or virtual device name like 'net_pcap0'. * @param port_id * A pointer to a port identifier actually attached. 
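 *
 * An illustrative call (editor's sketch, not from the DPDK sources); the
 * pcap vdev arguments are assumed purely for the example:
 *
 *     uint8_t port;
 *     if (rte_eth_dev_attach("net_pcap0,iface=eth0", &port) == 0)
 *         printf("attached as port %u\n", port);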
* @return @@ -1872,18 +1871,6 @@ struct eth_driver { unsigned int dev_private_size; /**< Size of device private data. */ }; -/** - * @internal - * A function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver (PMD). - * - * @param eth_drv - * The pointer to the *eth_driver* structure associated with - * the Ethernet driver. - */ -void rte_eth_driver_register(struct eth_driver *eth_drv); - /** * Convert a numerical speed in Mbps to a bitmap flag that can be used in * the bitmap link_speeds of the struct rte_eth_conf @@ -1929,6 +1916,19 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue, uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); +/** + * @internal + * Release device queues and clear its configuration to force the user + * application to reconfigure it. It is for internal use only. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * void + */ +void _rte_eth_dev_reset(struct rte_eth_dev *dev); + /** * Allocate and set up a receive queue for an Ethernet device. * @@ -2288,18 +2288,19 @@ void rte_eth_stats_reset(uint8_t port_id); * @param port_id * The port identifier of the Ethernet device. * @param xstats_names - * Block of memory to insert names into. Must be at least size in capacity. - * If set to NULL, function returns required capacity. + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. * @param size - * Capacity of xstats_names (number of names). + * The size of the xstats_names array (number of elements). * @return - * - positive value lower or equal to size: success. The return value + * - A positive value lower or equal to size: success. The return value * is the number of entries filled in the stats table. - * - positive value higher than size: error, the given statistics table + * - A positive value higher than size: error, the given statistics table * is too small. The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get_names(uint8_t port_id, struct rte_eth_xstat_name *xstats_names, @@ -2312,19 +2313,20 @@ int rte_eth_xstats_get_names(uint8_t port_id, * The port identifier of the Ethernet device. * @param xstats * A pointer to a table of structure of type *rte_eth_xstat* - * to be filled with device statistics ids and values. + * to be filled with device statistics ids and values: id is the + * index of the name string in xstats_names (see rte_eth_xstats_get_names()), + * and value is the statistic counter. * This parameter can be set to NULL if n is 0. * @param n - * The size of the stats table, which should be large enough to store - * all the statistics of the device. + * The size of the xstats array (number of elements). * @return - * - positive value lower or equal to n: success. The return value + * - A positive value lower or equal to n: success. The return value * is the number of entries filled in the stats table. - * - positive value higher than n: error, the given statistics table + * - A positive value higher than n: error, the given statistics table * is too small. 
The return value corresponds to the size that should * be given to succeed. The entries in the table are not valid and * shall not be used by the caller. - * - negative value on error (invalid port id) + * - A negative value on error (invalid port id). */ int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats, unsigned n); @@ -2400,6 +2402,27 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); */ void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); +/** + * Retrieve the firmware version of a device. + * + * @param port_id + * The port identifier of the device. + * @param fw_version + * A pointer to a string array storing the firmware version of a device, + * the string includes terminating null. This pointer is allocated by caller. + * @param fw_size + * The size of the string array pointed by fw_version, which should be + * large enough to store firmware version of the device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (>0) if *fw_size* is not enough to store firmware version, return + * the size of the non truncated string. + */ +int rte_eth_dev_fw_version_get(uint8_t port_id, + char *fw_version, size_t fw_size); + /** * Retrieve the supported packet types of an Ethernet device. * @@ -2835,6 +2858,115 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prepare() function is invoked to prepare output packets to be + * transmitted on the output queue *queue_id* of the Ethernet device designated + * by its *port_id*. + * The *nb_pkts* parameter is the number of packets to be prepared which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * For each packet to send, the rte_eth_tx_prepare() function performs + * the following operations: + * + * - Check if packet meets devices requirements for tx offloads. + * + * - Check limitations about number of segments. + * + * - Check additional requirements when debug is enabled. + * + * - Update and/or reset required checksums when tx offload is set for packet. + * + * Since this function can modify packet data, provided mbufs must be safely + * writable (e.g. modified data cannot be in shared segment). + * + * The rte_eth_tx_prepare() function returns the number of packets ready to be + * sent. A return value equal to *nb_pkts* means that all packets are valid and + * ready to be sent, otherwise stops processing on the first invalid packet and + * leaves the rest packets untouched. + * + * When this functionality is not implemented in the driver, all packets are + * are returned untouched. + * + * @param port_id + * The port identifier of the Ethernet device. + * The value must be a valid port id. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to process. 
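 *
 * A typical pairing with rte_eth_tx_burst() (editor's sketch, not part of
 * the upstream comment; the error handler is hypothetical):
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port, queue, pkts, nb_pkts);
 *     if (nb_prep != nb_pkts)
 *         drop_or_log(pkts[nb_prep], rte_errno);
 *     uint16_t nb_sent = rte_eth_tx_burst(port, queue, pkts, nb_prep);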
+ * @return + * The number of packets correct and ready to be sent. The return value can be + * less than the value of the *tx_pkts* parameter when some packet doesn't + * meet devices requirements with rte_errno set appropriately: + * - -EINVAL: offload flags are not correctly set + * - -ENOTSUP: the offload feature is not supported by the hardware + * + */ + +#ifndef RTE_ETHDEV_TX_PREPARE_NOOP + +static inline uint16_t +rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + if (!dev->tx_pkt_prepare) + return nb_pkts; + + return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id], + tx_pkts, nb_pkts); +} + +#else + +/* + * Native NOOP operation for compilation targets which doesn't require any + * preparations steps, and functional NOOP may introduce unnecessary performance + * drop. + * + * Generally this is not a good idea to turn it on globally and didn't should + * be used if behavior of tx_preparation can change. + */ + +static inline uint16_t +rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id, + __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + return nb_pkts; +} + +#endif + typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count, void *userdata); @@ -3049,6 +3181,8 @@ enum rte_eth_event_type { /**< queue state event (enabled/disabled) */ RTE_ETH_EVENT_INTR_RESET, /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ + RTE_ETH_EVENT_MACSEC, /**< MACsec offload related event */ RTE_ETH_EVENT_MAX /**< max value of this enum */ }; @@ -3070,6 +3204,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ * @param cb_arg * Pointer to the parameters for the registered callback. * + * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX. + * This even occurs when a message from the VF is received by the PF. + * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param. + * This struct is defined in rte_pmd_ixgbe.h. + * * @return * - On success, zero. * - On failure, a negative value. @@ -3108,12 +3247,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id, * Pointer to struct rte_eth_dev. * @param event * Eth device interrupt event type. + * @param cb_arg + * Update callback parameter to pass data back to user application. + * This allows the user application to decide if a particular function + * is permitted or not. * * @return * void */ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event); + enum rte_eth_event_type event, void *cb_arg); /** * When there is no rx packet coming in Rx Queue for a long time, we can @@ -3409,93 +3552,6 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, */ int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); - /** - * Set RX L2 Filtering mode of a VF of an Ethernet device. - * - * @param port - * The port identifier of the Ethernet device. - * @param vf - * VF id. 
- * @param rx_mode - * The RX mode mask, which is one or more of accepting Untagged Packets, - * packets that match the PFUTA table, Broadcast and Multicast Promiscuous. - * ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, - * ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used - * in rx_mode. - * @param on - * 1 - Enable a VF RX mode. - * 0 - Disable a VF RX mode. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode, - uint8_t on); - -/** -* Enable or disable a VF traffic transmit of the Ethernet device. -* -* @param port -* The port identifier of the Ethernet device. -* @param vf -* VF id. -* @param on -* 1 - Enable a VF traffic transmit. -* 0 - Disable a VF traffic transmit. -* @return -* - (0) if successful. -* - (-ENODEV) if *port_id* invalid. -* - (-ENOTSUP) if hardware doesn't support. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable or disable a VF traffic receive of an Ethernet device. -* -* @param port -* The port identifier of the Ethernet device. -* @param vf -* VF id. -* @param on -* 1 - Enable a VF traffic receive. -* 0 - Disable a VF traffic receive. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on); - -/** -* Enable/Disable hardware VF VLAN filtering by an Ethernet device of -* received VLAN packets tagged with a given VLAN Tag Identifier. -* -* @param port id -* The port identifier of the Ethernet device. -* @param vlan_id -* The VLAN Tag Identifier whose filtering must be enabled or disabled. -* @param vf_mask -* Bitmap listing which VFs participate in the VLAN filtering. -* @param vlan_on -* 1 - Enable VFs VLAN filtering. -* 0 - Disable VFs VLAN filtering. -* @return -* - (0) if successful. -* - (-ENOTSUP) if hardware doesn't support. -* - (-ENODEV) if *port_id* invalid. -* - (-EINVAL) if bad parameter. -*/ -int -rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, - uint64_t vf_mask, - uint8_t vlan_on); - /** * Set a traffic mirroring rule on an Ethernet device * @@ -3556,26 +3612,6 @@ int rte_eth_mirror_rule_reset(uint8_t port_id, int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, uint16_t tx_rate); -/** - * Set the rate limitation for a vf on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param vf - * VF id. - * @param tx_rate - * The tx rate allocated from the total link speed for this VF id. - * @param q_msk - * The queue mask which need to set the rate. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk); - /** * Initialize bypass logic. This function needs to be called before * executing any other bypass API. 
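/*
 * Editor's sketch, not part of the upstream diff: with the per-VF
 * rte_eth_set_vf_rate_limit() API removed above, per-queue Tx shaping is
 * still available through rte_eth_set_queue_rate_limit(). The rate is in
 * Mbps, consistent with the link-speed comparison the removed VF variant
 * performed.
 */
static int
cap_tx_queue_rate(uint8_t port_id, uint16_t queue_idx, uint16_t rate_mbps)
{
	struct rte_eth_link link;

	/* Refuse rates above the current link speed, as the VF helper did. */
	rte_eth_link_get_nowait(port_id, &link);
	if (rate_mbps > link.link_speed)
		return -EINVAL;

	return rte_eth_set_queue_rate_limit(port_id, queue_idx, rate_mbps);
}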
@@ -4343,7 +4379,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, /** * Get the port id from pci adrress or device name -* Ex: 0000:2:00.0 or vdev name eth_pcap0 +* Ex: 0000:2:00.0 or vdev name net_pcap0 * * @param name * pci address or name of the device @@ -4370,6 +4406,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); +/** + * @internal + * Wrapper for use by pci drivers as a .probe function to attach to a ethdev + * interface. + */ +int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev); + +/** + * @internal + * Wrapper for use by pci drivers as a .remove function to detach a ethdev + * interface. + */ +int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev); + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_ether/rte_ether.h b/src/dpdk/lib/librte_ether/rte_ether.h deleted file mode 100644 index 1d62d8e5..00000000 --- a/src/dpdk/lib/librte_ether/rte_ether.h +++ /dev/null @@ -1,416 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETHER_H_ -#define _RTE_ETHER_H_ - -/** - * @file - * - * Ethernet Helpers in RTE - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#include -#include -#include -#include - -#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ -#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ -#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ -#define ETHER_HDR_LEN \ - (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ -#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ -#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */ -#define ETHER_MTU \ - (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ - -#define ETHER_MAX_VLAN_FRAME_LEN \ - (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. 
*/ - -#define ETHER_MAX_JUMBO_FRAME_LEN \ - 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ - -#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ - -#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ - -/** - * Ethernet address: - * A universally administered address is uniquely assigned to a device by its - * manufacturer. The first three octets (in transmission order) contain the - * Organizationally Unique Identifier (OUI). The following three (MAC-48 and - * EUI-48) octets are assigned by that organization with the only constraint - * of uniqueness. - * A locally administered address is assigned to a device by a network - * administrator and does not contain OUIs. - * See http://standards.ieee.org/regauth/groupmac/tutorial.html - */ -struct ether_addr { - uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */ -} __attribute__((__packed__)); - -#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ -#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ - -/** - * Check if two Ethernet addresses are the same. - * - * @param ea1 - * A pointer to the first ether_addr structure containing - * the ethernet address. - * @param ea2 - * A pointer to the second ether_addr structure containing - * the ethernet address. - * - * @return - * True (1) if the given two ethernet address are the same; - * False (0) otherwise. - */ -static inline int is_same_ether_addr(const struct ether_addr *ea1, - const struct ether_addr *ea2) -{ - int i; - for (i = 0; i < ETHER_ADDR_LEN; i++) - if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) - return 0; - return 1; -} - -/** - * Check if an Ethernet address is filled with zeros. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is filled with zeros; - * false (0) otherwise. - */ -static inline int is_zero_ether_addr(const struct ether_addr *ea) -{ - int i; - for (i = 0; i < ETHER_ADDR_LEN; i++) - if (ea->addr_bytes[i] != 0x00) - return 0; - return 1; -} - -/** - * Check if an Ethernet address is a unicast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a unicast address; - * false (0) otherwise. - */ -static inline int is_unicast_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0; -} - -/** - * Check if an Ethernet address is a multicast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a multicast address; - * false (0) otherwise. - */ -static inline int is_multicast_ether_addr(const struct ether_addr *ea) -{ - return ea->addr_bytes[0] & ETHER_GROUP_ADDR; -} - -/** - * Check if an Ethernet address is a broadcast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a broadcast address; - * false (0) otherwise. - */ -static inline int is_broadcast_ether_addr(const struct ether_addr *ea) -{ - const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea; - - return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF && - ea_words[2] == 0xFFFF); -} - -/** - * Check if an Ethernet address is a universally assigned address. 
- * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a universally assigned address; - * false (0) otherwise. - */ -static inline int is_universal_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0; -} - -/** - * Check if an Ethernet address is a locally assigned address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a locally assigned address; - * false (0) otherwise. - */ -static inline int is_local_admin_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0; -} - -/** - * Check if an Ethernet address is a valid address. Checks that the address is a - * unicast address and is not filled with zeros. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is valid; - * false (0) otherwise. - */ -static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) -{ - return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea)); -} - -/** - * Generate a random Ethernet address that is locally administered - * and not multicast. - * @param addr - * A pointer to Ethernet address. - */ -static inline void eth_random_addr(uint8_t *addr) -{ - uint64_t rand = rte_rand(); - uint8_t *p = (uint8_t*)&rand; - - rte_memcpy(addr, p, ETHER_ADDR_LEN); - addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ - addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ -} - -/** - * Fast copy an Ethernet address. - * - * @param ea_from - * A pointer to a ether_addr structure holding the Ethernet address to copy. - * @param ea_to - * A pointer to a ether_addr structure where to copy the Ethernet address. - */ -static inline void ether_addr_copy(const struct ether_addr *ea_from, - struct ether_addr *ea_to) -{ -#ifdef __INTEL_COMPILER - uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes); - uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes); - - to_words[0] = from_words[0]; - to_words[1] = from_words[1]; - to_words[2] = from_words[2]; -#else - /* - * Use the common way, because of a strange gcc warning. - */ - *ea_to = *ea_from; -#endif -} - -#define ETHER_ADDR_FMT_SIZE 18 -/** - * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx. - * - * @param buf - * A pointer to buffer contains the formatted MAC address. - * @param size - * The format buffer size. - * @param eth_addr - * A pointer to a ether_addr structure. - */ -static inline void -ether_format_addr(char *buf, uint16_t size, - const struct ether_addr *eth_addr) -{ - snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", - eth_addr->addr_bytes[0], - eth_addr->addr_bytes[1], - eth_addr->addr_bytes[2], - eth_addr->addr_bytes[3], - eth_addr->addr_bytes[4], - eth_addr->addr_bytes[5]); -} - -/** - * Ethernet header: Contains the destination address, source address - * and frame type. - */ -struct ether_hdr { - struct ether_addr d_addr; /**< Destination address. */ - struct ether_addr s_addr; /**< Source address. */ - uint16_t ether_type; /**< Frame type. */ -} __attribute__((__packed__)); - -/** - * Ethernet VLAN Header. - * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type - * of the encapsulated frame. 
- */ -struct vlan_hdr { - uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */ - uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */ -} __attribute__((__packed__)); - -/** - * VXLAN protocol header. - * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and - * Reserved fields (24 bits and 8 bits) - */ -struct vxlan_hdr { - uint32_t vx_flags; /**< flag (8) + Reserved (24). */ - uint32_t vx_vni; /**< VNI (24) + Reserved (8). */ -} __attribute__((__packed__)); - -/* Ethernet frame types */ -#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */ -#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */ -#define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */ -#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ -#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ -#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ -#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ -#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ - -#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) -/**< VXLAN tunnel header length. */ - -/** - * Extract VLAN tag information into mbuf - * - * Software version of VLAN stripping - * - * @param m - * The packet mbuf. - * @return - * - 0: Success - * - 1: not a vlan packet - */ -static inline int rte_vlan_strip(struct rte_mbuf *m) -{ - struct ether_hdr *eh - = rte_pktmbuf_mtod(m, struct ether_hdr *); - - if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) - return -1; - - struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); - m->ol_flags |= PKT_RX_VLAN_PKT; - m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); - - /* Copy ether header over rather than moving whole packet */ - memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)), - eh, 2 * ETHER_ADDR_LEN); - - return 0; -} - -/** - * Insert VLAN tag into mbuf. - * - * Software version of VLAN unstripping - * - * @param m - * The packet mbuf. - * @return - * - 0: On success - * -EPERM: mbuf is is shared overwriting would be unsafe - * -ENOSPC: not enough headroom in mbuf - */ -static inline int rte_vlan_insert(struct rte_mbuf **m) -{ - struct ether_hdr *oh, *nh; - struct vlan_hdr *vh; - - /* Can't insert header if mbuf is shared */ - if (rte_mbuf_refcnt_read(*m) > 1) { - struct rte_mbuf *copy; - - copy = rte_pktmbuf_clone(*m, (*m)->pool); - if (unlikely(copy == NULL)) - return -ENOMEM; - rte_pktmbuf_free(*m); - *m = copy; - } - - oh = rte_pktmbuf_mtod(*m, struct ether_hdr *); - nh = (struct ether_hdr *) - rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr)); - if (nh == NULL) - return -ENOSPC; - - memmove(nh, oh, 2 * ETHER_ADDR_LEN); - nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); - - vh = (struct vlan_hdr *) (nh + 1); - vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); - - return 0; -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ETHER_H_ */ diff --git a/src/dpdk/lib/librte_ether/rte_flow.c b/src/dpdk/lib/librte_ether/rte_flow.c new file mode 100644 index 00000000..aaa70d68 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow.c @@ -0,0 +1,159 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include "rte_ethdev.h" +#include "rte_flow_driver.h" +#include "rte_flow.h" + +/* Get generic flow operations structure from a port. */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops; + int code; + + if (unlikely(!rte_eth_dev_is_valid_port(port_id))) + code = ENODEV; + else if (unlikely(!dev->dev_ops->filter_ctrl || + dev->dev_ops->filter_ctrl(dev, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_GET, + &ops) || + !ops)) + code = ENOSYS; + else + return ops; + rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(code)); + return NULL; +} + +/* Check whether a flow rule can be created on a given port. */ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->validate)) + return ops->validate(dev, attr, pattern, actions, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Create a flow rule on a given port. */ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return NULL; + if (likely(!!ops->create)) + return ops->create(dev, attr, pattern, actions, error); + rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); + return NULL; +} + +/* Destroy a flow rule on a given port. 
*/ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->destroy)) + return ops->destroy(dev, flow, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Destroy all flow rules associated with a port. */ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->flush)) + return ops->flush(dev, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Query an existing flow rule. */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->query)) + return ops->query(dev, flow, action, data, error); + return -rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} diff --git a/src/dpdk/lib/librte_ether/rte_flow.h b/src/dpdk/lib/librte_ether/rte_flow.h new file mode 100644 index 00000000..171a5698 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow.h @@ -0,0 +1,1090 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_H_ +#define RTE_FLOW_H_ + +/** + * @file + * RTE generic flow API + * + * This interface provides the ability to program packet matching and + * associated actions in hardware through flow rules. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Flow rule attributes. + * + * Priorities are set on two levels: per group and per rule within groups. + * + * Lower values denote higher priority, the highest priority for both levels + * is 0, so that a rule with priority 0 in group 8 is always matched after a + * rule with priority 8 in group 0. + * + * Although optional, applications are encouraged to group similar rules as + * much as possible to fully take advantage of hardware capabilities + * (e.g. optimized matching) and work around limitations (e.g. a single + * pattern type possibly allowed in a given group). + * + * Group and priority levels are arbitrary and up to the application, they + * do not need to be contiguous nor start from 0, however the maximum number + * varies between devices and may be affected by existing flow rules. + * + * If a packet is matched by several rules of a given group for a given + * priority level, the outcome is undefined. It can take any path, may be + * duplicated or even cause unrecoverable errors. + * + * Note that support for more than a single group and priority level is not + * guaranteed. + * + * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). + * + * Several pattern items and actions are valid and can be used in both + * directions. Those valid for only one direction are described as such. + * + * At least one direction must be specified. + * + * Specifying both directions at once for a given rule is not recommended + * but may be valid in a few cases (e.g. shared counter). + */ +struct rte_flow_attr { + uint32_t group; /**< Priority group. */ + uint32_t priority; /**< Priority level within group. */ + uint32_t ingress:1; /**< Rule applies to ingress traffic. */ + uint32_t egress:1; /**< Rule applies to egress traffic. */ + uint32_t reserved:30; /**< Reserved, must be zero. */ +}; + +/** + * Matching pattern item types. + * + * Pattern items fall in two categories: + * + * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4, + * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a + * specification structure. These must be stacked in the same order as the + * protocol layers to match, starting from the lowest. + * + * - Matching meta-data or affecting pattern processing (END, VOID, INVERT, + * PF, VF, PORT and so on), often without a specification structure. Since + * they do not match packet contents, these can be specified anywhere + * within item lists without affecting others. + * + * See the description of individual types for more information. Those + * marked with [META] fall into the second category. + */ +enum rte_flow_item_type { + /** + * [META] + * + * End marker for item lists. Prevents further processing of items, + * thereby ending the pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_VOID, + + /** + * [META] + * + * Inverted matching, i.e. process packets that do not match the + * pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_INVERT, + + /** + * Matches any protocol in place of the current layer, a single ANY + * may also stand for several protocol layers. + * + * See struct rte_flow_item_any. 
+ */ + RTE_FLOW_ITEM_TYPE_ANY, + + /** + * [META] + * + * Matches packets addressed to the physical function of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a PF action. Packets are not duplicated between device + * instances by default. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_PF, + + /** + * [META] + * + * Matches packets addressed to a virtual function ID of the device. + * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item + * prevents it from reaching that device unless the flow rule + * contains a VF action. Packets are not duplicated between device + * instances by default. + * + * See struct rte_flow_item_vf. + */ + RTE_FLOW_ITEM_TYPE_VF, + + /** + * [META] + * + * Matches packets coming from the specified physical port of the + * underlying device. + * + * The first PORT item overrides the physical port normally + * associated with the specified DPDK input port (port_id). This + * item can be provided several times to match additional physical + * ports. + * + * See struct rte_flow_item_port. + */ + RTE_FLOW_ITEM_TYPE_PORT, + + /** + * Matches a byte string of a given length at a given offset. + * + * See struct rte_flow_item_raw. + */ + RTE_FLOW_ITEM_TYPE_RAW, + + /** + * Matches an Ethernet header. + * + * See struct rte_flow_item_eth. + */ + RTE_FLOW_ITEM_TYPE_ETH, + + /** + * Matches an 802.1Q/ad VLAN tag. + * + * See struct rte_flow_item_vlan. + */ + RTE_FLOW_ITEM_TYPE_VLAN, + + /** + * Matches an IPv4 header. + * + * See struct rte_flow_item_ipv4. + */ + RTE_FLOW_ITEM_TYPE_IPV4, + + /** + * Matches an IPv6 header. + * + * See struct rte_flow_item_ipv6. + */ + RTE_FLOW_ITEM_TYPE_IPV6, + + /** + * Matches an ICMP header. + * + * See struct rte_flow_item_icmp. + */ + RTE_FLOW_ITEM_TYPE_ICMP, + + /** + * Matches a UDP header. + * + * See struct rte_flow_item_udp. + */ + RTE_FLOW_ITEM_TYPE_UDP, + + /** + * Matches a TCP header. + * + * See struct rte_flow_item_tcp. + */ + RTE_FLOW_ITEM_TYPE_TCP, + + /** + * Matches a SCTP header. + * + * See struct rte_flow_item_sctp. + */ + RTE_FLOW_ITEM_TYPE_SCTP, + + /** + * Matches a VXLAN header. + * + * See struct rte_flow_item_vxlan. + */ + RTE_FLOW_ITEM_TYPE_VXLAN, + + /** + * Matches a E_TAG header. + * + * See struct rte_flow_item_e_tag. + */ + RTE_FLOW_ITEM_TYPE_E_TAG, + + /** + * Matches a NVGRE header. + * + * See struct rte_flow_item_nvgre. + */ + RTE_FLOW_ITEM_TYPE_NVGRE, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ANY + * + * Matches any protocol in place of the current layer, a single ANY may also + * stand for several protocol layers. + * + * This is usually specified as the first pattern item when looking for a + * protocol anywhere in a packet. + * + * A zeroed mask stands for any number of layers. + */ +struct rte_flow_item_any { + uint32_t num; /**< Number of layers covered. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +static const struct rte_flow_item_any rte_flow_item_any_mask = { + .num = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VF + * + * Matches packets addressed to a virtual function ID of the device. 
+ * + * If the underlying device function differs from the one that would + * normally receive the matched traffic, specifying this item prevents it + * from reaching that device unless the flow rule contains a VF + * action. Packets are not duplicated between device instances by default. + * + * - Likely to return an error or never match any traffic if this causes a + * VF device to match traffic addressed to a different VF. + * - Can be specified multiple times to match traffic addressed to several + * VF IDs. + * - Can be combined with a PF item to match both PF and VF traffic. + * + * A zeroed mask can be used to match any VF ID. + */ +struct rte_flow_item_vf { + uint32_t id; /**< Destination VF ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ +static const struct rte_flow_item_vf rte_flow_item_vf_mask = { + .id = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_PORT + * + * Matches packets coming from the specified physical port of the underlying + * device. + * + * The first PORT item overrides the physical port normally associated with + * the specified DPDK input port (port_id). This item can be provided + * several times to match additional physical ports. + * + * Note that physical ports are not necessarily tied to DPDK input ports + * (port_id) when those are not under DPDK control. Possible values are + * specific to each device, they are not necessarily indexed from zero and + * may not be contiguous. + * + * As a device property, the list of allowed values as well as the value + * associated with a port_id should be retrieved by other means. + * + * A zeroed mask can be used to match any port index. + */ +struct rte_flow_item_port { + uint32_t index; /**< Physical port index. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */ +static const struct rte_flow_item_port rte_flow_item_port_mask = { + .index = 0x00000000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_RAW + * + * Matches a byte string of a given length at a given offset. + * + * Offset is either absolute (using the start of the packet) or relative to + * the end of the previous matched item in the stack, in which case negative + * values are allowed. + * + * If search is enabled, offset is used as the starting point. The search + * area can be delimited by setting limit to a nonzero value, which is the + * maximum number of bytes after offset where the pattern may start. + * + * Matching a zero-length pattern is allowed, doing so resets the relative + * offset for subsequent items. + * + * This type does not support ranges (struct rte_flow_item.last). + */ +struct rte_flow_item_raw { + uint32_t relative:1; /**< Look for pattern after the previous item. */ + uint32_t search:1; /**< Search pattern from offset (see also limit). */ + uint32_t reserved:30; /**< Reserved, must be set to zero. */ + int32_t offset; /**< Absolute or relative offset for pattern. */ + uint16_t limit; /**< Search area limit for start of pattern. */ + uint16_t length; /**< Pattern length. */ + uint8_t pattern[]; /**< Byte string to look for. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */ +static const struct rte_flow_item_raw rte_flow_item_raw_mask = { + .relative = 1, + .search = 1, + .reserved = 0x3fffffff, + .offset = 0xffffffff, + .limit = 0xffff, + .length = 0xffff, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ETH + * + * Matches an Ethernet header. + */ +struct rte_flow_item_eth { + struct ether_addr dst; /**< Destination MAC. */ + struct ether_addr src; /**< Source MAC. */ + uint16_t type; /**< EtherType. 
*/ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ +static const struct rte_flow_item_eth rte_flow_item_eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .type = 0x0000, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VLAN + * + * Matches an 802.1Q/ad VLAN tag. + * + * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or + * RTE_FLOW_ITEM_TYPE_VLAN. + */ +struct rte_flow_item_vlan { + uint16_t tpid; /**< Tag protocol identifier. */ + uint16_t tci; /**< Tag control information. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { + .tpid = 0x0000, + .tci = 0xffff, +}; + +/** + * RTE_FLOW_ITEM_TYPE_IPV4 + * + * Matches an IPv4 header. + * + * Note: IPv4 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv4 { + struct ipv4_hdr hdr; /**< IPv4 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { + .hdr = { + .src_addr = 0xffffffff, + .dst_addr = 0xffffffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_IPV6. + * + * Matches an IPv6 header. + * + * Note: IPv6 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv6 { + struct ipv6_hdr hdr; /**< IPv6 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { + .hdr = { + .src_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .dst_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ICMP. + * + * Matches an ICMP header. + */ +struct rte_flow_item_icmp { + struct icmp_hdr hdr; /**< ICMP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { + .hdr = { + .icmp_type = 0xff, + .icmp_code = 0xff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_UDP. + * + * Matches a UDP header. + */ +struct rte_flow_item_udp { + struct udp_hdr hdr; /**< UDP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */ +static const struct rte_flow_item_udp rte_flow_item_udp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_TCP. + * + * Matches a TCP header. + */ +struct rte_flow_item_tcp { + struct tcp_hdr hdr; /**< TCP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ +static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_SCTP. + * + * Matches a SCTP header. + */ +struct rte_flow_item_sctp { + struct sctp_hdr hdr; /**< SCTP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ +static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { + .hdr = { + .src_port = 0xffff, + .dst_port = 0xffff, + }, +}; + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN. + * + * Matches a VXLAN header (RFC 7348). + */ +struct rte_flow_item_vxlan { + uint8_t flags; /**< Normally 0x08 (I flag). */ + uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ +static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { + .vni = "\xff\xff\xff", +}; + +/** + * RTE_FLOW_ITEM_TYPE_E_TAG. 
+ * + * Matches a E-tag header. + */ +struct rte_flow_item_e_tag { + uint16_t tpid; /**< Tag protocol identifier (0x893F). */ + /** + * E-Tag control information (E-TCI). + * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). + */ + uint16_t epcp_edei_in_ecid_b; + /** Reserved (2b), GRP (2b), E-CID base (12b). */ + uint16_t rsvd_grp_ecid_b; + uint8_t in_ecid_e; /**< Ingress E-CID ext. */ + uint8_t ecid_e; /**< E-CID ext. */ +}; + +/** + * RTE_FLOW_ITEM_TYPE_NVGRE. + * + * Matches a NVGRE header. + */ +struct rte_flow_item_nvgre { + /** + * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), + * reserved 0 (9b), version (3b). + * + * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. + */ + uint16_t c_k_s_rsvd0_ver; + uint16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ +}; + +/** + * Matching pattern item definition. + * + * A pattern is formed by stacking items starting from the lowest protocol + * layer to match. This stacking restriction does not apply to meta items + * which can be placed anywhere in the stack without affecting the meaning + * of the resulting pattern. + * + * Patterns are terminated by END items. + * + * The spec field should be a valid pointer to a structure of the related + * item type. It may remain unspecified (NULL) in many cases to request + * broad (nonspecific) matching. In such cases, last and mask must also be + * set to NULL. + * + * Optionally, last can point to a structure of the same type to define an + * inclusive range. This is mostly supported by integer and address fields, + * may cause errors otherwise. Fields that do not support ranges must be set + * to 0 or to the same value as the corresponding fields in spec. + * + * Only the fields defined to nonzero values in the default masks (see + * rte_flow_item_{name}_mask constants) are considered relevant by + * default. This can be overridden by providing a mask structure of the + * same type with applicable bits set to one. It can also be used to + * partially filter out specific fields (e.g. as an alternate mean to match + * ranges of IP addresses). + * + * Mask is a simple bit-mask applied before interpreting the contents of + * spec and last, which may yield unexpected results if not used + * carefully. For example, if for an IPv4 address field, spec provides + * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the + * effective range becomes 10.1.0.0 to 10.3.255.255. + */ +struct rte_flow_item { + enum rte_flow_item_type type; /**< Item type. */ + const void *spec; /**< Pointer to item specification structure. */ + const void *last; /**< Defines an inclusive range (spec to last). */ + const void *mask; /**< Bit-mask applied to spec and last. */ +}; + +/** + * Action types. + * + * Each possible action is represented by a type. Some have associated + * configuration structures. Several actions combined in a list can be + * affected to a flow rule. That list is not ordered. + * + * They fall in three categories: + * + * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent + * processing matched packets by subsequent flow rules, unless overridden + * with PASSTHRU. + * + * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up + * for additional processing by subsequent flow rules. + * + * - Other non terminating meta actions that do not affect the fate of + * packets (END, VOID, MARK, FLAG, COUNT). 
+ * + * When several actions are combined in a flow rule, they should all have + * different types (e.g. dropping a packet twice is not possible). + * + * Only the last action of a given type is taken into account. PMDs still + * perform error checking on the entire list. + * + * Note that PASSTHRU is the only action able to override a terminating + * rule. + */ +enum rte_flow_action_type { + /** + * [META] + * + * End marker for action lists. Prevents further processing of + * actions, thereby ending the list. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_VOID, + + /** + * Leaves packets up for additional processing by subsequent flow + * rules. This is the default when a rule does not contain a + * terminating action, but can be specified to force a rule to + * become non-terminating. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PASSTHRU, + + /** + * [META] + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * See struct rte_flow_action_mark. + */ + RTE_FLOW_ACTION_TYPE_MARK, + + /** + * [META] + * + * Flags packets. Similar to MARK without a specific value; only + * sets the PKT_RX_FDIR mbuf flag. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_FLAG, + + /** + * Assigns packets to a given queue index. + * + * See struct rte_flow_action_queue. + */ + RTE_FLOW_ACTION_TYPE_QUEUE, + + /** + * Drops packets. + * + * PASSTHRU overrides this action if both are specified. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DROP, + + /** + * [META] + * + * Enables counters for this rule. + * + * These counters can be retrieved and reset through rte_flow_query(), + * see struct rte_flow_query_count. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_COUNT, + + /** + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it + * is actually similar to QUEUE + PASSTHRU. + * + * See struct rte_flow_action_dup. + */ + RTE_FLOW_ACTION_TYPE_DUP, + + /** + * Similar to QUEUE, except RSS is additionally performed on packets + * to spread them among several queues according to the provided + * parameters. + * + * See struct rte_flow_action_rss. + */ + RTE_FLOW_ACTION_TYPE_RSS, + + /** + * Redirects packets to the physical function (PF) of the current + * device. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PF, + + /** + * Redirects packets to the virtual function (VF) of the current + * device with the specified ID. + * + * See struct rte_flow_action_vf. + */ + RTE_FLOW_ACTION_TYPE_VF, +}; + +/** + * RTE_FLOW_ACTION_TYPE_MARK + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. It is returned in the + * hash.fdir.hi mbuf field. + */ +struct rte_flow_action_mark { + uint32_t id; /**< Integer value to return with packets. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_QUEUE + * + * Assign packets to a given queue index. + * + * Terminating by default. + */ +struct rte_flow_action_queue { + uint16_t index; /**< Queue index to use. 
*/ +}; + +/** + * RTE_FLOW_ACTION_TYPE_COUNT (query) + * + * Query structure to retrieve and reset flow rule counters. + */ +struct rte_flow_query_count { + uint32_t reset:1; /**< Reset counters after query [in]. */ + uint32_t hits_set:1; /**< hits field is set [out]. */ + uint32_t bytes_set:1; /**< bytes field is set [out]. */ + uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ + uint64_t hits; /**< Number of hits for this rule [out]. */ + uint64_t bytes; /**< Number of bytes through this rule [out]. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_DUP + * + * Duplicates packets to a given queue index. + * + * This is normally combined with QUEUE, however when used alone, it is + * actually similar to QUEUE + PASSTHRU. + * + * Non-terminating by default. + */ +struct rte_flow_action_dup { + uint16_t index; /**< Queue index to duplicate packets to. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_RSS + * + * Similar to QUEUE, except RSS is additionally performed on packets to + * spread them among several queues according to the provided parameters. + * + * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps + * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only, + * both can be requested simultaneously. + * + * Terminating by default. + */ +struct rte_flow_action_rss { + const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */ + uint16_t num; /**< Number of entries in queue[]. */ + uint16_t queue[]; /**< Queues indices to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_VF + * + * Redirects packets to a virtual function (VF) of the current device. + * + * Packets matched by a VF pattern item can be redirected to their original + * VF ID instead of the specified one. This parameter may not be available + * and is not guaranteed to work properly if the VF part is matched by a + * prior flow rule or if packets are not addressed to a VF in the first + * place. + * + * Terminating by default. + */ +struct rte_flow_action_vf { + uint32_t original:1; /**< Use original VF ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< VF ID to redirect packets to. */ +}; + +/** + * Definition of a single action. + * + * A list of actions is terminated by a END action. + * + * For simple actions without a configuration structure, conf remains NULL. + */ +struct rte_flow_action { + enum rte_flow_action_type type; /**< Action type. */ + const void *conf; /**< Pointer to action configuration structure. */ +}; + +/** + * Opaque type returned after successfully creating a flow. + * + * This handle can be used to manage and query the related flow (e.g. to + * destroy it or retrieve counters). + */ +struct rte_flow; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_flow_error.cause. + */ +enum rte_flow_error_type { + RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */ + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ + RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ + RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ + RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ + RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. 
*/ + RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_flow_error { + enum rte_flow_error_type type; /**< Cause field and error types. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Check whether a flow rule can be created on a given port. + * + * While this function has no effect on the target device, the flow rule is + * validated against its current configuration state and the returned value + * should be considered valid by the caller for that state only. + * + * The returned value is guaranteed to remain valid only as long as no + * successful calls to rte_flow_create() or rte_flow_destroy() are made in + * the meantime and no device parameter affecting flow rules in any way are + * modified, due to possible collisions or resource limitations (although in + * such cases EINVAL should not be returned). + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 if flow rule is valid and can be created. A negative errno value + * otherwise (rte_errno is also set), the following errors are defined: + * + * -ENOSYS: underlying device does not support this functionality. + * + * -EINVAL: unknown or invalid rule specification. + * + * -ENOTSUP: valid but unsupported rule specification (e.g. partial + * bit-masks are unsupported). + * + * -EEXIST: collision with an existing rule. + * + * -ENOMEM: not enough resources. + * + * -EBUSY: action cannot be performed due to busy device resources, may + * succeed if the affected queues or even the entire port are in a stopped + * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). + */ +int +rte_flow_validate(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Create a flow rule on a given port. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set + * to the positive version of one of the error codes defined for + * rte_flow_validate(). 
+ */ +struct rte_flow * +rte_flow_create(uint8_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Destroy a flow rule on a given port. + * + * Failure to destroy a flow rule handle may occur when other flow rules + * depend on it, and destroying it would result in an inconsistent state. + * + * This function is only guaranteed to succeed if handles are destroyed in + * reverse order of their creation. + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to destroy. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_destroy(uint8_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error); + +/** + * Destroy all flow rules associated with a port. + * + * In the unlikely event of failure, handles are still considered destroyed + * and no longer valid but the port must be assumed to be in an inconsistent + * state. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_flush(uint8_t port_id, + struct rte_flow_error *error); + +/** + * Query an existing flow rule. + * + * This function allows retrieving flow-specific data such as counters. + * Data is gathered by special actions which must be present in the flow + * rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to query. + * @param action + * Action type to query. + * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_query(uint8_t port_id, + struct rte_flow *flow, + enum rte_flow_action_type action, + void *data, + struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_H_ */ diff --git a/src/dpdk/lib/librte_ether/rte_flow_driver.h b/src/dpdk/lib/librte_ether/rte_flow_driver.h new file mode 100644 index 00000000..da5749d5 --- /dev/null +++ b/src/dpdk/lib/librte_ether/rte_flow_driver.h @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_FLOW_DRIVER_H_ +#define RTE_FLOW_DRIVER_H_ + +/** + * @file + * RTE generic flow API (driver side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include + +#include +#include "rte_ethdev.h" +#include "rte_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generic flow operations structure implemented and returned by PMDs. + * + * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter + * type in their .filter_ctrl callback function (struct eth_dev_ops) as well + * as the RTE_ETH_FILTER_GET filter operation. + * + * If successful, this operation must result in a pointer to a PMD-specific + * struct rte_flow_ops written to the argument address as described below: + * + * \code + * + * // PMD filter_ctrl callback + * + * static const struct rte_flow_ops pmd_flow_ops = { ... }; + * + * switch (filter_type) { + * case RTE_ETH_FILTER_GENERIC: + * if (filter_op != RTE_ETH_FILTER_GET) + * return -EINVAL; + * *(const void **)arg = &pmd_flow_ops; + * return 0; + * } + * + * \endcode + * + * See also rte_flow_ops_get(). + * + * These callback functions are not supposed to be used by applications + * directly, which must rely on the API defined in rte_flow.h. + * + * Public-facing wrapper functions perform a few consistency checks so that + * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These + * callbacks otherwise only differ by their first argument (with port ID + * already resolved to a pointer to struct rte_eth_dev). + */ +struct rte_flow_ops { + /** See rte_flow_validate(). */ + int (*validate) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_create(). */ + struct rte_flow *(*create) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_destroy(). */ + int (*destroy) + (struct rte_eth_dev *, + struct rte_flow *, + struct rte_flow_error *); + /** See rte_flow_flush(). */ + int (*flush) + (struct rte_eth_dev *, + struct rte_flow_error *); + /** See rte_flow_query(). */ + int (*query) + (struct rte_eth_dev *, + struct rte_flow *, + enum rte_flow_action_type, + void *, + struct rte_flow_error *); +}; + +/** + * Initialize generic flow error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). 
+ * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic flow operations structure from a port. + * + * @param port_id + * Port identifier to query. + * @param[out] error + * Pointer to flow error structure. + * + * @return + * The flow operations structure associated with port_id, NULL in case of + * error, in which case rte_errno is set and the error structure contains + * additional details. + */ +const struct rte_flow_ops * +rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c index 26e54f68..51db006a 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.c @@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name) void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) { + h->cmp_jump_table_idx = KEY_CUSTOM; h->rte_hash_custom_cmp_eq = func; } @@ -159,7 +160,8 @@ rte_hash_create(const struct rte_hash_parameters *params) num_key_slots = params->entries + 1; snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); - r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), + /* Create ring (Dummy slot index is not enqueued) */ + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1), params->socket_id, 0); if (r == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); @@ -282,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params) h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; + else +#endif + h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; + /* Turn on multi-writer only with explicit flat from user and TM * support. 
*/ @@ -408,6 +419,7 @@ rte_hash_reset(struct rte_hash *h) static inline int make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) { + static unsigned int nr_pushes; unsigned i, j; int ret; uint32_t next_bucket_idx; @@ -419,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Search for space in alternative locations */ - next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask; next_bkt[i] = &h->buckets[next_bucket_idx]; for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { - if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + if (next_bkt[i]->key_idx[j] == EMPTY_SLOT) break; } @@ -432,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) /* Alternative location has spare room (end of recursive function) */ if (i != RTE_HASH_BUCKET_ENTRIES) { - next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[j] = bkt->sig_current[i]; + next_bkt[i]->sig_current[j] = bkt->sig_alt[i]; next_bkt[i]->key_idx[j] = bkt->key_idx[i]; return i; } @@ -444,11 +456,13 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) break; /* All entries have been pushed, so entry cannot be added */ - if (i == RTE_HASH_BUCKET_ENTRIES) + if (i == RTE_HASH_BUCKET_ENTRIES || nr_pushes > RTE_HASH_MAX_PUSHES) return -ENOSPC; /* Set flag to indicate that this entry is going to be pushed */ bkt->flag[i] = 1; + + nr_pushes++; /* Need room in alternative bucket to insert the pushed entry */ ret = make_space_bucket(h, next_bkt[i]); /* @@ -458,9 +472,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) * or return error */ bkt->flag[i] = 0; + nr_pushes = 0; if (ret >= 0) { - next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; + next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; return i; } else @@ -542,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (prim_bkt->signatures[i].current == sig && - prim_bkt->signatures[i].alt == alt_hash) { + if (prim_bkt->sig_current[i] == sig && + prim_bkt->sig_alt[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + prim_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -562,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (sec_bkt->signatures[i].alt == sig && - sec_bkt->signatures[i].current == alt_hash) { + if (sec_bkt->sig_alt[i] == sig && + sec_bkt->sig_current[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + sec_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -608,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, #endif for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == 
EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -630,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, */ ret = make_space_bucket(h, prim_bkt); if (ret >= 0) { - prim_bkt->signatures[ret].current = sig; - prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->sig_current[ret] = sig; + prim_bkt->sig_alt[ret] = alt_hash; prim_bkt->key_idx[ret] = new_idx; if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_unlock(h->multiwriter_lock); @@ -705,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -728,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].alt == sig) { + if (bkt->sig_current[i] == alt_hash && + bkt->sig_alt[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -783,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) unsigned lcore_id, n_slots; struct lcore_cache *cached_free_slots; - bkt->signatures[i].sig = NULL_SIGNATURE; + bkt->sig_current[i] = NULL_SIGNATURE; + bkt->sig_alt[i] = NULL_SIGNATURE; if (h->hw_trans_mem_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; @@ -814,14 +830,15 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, unsigned i; struct rte_hash_bucket *bkt; struct rte_hash_key *k, *keys = h->key_store; + int32_t ret; bucket_idx = sig & h->bucket_bitmask; bkt = &h->buckets[bucket_idx]; /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -831,7 +848,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return bkt->key_idx[i] - 1; + ret = bkt->key_idx[i] - 1; + bkt->key_idx[i] = EMPTY_SLOT; + return ret; } } } @@ -843,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == alt_hash && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -854,7 +873,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return bkt->key_idx[i] - 1; + ret = bkt->key_idx[i] - 1; + bkt->key_idx[i] = EMPTY_SLOT; + return ret; } } } @@ 
-897,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, return 0; } -/* Lookup bulk stage 0: Prefetch input key */ static inline void -lookup_stage0(unsigned *idx, uint64_t *lookup_mask, - const void * const *keys) +compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + hash_sig_t prim_hash, hash_sig_t sec_hash, + enum rte_hash_sig_compare_function sig_cmp_fn) { - *idx = __builtin_ctzl(*lookup_mask); - if (*lookup_mask == 0) - *idx = 0; + unsigned int i; + + switch (sig_cmp_fn) { +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + case RTE_HASH_COMPARE_AVX2: + *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)prim_bkt->sig_current), + _mm256_set1_epi32(prim_hash))); + *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)sec_bkt->sig_current), + _mm256_set1_epi32(sec_hash))); + break; +#endif +#ifdef RTE_MACHINE_CPUFLAG_SSE2 + case RTE_HASH_COMPARE_SSE: + /* Compare the first 4 signatures in the bucket */ + *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)prim_bkt->sig_current), + _mm_set1_epi32(prim_hash))); + *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&prim_bkt->sig_current[4]), + _mm_set1_epi32(prim_hash)))) << 4; + /* Compare the first 4 signatures in the bucket */ + *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)sec_bkt->sig_current), + _mm_set1_epi32(sec_hash))); + *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&sec_bkt->sig_current[4]), + _mm_set1_epi32(sec_hash)))) << 4; + break; +#endif + default: + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + *prim_hash_matches |= + ((prim_hash == prim_bkt->sig_current[i]) << i); + *sec_hash_matches |= + ((sec_hash == sec_bkt->sig_current[i]) << i); + } + } - rte_prefetch0(keys[*idx]); - *lookup_mask &= ~(1llu << *idx); } -/* - * Lookup bulk stage 1: Calculate primary/secondary hashes - * and prefetch primary/secondary buckets - */ +#define PREFETCH_OFFSET 4 static inline void -lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, - const struct rte_hash_bucket **primary_bkt, - const struct rte_hash_bucket **secondary_bkt, - hash_sig_t *hash_vals, const void * const *keys, - const struct rte_hash *h) +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) { - *prim_hash = rte_hash_hash(h, keys[idx]); - hash_vals[idx] = *prim_hash; - *sec_hash = rte_hash_secondary_hash(*prim_hash); + uint64_t hits = 0; + int32_t i; + uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + + /* Prefetch first keys */ + for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) + rte_prefetch0(keys[i]); - *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; - *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + /* + * Prefetch rest of the keys, calculate primary and + * secondary bucket and prefetch them 
+ */ + for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) { + rte_prefetch0(keys[i + PREFETCH_OFFSET]); - rte_prefetch0(*primary_bkt); - rte_prefetch0(*secondary_bkt); -} + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); -/* - * Lookup bulk stage 2: Search for match hashes in primary/secondary locations - * and prefetch first key slot - */ -static inline void -lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, - const struct rte_hash_bucket *prim_bkt, - const struct rte_hash_bucket *sec_bkt, - const struct rte_hash_key **key_slot, int32_t *positions, - uint64_t *extra_hits_mask, const void *keys, - const struct rte_hash *h) -{ - unsigned prim_hash_matches, sec_hash_matches, key_idx, i; - unsigned total_hash_matches; + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); - sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); } - key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; - if (key_idx == 0) - key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + /* Calculate and prefetch rest of the buckets */ + for (; i < num_keys; i++) { + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - total_hash_matches = (prim_hash_matches | - (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); - *key_slot = (const struct rte_hash_key *) ((const char *)keys + - key_idx * h->key_entry_size); + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - rte_prefetch0(*key_slot); - /* - * Return index where key is stored, - * substracting the first dummy index - */ - positions[idx] = (key_idx - 1); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } - *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + primary_bkt[i], secondary_bkt[i], + prim_hash[i], sec_hash[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); + uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } -} + if (sec_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); + uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } + } + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]); + + uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, 
do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; -/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ -static inline void -lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, - const int32_t *positions, void *data[], uint64_t *hits, - const struct rte_hash *h) -{ - unsigned hit; - unsigned key_idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(1 << (hit_index)); + } - hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h); - if (data != NULL) - data[idx] = key_slot->pdata; + while (sec_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); - key_idx = positions[idx] + 1; - /* - * If key index is 0, force hit to be 0, in case key to be looked up - * is all zero (as in the dummy slot), which would result in a wrong hit - */ - *hits |= (uint64_t)(hit && !!key_idx) << idx; -} + uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ -static inline void -__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, - uint32_t num_keys, int32_t *positions, - uint64_t *hit_mask, void *data[]) -{ - uint64_t hits = 0; - uint64_t extra_hits_mask = 0; - uint64_t lookup_mask, miss_mask; - unsigned idx; - const void *key_store = h->key_store; - int ret; - hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; - - unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31; - const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; - const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; - const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; - const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; - const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31; - hash_sig_t primary_hash10, primary_hash11; - hash_sig_t secondary_hash10, secondary_hash11; - hash_sig_t primary_hash20, primary_hash21; - hash_sig_t secondary_hash20, secondary_hash21; - - lookup_mask = (uint64_t) -1 >> (64 - num_keys); - miss_mask = lookup_mask; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, 
primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - - while (lookup_mask) { - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, - primary_bkt20, secondary_bkt20, &k_slot20, positions, - &extra_hits_mask, key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, - primary_bkt21, secondary_bkt21, &k_slot21, positions, - &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - } + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, 
idx31 = idx21; - - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - /* ignore any items we have already found */ - extra_hits_mask &= ~hits; - - if (unlikely(extra_hits_mask)) { - /* run a single search for each remaining item */ - do { - idx = __builtin_ctzl(extra_hits_mask); - if (data != NULL) { - ret = rte_hash_lookup_with_hash_data(h, - keys[idx], hash_vals[idx], &data[idx]); - if (ret >= 0) - hits |= 1ULL << idx; - } else { - positions[idx] = rte_hash_lookup_with_hash(h, - keys[idx], hash_vals[idx]); - if (positions[idx] >= 0) - hits |= 1llu << idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; } - extra_hits_mask &= ~(1llu << idx); - } while (extra_hits_mask); - } + sec_hitmask[i] &= ~(1 << (hit_index)); + } - miss_mask &= ~hits; - if (unlikely(miss_mask)) { - do { - idx = __builtin_ctzl(miss_mask); - positions[idx] = -ENOENT; - miss_mask &= ~(1llu << idx); - } while (miss_mask); +next_key: + continue; } if (hit_mask != NULL) @@ -1223,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 idx = *next % RTE_HASH_BUCKET_ENTRIES; /* If current position is empty, go to the next one */ - while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) { (*next)++; /* End of table */ if (*next == total_entries) diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h index 6c76700f..1b8ffed8 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash.h @@ -130,14 +130,18 @@ enum add_key_case { }; /** Number of items per bucket. */ -#define RTE_HASH_BUCKET_ENTRIES 4 +#define RTE_HASH_BUCKET_ENTRIES 8 #define NULL_SIGNATURE 0 +#define EMPTY_SLOT 0 + #define KEY_ALIGNMENT 16 #define LCORE_CACHE_SIZE 64 +#define RTE_HASH_MAX_PUSHES 100 + #define RTE_HASH_BFS_QUEUE_MAX_LEN 1000 #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4 @@ -149,17 +153,6 @@ struct lcore_cache { void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ } __rte_cache_aligned; -/* Structure storing both primary and secondary hashes */ -struct rte_hash_signatures { - union { - struct { - hash_sig_t current; - hash_sig_t alt; - }; - uint64_t sig; - }; -}; - /* Structure that stores key-value pair */ struct rte_hash_key { union { @@ -170,11 +163,22 @@ struct rte_hash_key { char key[0]; } __attribute__((aligned(KEY_ALIGNMENT))); +/* All different signature compare functions */ +enum rte_hash_sig_compare_function { + RTE_HASH_COMPARE_SCALAR = 0, + RTE_HASH_COMPARE_SSE, + RTE_HASH_COMPARE_AVX2, + RTE_HASH_COMPARE_NUM +}; + /** Bucket structure */ struct rte_hash_bucket { - struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; - /* Includes dummy key index that always contains index 0 */ - uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + + uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; + + hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; } __rte_cache_aligned; @@ -183,30 +187,38 @@ struct rte_hash { char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ uint32_t entries; /**< Total table entries. */ uint32_t num_buckets; /**< Number of buckets in table. */ - uint32_t key_len; /**< Length of hash key. 
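/*
 * Illustrative usage sketch (not part of the patch): how an application
 * consumes the hit_mask semantics produced by the rewritten
 * __rte_hash_lookup_bulk() above, through the public bulk-lookup wrapper.
 * The rte_hash_lookup_bulk_data() prototype is assumed to be the standard
 * librte_hash one; error handling is trimmed, and num_keys is assumed to be
 * at most RTE_HASH_LOOKUP_BULK_MAX.
 */
#include <stdint.h>
#include <rte_hash.h>

static void
example_bulk_lookup(const struct rte_hash *h, const void *keys[], uint32_t n)
{
    uint64_t hit_mask = 0;
    void *data[RTE_HASH_LOOKUP_BULK_MAX];
    uint32_t i;

    rte_hash_lookup_bulk_data(h, keys, n, &hit_mask, data);

    for (i = 0; i < n; i++) {
        if (hit_mask & (1ULL << i)) {
            /* hit: data[i] is the value stored with keys[i] */
        } else {
            /* miss: the positions-based variant reports -ENOENT here */
        }
    }
}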
*/ + + struct rte_ring *free_slots; + /**< Ring that stores all indexes of the free slots in the key table */ + uint8_t hw_trans_mem_support; + /**< Hardware transactional memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ + enum add_key_case add_key; /**< Multi-writer hash add behavior */ + + rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + + /* Fields used in lookup */ + + uint32_t key_len __rte_cache_aligned; + /**< Length of hash key. */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. */ enum cmp_jump_table_case cmp_jump_table_idx; /**< Indicates which compare function to use. */ - uint32_t bucket_bitmask; /**< Bitmask for getting bucket index - from hash signature. */ + enum rte_hash_sig_compare_function sig_cmp_fn; + /**< Indicates which signature compare function to use. */ + uint32_t bucket_bitmask; + /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. */ - struct rte_ring *free_slots; /**< Ring that stores all indexes - of the free slots in the key table */ void *key_store; /**< Table storing all keys and data */ - struct rte_hash_bucket *buckets; /**< Table with buckets storing all the - hash values and key indexes - to the key table*/ - uint8_t hw_trans_mem_support; /**< Hardware transactional - memory support */ - struct lcore_cache *local_free_slots; - /**< Local cache per lcore, storing some indexes of the free slots */ - enum add_key_case add_key; /**< Multi-writer hash add behavior */ - - rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + struct rte_hash_bucket *buckets; + /**< Table with buckets storing all the hash values and key indexes + * to the key table. 
+ */ } __rte_cache_aligned; struct queue_node { diff --git a/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h b/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h index fa5630b7..0c94244a 100644 --- a/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h +++ b/src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h @@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt, */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == - NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, prev_slot = curr_node->prev_slot; prev_alt_bkt_idx - = prev_bkt->signatures[prev_slot].alt + = prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; if (unlikely(&h->buckets[prev_alt_bkt_idx] @@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->signatures[curr_slot].alt = - prev_bkt->signatures[prev_slot].current; - curr_bkt->signatures[curr_slot].current = - prev_bkt->signatures[prev_slot].alt; + curr_bkt->sig_alt[curr_slot] = + prev_bkt->sig_current[prev_slot]; + curr_bkt->sig_current[curr_slot] = + prev_bkt->sig_alt[prev_slot]; curr_bkt->key_idx[curr_slot] = prev_bkt->key_idx[prev_slot]; @@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, curr_bkt = curr_node->bkt; } - curr_bkt->signatures[curr_slot].current = sig; - curr_bkt->signatures[curr_slot].alt = alt_hash; + curr_bkt->sig_current[curr_slot] = sig; + curr_bkt->sig_alt[curr_slot] = alt_hash; curr_bkt->key_idx[curr_slot] = new_idx; rte_xend(); @@ -168,10 +167,11 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, /* Cuckoo bfs Search */ while (likely(tail != head && head < - queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) { + queue + RTE_HASH_BFS_QUEUE_MAX_LEN - + RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) { + if (curr_bkt->key_idx[i] == EMPTY_SLOT) { if (likely(rte_hash_cuckoo_move_insert_mw_tm(h, tail, i, sig, alt_hash, new_idx) == 0)) @@ -179,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt + alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] & h->bucket_bitmask]); head->bkt = alt_bkt; head->prev = tail; diff --git a/src/dpdk/lib/librte_hash/rte_fbk_hash.h b/src/dpdk/lib/librte_hash/rte_fbk_hash.h index a430961d..bd46048f 100644 --- a/src/dpdk/lib/librte_hash/rte_fbk_hash.h +++ b/src/dpdk/lib/librte_hash/rte_fbk_hash.h @@ -115,7 +115,7 @@ struct rte_fbk_hash_table { uint32_t init_val; /**< For initialising hash function. */ /** A flat table of all buckets. 
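/*
 * Illustrative sketch (not part of the patch): with the new bucket layout,
 * emptiness is tracked through key_idx[] (EMPTY_SLOT == 0) instead of a
 * NULL signature, and the current/alternative signatures live in parallel
 * arrays. The mirror structure and helper below are assumptions made only
 * to show the free-slot scan used by rte_hash_cuckoo_insert_mw_tm() above.
 */
#include <stdint.h>

#define EXAMPLE_BUCKET_ENTRIES  8
#define EXAMPLE_EMPTY_SLOT      0

struct example_bucket {
    uint32_t sig_current[EXAMPLE_BUCKET_ENTRIES];
    uint32_t key_idx[EXAMPLE_BUCKET_ENTRIES];
    uint32_t sig_alt[EXAMPLE_BUCKET_ENTRIES];
};

/* Return the first free entry, or -1 when the bucket is full. */
static int
example_find_free_slot(const struct example_bucket *bkt)
{
    unsigned int i;

    for (i = 0; i < EXAMPLE_BUCKET_ENTRIES; i++)
        if (bkt->key_idx[i] == EXAMPLE_EMPTY_SLOT)
            return (int)i;
    return -1;
}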
*/ - union rte_fbk_hash_entry t[0]; + union rte_fbk_hash_entry t[]; }; /** diff --git a/src/dpdk/lib/librte_hash/rte_thash.h b/src/dpdk/lib/librte_hash/rte_thash.h index d98e98e7..a4886a8c 100644 --- a/src/dpdk/lib/librte_hash/rte_thash.h +++ b/src/dpdk/lib/librte_hash/rte_thash.h @@ -54,6 +54,7 @@ extern "C" { #include #include #include +#include #ifdef __SSE3__ #include @@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = { struct rte_ipv4_tuple { uint32_t src_addr; uint32_t dst_addr; + RTE_STD_C11 union { struct { uint16_t dport; @@ -119,6 +121,7 @@ struct rte_ipv4_tuple { struct rte_ipv6_tuple { uint8_t src_addr[16]; uint8_t dst_addr[16]; + RTE_STD_C11 union { struct { uint16_t dport; diff --git a/src/dpdk/lib/librte_kvargs/rte_kvargs.c b/src/dpdk/lib/librte_kvargs/rte_kvargs.c index 8d56abd4..854ac83f 100644 --- a/src/dpdk/lib/librte_kvargs/rte_kvargs.c +++ b/src/dpdk/lib/librte_kvargs/rte_kvargs.c @@ -92,9 +92,9 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) * into a list of valid keys. */ static int -is_valid_key(const char *valid[], const char *key_match) +is_valid_key(const char * const valid[], const char *key_match) { - const char **valid_ptr; + const char * const *valid_ptr; for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) { if (strcmp(key_match, *valid_ptr) == 0) @@ -109,7 +109,7 @@ is_valid_key(const char *valid[], const char *key_match) */ static int check_for_valid_keys(struct rte_kvargs *kvlist, - const char *valid[]) + const char * const valid[]) { unsigned i, ret; struct rte_kvargs_pair *pair; @@ -187,7 +187,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist) * check if only valid keys were used. */ struct rte_kvargs * -rte_kvargs_parse(const char *args, const char *valid_keys[]) +rte_kvargs_parse(const char *args, const char * const valid_keys[]) { struct rte_kvargs *kvlist; diff --git a/src/dpdk/lib/librte_kvargs/rte_kvargs.h b/src/dpdk/lib/librte_kvargs/rte_kvargs.h index ae9ae79f..5821c726 100644 --- a/src/dpdk/lib/librte_kvargs/rte_kvargs.h +++ b/src/dpdk/lib/librte_kvargs/rte_kvargs.h @@ -97,7 +97,8 @@ struct rte_kvargs { * - A pointer to an allocated rte_kvargs structure on success * - NULL on error */ -struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]); +struct rte_kvargs *rte_kvargs_parse(const char *args, + const char *const valid_keys[]); /** * Free a rte_kvargs structure diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf.c b/src/dpdk/lib/librte_mbuf/rte_mbuf.c index 4846b897..72ad91e4 100644 --- a/src/dpdk/lib/librte_mbuf/rte_mbuf.c +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf.c @@ -53,12 +53,12 @@ #include #include #include -#include #include #include #include #include #include +#include /* * ctrlmbuf constructor, given as a callback function to @@ -174,10 +174,12 @@ rte_pktmbuf_pool_create(const char *name, unsigned n, if (mp == NULL) return NULL; - rte_errno = rte_mempool_set_ops_byname(mp, - RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); - if (rte_errno != 0) { + ret = rte_mempool_set_ops_byname(mp, + RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); + if (ret != 0) { RTE_LOG(ERR, MBUF, "error setting mempool handler\n"); + rte_mempool_free(mp); + rte_errno = -ret; return NULL; } rte_pktmbuf_pool_init(mp, &mbp_priv); @@ -262,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) } } +/* read len data bytes in a mbuf at specified offset (internal) */ +const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, + uint32_t len, void *buf) +{ + const struct rte_mbuf *seg = m; + 
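/*
 * Illustrative usage sketch (not part of the patch): the rte_kvargs change
 * above only tightens const-correctness, so a caller may now pass a fully
 * const table of valid keys to rte_kvargs_parse(). The key names and the
 * argument string are made up for the example, and the
 * rte_kvargs_count()/rte_kvargs_free() helpers are assumed from the
 * existing librte_kvargs API.
 */
#include <rte_kvargs.h>

static const char * const example_valid_keys[] = { "iface", "queues", NULL };

static int
example_parse_devargs(const char *args)
{
    struct rte_kvargs *kvlist;
    unsigned int n;

    kvlist = rte_kvargs_parse(args, example_valid_keys);
    if (kvlist == NULL)
        return -1;      /* unknown key or malformed "key=value,..." string */

    n = rte_kvargs_count(kvlist, "queues");
    /* ... process individual pairs here if needed ... */
    rte_kvargs_free(kvlist);
    return (int)n;
}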
uint32_t buf_off = 0, copy_len; + + if (off + len > rte_pktmbuf_pkt_len(m)) + return NULL; + + while (off >= rte_pktmbuf_data_len(seg)) { + off -= rte_pktmbuf_data_len(seg); + seg = seg->next; + } + + if (off + len <= rte_pktmbuf_data_len(seg)) + return rte_pktmbuf_mtod_offset(seg, char *, off); + + /* rare case: header is split among several segments */ + while (len > 0) { + copy_len = rte_pktmbuf_data_len(seg) - off; + if (copy_len > len) + copy_len = len; + rte_memcpy((char *)buf + buf_off, + rte_pktmbuf_mtod_offset(seg, char *, off), copy_len); + off = 0; + buf_off += copy_len; + len -= copy_len; + seg = seg->next; + } + + return buf; +} + /* * Get the name of a RX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. @@ -273,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; case PKT_RX_FDIR: return "PKT_RX_FDIR"; case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; + case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD"; + case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE"; case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD"; + case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD"; + case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE"; case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED"; case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; + case PKT_RX_LRO: return "PKT_RX_LRO"; default: return NULL; } } +struct flag_mask { + uint64_t flag; + uint64_t mask; + const char *default_name; +}; + +/* write the list of rx ol flags in buffer buf */ +int +rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask rx_flags[] = { + { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL }, + { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL }, + { PKT_RX_FDIR, PKT_RX_FDIR, NULL }, + { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK, + "PKT_RX_L4_CKSUM_UNKNOWN" }, + { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK, + "PKT_RX_IP_CKSUM_UNKNOWN" }, + { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL }, + { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL }, + { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL }, + { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL }, + { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, + { PKT_RX_LRO, PKT_RX_LRO, NULL }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(rx_flags); i++) { + if ((mask & rx_flags[i].mask) != rx_flags[i].flag) + continue; + name = rte_get_rx_ol_flag_name(rx_flags[i].flag); + if (name == NULL) + name = rx_flags[i].default_name; + ret = snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} + /* * Get the name of a TX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. 
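/*
 * Illustrative usage sketch (not part of the patch): rte_get_rx_ol_flag_list()
 * added above turns an ol_flags mask into a readable list, which is handy in
 * RX debug paths. The buffer size is an arbitrary choice for the example.
 */
#include <stdio.h>
#include <rte_mbuf.h>

static void
example_dump_rx_flags(const struct rte_mbuf *m)
{
    char buf[256];

    if (rte_get_rx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
        printf("rx ol_flags: %s\n", buf);
    else
        printf("rx ol_flags: <buffer too small>\n");
}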
@@ -302,6 +400,65 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN"; + case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; + case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; + case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; + case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; default: return NULL; } } + +/* write the list of tx ol flags in buffer buf */ +int +rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask tx_flags[] = { + { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL }, + { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL }, + { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" }, + { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL }, + { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL }, + { PKT_TX_IPV4, PKT_TX_IPV4, NULL }, + { PKT_TX_IPV6, PKT_TX_IPV6, NULL }, + { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL }, + { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL }, + { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL }, + { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(tx_flags); i++) { + if ((mask & tx_flags[i].mask) != tx_flags[i].flag) + continue; + name = rte_get_tx_ol_flag_name(tx_flags[i].flag); + if (name == NULL) + name = tx_flags[i].default_name; + ret = snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf.h b/src/dpdk/lib/librte_mbuf/rte_mbuf.h index 9e607992..0d01167c 100644 --- a/src/dpdk/lib/librte_mbuf/rte_mbuf.h +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf.h @@ -44,7 +44,7 @@ * buffers. The message buffers are stored in a mempool, using the * RTE mempool library. * - * This library provide an API to allocate/free packet mbufs, which are + * This library provides an API to allocate/free packet mbufs, which are * used to carry network packets. * * To understand the concepts of packet buffers or mbufs, you @@ -60,6 +60,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -90,8 +91,25 @@ extern "C" { #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */ -#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */ -#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */ + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_L4_CKSUM_MASK. + * This flag was set when the L4 checksum of a packet was detected as + * wrong by the hardware. + */ +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_IP_CKSUM_MASK. 
+ * This flag was set when the IP checksum of a packet was detected as + * wrong by the hardware. + */ +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) + #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */ /** @@ -101,7 +119,35 @@ extern "C" { */ #define PKT_RX_VLAN_STRIPPED (1ULL << 6) -/* hole, some bits can be reused here */ +/** + * Mask of bits used to determine the status of RX IP checksum. + * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum + * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong + * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid + * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet + * data, but the integrity of the IP header is verified. + */ +#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) + +#define PKT_RX_IP_CKSUM_UNKNOWN 0 +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) + +/** + * Mask of bits used to determine the status of RX L4 checksum. + * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum + * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong + * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid + * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet + * data, but the integrity of the L4 data is verified. + */ +#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) + +#define PKT_RX_L4_CKSUM_UNKNOWN 0 +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) +#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) +#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) #define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */ #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/ @@ -124,10 +170,35 @@ extern "C" { */ #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED +/** + * When packets are coalesced by a hardware or virtual driver, this flag + * can be set in the RX mbuf, meaning that the m->tso_segsz field is + * valid and is set to the segment size of original packets. + */ +#define PKT_RX_LRO (1ULL << 16) + /* add new RX flags here */ /* add new TX flags here */ +/** + * Offload the MACsec. This flag must be set by the application to enable + * this offload feature for a packet to be transmitted. + */ +#define PKT_TX_MACSEC (1ULL << 44) + +/** + * Bits 45:48 used for the tunnel type. + * When doing Tx offload like TSO or checksum, the HW needs to configure the + * tunnel type into the HW descriptors. + */ +#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) +#define PKT_TX_TUNNEL_GRE (0x2ULL << 45) +#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) +#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) +/* add new TX TUNNEL type here */ +#define PKT_TX_TUNNEL_MASK (0xFULL << 45) + /** * Second VLAN insertion (QinQ) flag. */ @@ -218,6 +289,19 @@ extern "C" { */ #define PKT_TX_OUTER_IPV6 (1ULL << 60) +/** + * Bitmask of all supported packet Tx offload features flags, + * which can be set for packet. 
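/*
 * Illustrative usage sketch (not part of the patch): with the two-bit
 * PKT_RX_IP_CKSUM_MASK / PKT_RX_L4_CKSUM_MASK status introduced above,
 * applications should compare against the mask instead of testing the
 * deprecated *_CKSUM_BAD bit alone. The helper name is an assumption.
 */
#include <rte_mbuf.h>

/* Return 0 when the L4 checksum is known good, -1 when known bad,
 * and 1 when it still has to be verified in software. */
static int
example_l4_cksum_status(const struct rte_mbuf *m)
{
    switch (m->ol_flags & PKT_RX_L4_CKSUM_MASK) {
    case PKT_RX_L4_CKSUM_GOOD:
        return 0;
    case PKT_RX_L4_CKSUM_BAD:
        return -1;
    default:    /* UNKNOWN or NONE: verify in software */
        return 1;
    }
}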
+ */ +#define PKT_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG | \ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_TUNNEL_MASK) + #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ @@ -225,500 +309,6 @@ extern "C" { /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ -/* - * 32 bits are divided into several fields to mark packet types. Note that - * each field is indexical. - * - Bit 3:0 is for L2 types. - * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. - * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. - * - Bit 15:12 is for tunnel types. - * - Bit 19:16 is for inner L2 types. - * - Bit 23:20 is for inner L3 types. - * - Bit 27:24 is for inner L4 types. - * - Bit 31:28 is reserved. - * - * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, - * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP - * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. - * - * Note that L3 types values are selected for checking IPV4/IPV6 header from - * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and - * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. - * - * Note that the packet types of the same packet recognized by different - * hardware may be different, as different hardware may have different - * capability of packet type recognition. - * - * examples: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=0x29 - * | 'version'=6, 'next header'=0x3A - * | 'ICMPv6 header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_IP | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_ICMP. - * - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x2F - * | 'GRE header' - * | 'version'=6, 'next header'=0x11 - * | 'UDP header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_GRENAT | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_UDP. - */ -#define RTE_PTYPE_UNKNOWN 0x00000000 -/** - * Ethernet packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=[0x0800|0x86DD]> - */ -#define RTE_PTYPE_L2_ETHER 0x00000001 -/** - * Ethernet packet type for time sync. - * - * Packet format: - * <'ether type'=0x88F7> - */ -#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 -/** - * ARP (Address Resolution Protocol) packet type. - * - * Packet format: - * <'ether type'=0x0806> - */ -#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 -/** - * LLDP (Link Layer Discovery Protocol) packet type. - * - * Packet format: - * <'ether type'=0x88CC> - */ -#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 -/** - * NSH (Network Service Header) packet type. - * - * Packet format: - * <'ether type'=0x894F> - */ -#define RTE_PTYPE_L2_ETHER_NSH 0x00000005 -/** - * Mask of layer 2 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L2_MASK 0x0000000f -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * header option. 
- * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_L3_IPV4 0x00000010 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and contains header - * options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * extension header. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_L3_IPV6 0x00000040 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and may or maynot contain - * header options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and contains extension - * headers. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and may or maynot contain - * extension headers. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 -/** - * Mask of layer 3 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L3_MASK 0x000000f0 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_L4_TCP 0x00000100 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_L4_UDP 0x00000200 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, which can be recognized as - * fragmented. A fragmented packet cannot be recognized as any other L4 types - * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, - * RTE_PTYPE_L4_NONFRAG). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_L4_FRAG 0x00000300 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_L4_SCTP 0x00000400 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for outer packet for tunneling cases. 
- * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_L4_ICMP 0x00000500 -/** - * Non-fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, while cannot be recognized as - * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, - * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_L4_NONFRAG 0x00000600 -/** - * Mask of layer 4 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L4_MASK 0x00000f00 -/** - * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=[4|41]> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[4|41]> - */ -#define RTE_PTYPE_TUNNEL_IP 0x00001000 -/** - * GRE (Generic Routing Encapsulation) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47> - */ -#define RTE_PTYPE_TUNNEL_GRE 0x00002000 -/** - * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=4798> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=4798> - */ -#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 -/** - * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling - * packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47 - * | 'protocol type'=0x6558> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47 - * | 'protocol type'=0x6558'> - */ -#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 -/** - * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=6081> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=6081> - */ -#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 -/** - * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area - * Network) or GRE (Generic Routing Encapsulation) could be recognized as this - * packet type, if they can not be recognized independently as of hardware - * capability. - */ -#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 -/** - * Mask of tunneling packet types. - */ -#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 -/** - * Ethernet packet type. - * It is used for inner packet type only. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD]> - */ -#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 -/** - * Ethernet packet type with VLAN (Virtual Local Area Network) tag. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> - */ -#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 -/** - * Mask of inner layer 2 packet types. - */ -#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and does not contain any header option. 
- * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and contains header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and does not contain any extension header. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and may or maynot contain header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and contains extension headers. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and may or maynot contain extension - * headers. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 -/** - * Mask of inner layer 3 packet types. - */ -#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_INNER_L4_TCP 0x01000000 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_INNER_L4_UDP 0x02000000 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for inner packet only, and may or maynot have layer 4 packet. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 -/** - * Non-fragmented IP (Internet Protocol) packet type. 
- * It is used for inner packet only, and may or maynot have other unknown layer - * 4 packet types. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 -/** - * Mask of inner layer 4 packet types. - */ -#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can - * determine if it is an IPV4 packet. - */ -#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can - * determine if it is an IPV4 packet. - */ -#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) - -/* Check if it is a tunneling packet */ -#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \ - RTE_PTYPE_INNER_L2_MASK | \ - RTE_PTYPE_INNER_L3_MASK | \ - RTE_PTYPE_INNER_L4_MASK)) - /** Alignment constraint of mbuf private area. */ #define RTE_MBUF_PRIV_ALIGN 8 @@ -732,6 +322,20 @@ extern "C" { */ const char *rte_get_rx_ol_flag_name(uint64_t mask); +/** + * Dump the list of RX offload flags in a buffer + * + * @param mask + * The mask describing the RX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + /** * Get the name of a TX offload flag * @@ -744,6 +348,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask); */ const char *rte_get_tx_ol_flag_name(uint64_t mask); +/** + * Dump the list of TX offload flags in a buffer + * + * @param mask + * The mask describing the TX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + /** * Some NICs need at least 2KB buffer to RX standard Ethernet frame without * splitting it into multiple segments. @@ -756,8 +374,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask); /* define a set of marker types that can be used to refer to set points in the * mbuf */ +__extension__ typedef void *MARKER[0]; /**< generic marker for a point in a structure */ +__extension__ typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ +__extension__ typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes * with a single assignment */ @@ -784,6 +405,7 @@ struct rte_mbuf { * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC * config option. */ + RTE_STD_C11 union { rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ uint16_t refcnt; /**< Non-atomically accessed refcnt */ @@ -803,6 +425,7 @@ struct rte_mbuf { * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the * vlan is stripped from the data. */ + RTE_STD_C11 union { uint32_t packet_type; /**< L2/L3/L4 and tunnel information. 
*/ struct { @@ -824,6 +447,7 @@ struct rte_mbuf { union { uint32_t rss; /**< RSS hash result if RSS enabled */ struct { + RTE_STD_C11 union { struct { uint16_t hash; @@ -851,6 +475,7 @@ struct rte_mbuf { /* second cache line - fields only used in slow path or on TX */ MARKER cacheline1 __rte_cache_min_aligned; + RTE_STD_C11 union { void *userdata; /**< Can be used for external metadata */ uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */ @@ -860,10 +485,15 @@ struct rte_mbuf { struct rte_mbuf *next; /**< Next segment of scattered packet. */ /* fields to support TX offloads */ + RTE_STD_C11 union { uint64_t tx_offload; /**< combined for easy fetch */ + __extension__ struct { - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l2_len:7; + /**< L2 (MAC) Header Length for non-tunneling pkt. + * Outer_L4_len + ... + Inner_L2_len for tunneling pkt. + */ uint64_t l3_len:9; /**< L3 (IP) Header Length. */ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ uint64_t tso_segsz:16; /**< TCP TSO segment size */ @@ -1059,9 +689,6 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) static inline uint16_t rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) { - // TREX_PATCH - The code in #if 0 caused tx queue to hang when running: - // sudo ./t-rex-64-o -f avl/sfr_delay_10_1g_no_bundeling.yaml -m 35 -p -d 100 -#if 0 /* * The atomic_add is an expensive operation, so we don't want to * call it in the case where we know we are the uniq holder of @@ -1073,7 +700,7 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) rte_mbuf_refcnt_set(m, 1 + value); return 1 + value; } -#endif + return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); } @@ -1160,13 +787,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) return m; } -/* compat with older versions */ -__rte_deprecated static inline struct rte_mbuf * -__rte_mbuf_raw_alloc(struct rte_mempool *mp) -{ - return rte_mbuf_raw_alloc(mp); -} - /** * @internal Put mbuf back into its original mempool. * The use of that function is reserved for RTE internal needs. @@ -1387,6 +1007,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp) return mbp_priv->mbuf_priv_size; } +/** + * Reset the data_off field of a packet mbuf to its default value. + * + * The given mbuf must have only one segment, which should be empty. + * + * @param m + * The packet mbuf's data_off field has to be reset. + */ +static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) +{ + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); +} + /** * Reset the fields of a packet mbuf to their default values. * @@ -1407,8 +1040,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) m->ol_flags = 0; m->packet_type = 0; - m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ? 
- RTE_PKTMBUF_HEADROOM : m->buf_len; + rte_pktmbuf_reset_headroom(m); m->data_len = 0; __rte_mbuf_sanity_check(m, 1); @@ -1526,7 +1158,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) mi->buf_addr = m->buf_addr; mi->buf_len = m->buf_len; - mi->next = m->next; mi->data_off = m->data_off; mi->data_len = m->data_len; mi->port = m->port; @@ -1572,7 +1203,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) m->buf_addr = (char *)m + mbuf_size; m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; m->buf_len = (uint16_t)buf_len; - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + rte_pktmbuf_reset_headroom(m); m->data_len = 0; m->ol_flags = 0; @@ -1960,6 +1591,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m) return !!(m->nb_segs == 1); } +/** + * @internal used by rte_pktmbuf_read(). + */ +const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, + uint32_t len, void *buf); + +/** + * Read len data bytes in a mbuf at specified offset. + * + * If the data is contiguous, return the pointer in the mbuf data, else + * copy the data in the buffer provided by the user and return its + * pointer. + * + * @param m + * The pointer to the mbuf. + * @param off + * The offset of the data in the mbuf. + * @param len + * The amount of bytes to read. + * @param buf + * The buffer where data is copied if it is not contigous in mbuf + * data. Its length should be at least equal to the len parameter. + * @return + * The pointer to the data, either in the mbuf if it is contiguous, + * or in the user buffer. If mbuf is too small, NULL is returned. + */ +static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m, + uint32_t off, uint32_t len, void *buf) +{ + if (likely(off + len <= rte_pktmbuf_data_len(m))) + return rte_pktmbuf_mtod_offset(m, char *, off); + else + return __rte_pktmbuf_read(m, off, len, buf); +} + /** * Chain an mbuf to another, thereby creating a segmented packet. * @@ -1999,7 +1665,109 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail } /** - * Dump an mbuf structure to the console. + * Validate general requirements for Tx offload in mbuf. + * + * This function checks correctness and completeness of Tx offload settings. + * + * @param m + * The packet mbuf to be validated. + * @return + * 0 if packet is valid + */ +static inline int +rte_validate_tx_offload(const struct rte_mbuf *m) +{ + uint64_t ol_flags = m->ol_flags; + uint64_t inner_l3_offset = m->l2_len; + + /* Does packet set any of available offloads? */ + if (!(ol_flags & PKT_TX_OFFLOAD_MASK)) + return 0; + + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + + /* Headers are fragmented */ + if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) + return -ENOTSUP; + + /* IP checksum can be counted only for IPv4 packet */ + if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6)) + return -EINVAL; + + /* IP type not set when required */ + if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) + if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6))) + return -EINVAL; + + /* Check requirements for TSO packet */ + if (ol_flags & PKT_TX_TCP_SEG) + if ((m->tso_segsz == 0) || + ((ol_flags & PKT_TX_IPV4) && + !(ol_flags & PKT_TX_IP_CKSUM))) + return -EINVAL; + + /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. 
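/*
 * Illustrative usage sketch (not part of the patch): rte_pktmbuf_read()
 * above returns a direct pointer when the requested bytes are contiguous
 * and otherwise copies them into the caller's buffer, so a header can be
 * parsed safely even when it straddles segments. The l2_len offset and the
 * use of struct ipv4_hdr from rte_ip.h are assumptions made for the example.
 */
#include <rte_mbuf.h>
#include <rte_ip.h>

static const struct ipv4_hdr *
example_read_ipv4_hdr(const struct rte_mbuf *m, uint32_t l2_len,
                      struct ipv4_hdr *copy_buf)
{
    /* NULL means the packet is shorter than l2_len + the IPv4 header. */
    return rte_pktmbuf_read(m, l2_len, sizeof(struct ipv4_hdr), copy_buf);
}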
*/ + if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && + !(ol_flags & PKT_TX_OUTER_IPV4)) + return -EINVAL; + + return 0; +} + +/** + * Linearize data in mbuf. + * + * This function moves the mbuf data in the first segment if there is enough + * tailroom. The subsequent segments are unchained and freed. + * + * @param mbuf + * mbuf to linearize + * @return + * - 0, on success + * - -1, on error + */ +static inline int +rte_pktmbuf_linearize(struct rte_mbuf *mbuf) +{ + int seg_len, copy_len; + struct rte_mbuf *m; + struct rte_mbuf *m_next; + char *buffer; + + if (rte_pktmbuf_is_contiguous(mbuf)) + return 0; + + /* Extend first segment to the total packet length */ + copy_len = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf); + + if (unlikely(copy_len > rte_pktmbuf_tailroom(mbuf))) + return -1; + + buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len); + mbuf->data_len = (uint16_t)(mbuf->pkt_len); + + /* Append data from next segments to the first one */ + m = mbuf->next; + while (m != NULL) { + m_next = m->next; + + seg_len = rte_pktmbuf_data_len(m); + rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), seg_len); + buffer += seg_len; + + rte_pktmbuf_free_seg(m); + m = m_next; + } + + mbuf->next = NULL; + mbuf->nb_segs = 1; + + return 0; +} + +/** + * Dump an mbuf structure to a file. * * Dump all fields for the given packet mbuf and all its associated * segments (in the case of a chained buffer). diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c new file mode 100644 index 00000000..e5c4fae3 --- /dev/null +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
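/*
 * Illustrative usage sketch (not part of the patch): a TX path can combine
 * the two helpers added above, rejecting mbufs whose offload flags are
 * inconsistent and linearizing multi-segment packets when the driver needs
 * contiguous data. Whether a given PMD needs linearization is left as a
 * caller-provided flag; the wrapper name is an assumption for the example.
 */
#include <rte_mbuf.h>

static int
example_prepare_for_tx(struct rte_mbuf *m, int pmd_needs_contiguous)
{
    int ret;

    ret = rte_validate_tx_offload(m);
    if (ret != 0)
        return ret;     /* -EINVAL or -ENOTSUP from the checks above */

    if (pmd_needs_contiguous && !rte_pktmbuf_is_contiguous(m))
        return rte_pktmbuf_linearize(m);    /* -1 if tailroom is too short */

    return 0;
}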
+ */ + +#include + +#include +#include + +/* get the name of the l2 packet type */ +const char *rte_get_ptype_l2_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L2_MASK) { + case RTE_PTYPE_L2_ETHER: return "L2_ETHER"; + case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC"; + case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP"; + case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP"; + case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH"; + case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN"; + case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ"; + default: return "L2_UNKNOWN"; + } +} + +/* get the name of the l3 packet type */ +const char *rte_get_ptype_l3_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L3_MASK) { + case RTE_PTYPE_L3_IPV4: return "L3_IPV4"; + case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT"; + case RTE_PTYPE_L3_IPV6: return "L3_IPV6"; + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN"; + case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT"; + case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN"; + default: return "L3_UNKNOWN"; + } +} + +/* get the name of the l4 packet type */ +const char *rte_get_ptype_l4_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_L4_MASK) { + case RTE_PTYPE_L4_TCP: return "L4_TCP"; + case RTE_PTYPE_L4_UDP: return "L4_UDP"; + case RTE_PTYPE_L4_FRAG: return "L4_FRAG"; + case RTE_PTYPE_L4_SCTP: return "L4_SCTP"; + case RTE_PTYPE_L4_ICMP: return "L4_ICMP"; + case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG"; + default: return "L4_UNKNOWN"; + } +} + +/* get the name of the tunnel packet type */ +const char *rte_get_ptype_tunnel_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_TUNNEL_MASK) { + case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP"; + case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE"; + case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN"; + case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE"; + case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE"; + case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT"; + default: return "TUNNEL_UNKNOWN"; + } +} + +/* get the name of the inner_l2 packet type */ +const char *rte_get_ptype_inner_l2_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L2_MASK) { + case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER"; + case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN"; + case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ"; + default: return "INNER_L2_UNKNOWN"; + } +} + +/* get the name of the inner_l3 packet type */ +const char *rte_get_ptype_inner_l3_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L3_MASK) { + case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4"; + case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT"; + case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6"; + case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN: + return "INNER_L3_IPV4_EXT_UNKNOWN"; + case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT"; + case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN: + return "INNER_L3_IPV6_EXT_UNKNOWN"; + default: return "INNER_L3_UNKNOWN"; + } +} + +/* get the name of the inner_l4 packet type */ +const char *rte_get_ptype_inner_l4_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L4_MASK) { + case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP"; + case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP"; + case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG"; + case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP"; + case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP"; + case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG"; + 
default: return "INNER_L4_UNKNOWN"; + } +} + +/* write the packet type name into the buffer */ +int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen) +{ + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) { + ret = snprintf(buf, buflen, "UNKNOWN"); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + return 0; + } + + if ((ptype & RTE_PTYPE_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l2_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_tunnel_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l2_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h new file mode 100644 index 00000000..ff6de9d1 --- /dev/null +++ b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h @@ -0,0 +1,668 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. + * Copyright 2014-2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MBUF_PTYPE_H_ +#define _RTE_MBUF_PTYPE_H_ + +/** + * @file + * RTE Mbuf Packet Types + * + * This file contains declarations for features related to mbuf packet + * types. The packet type gives information about the data carried by the + * mbuf, and is stored in the mbuf in a 32 bits field. + * + * The 32 bits are divided into several fields to mark packet types. Note that + * each field is indexical. + * - Bit 3:0 is for L2 types. + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. + * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. + * - Bit 15:12 is for tunnel types. + * - Bit 19:16 is for inner L2 types. + * - Bit 23:20 is for inner L3 types. + * - Bit 27:24 is for inner L4 types. + * - Bit 31:28 is reserved. + * + * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. + * + * Note that L3 types values are selected for checking IPV4/IPV6 header from + * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and + * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. + * + * Note that the packet types of the same packet recognized by different + * hardware may be different, as different hardware may have different + * capability of packet type recognition. + * + * examples: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=0x29 + * | 'version'=6, 'next header'=0x3A + * | 'ICMPv6 header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_IP | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_ICMP. + * + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x2F + * | 'GRE header' + * | 'version'=6, 'next header'=0x11 + * | 'UDP header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_GRENAT | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_UDP. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * No packet type information. + */ +#define RTE_PTYPE_UNKNOWN 0x00000000 +/** + * Ethernet packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=[0x0800|0x86DD]> + */ +#define RTE_PTYPE_L2_ETHER 0x00000001 +/** + * Ethernet packet type for time sync. + * + * Packet format: + * <'ether type'=0x88F7> + */ +#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 +/** + * ARP (Address Resolution Protocol) packet type. + * + * Packet format: + * <'ether type'=0x0806> + */ +#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 +/** + * LLDP (Link Layer Discovery Protocol) packet type. + * + * Packet format: + * <'ether type'=0x88CC> + */ +#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 +/** + * NSH (Network Service Header) packet type. 
+ * + * Packet format: + * <'ether type'=0x894F> + */ +#define RTE_PTYPE_L2_ETHER_NSH 0x00000005 +/** + * VLAN packet type. + * + * Packet format: + * <'ether type'=[0x8100]> + */ +#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006 +/** + * QinQ packet type. + * + * Packet format: + * <'ether type'=[0x88A8]> + */ +#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007 +/** + * Mask of layer 2 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L2_MASK 0x0000000f +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * header option. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_L3_IPV4 0x00000010 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and contains header + * options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * extension header. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_L3_IPV6 0x00000040 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * header options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and contains extension + * headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * extension headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 +/** + * Mask of layer 3 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L3_MASK 0x000000f0 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_L4_TCP 0x00000100 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_L4_UDP 0x00000200 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, which can be recognized as + * fragmented. A fragmented packet cannot be recognized as any other L4 types + * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, + * RTE_PTYPE_L4_NONFRAG). 
+ * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'frag_offset'!=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_L4_FRAG 0x00000300 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_L4_SCTP 0x00000400 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_L4_ICMP 0x00000500 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, while cannot be recognized as + * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, + * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_L4_NONFRAG 0x00000600 +/** + * Mask of layer 4 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L4_MASK 0x00000f00 +/** + * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=[4|41]> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[4|41]> + */ +#define RTE_PTYPE_TUNNEL_IP 0x00001000 +/** + * GRE (Generic Routing Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47> + */ +#define RTE_PTYPE_TUNNEL_GRE 0x00002000 +/** + * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=4798> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=4798> + */ +#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 +/** + * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling + * packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol type'=0x6558> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47 + * | 'protocol type'=0x6558'> + */ +#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 +/** + * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=6081> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=6081> + */ +#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 +/** + * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area + * Network) or GRE (Generic Routing Encapsulation) could be recognized as this + * packet type, if they can not be recognized independently as of hardware + * capability. 
+ */ +#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 +/** + * Mask of tunneling packet types. + */ +#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 +/** + * Ethernet packet type. + * It is used for inner packet type only. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD]> + */ +#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 +/** + * Ethernet packet type with VLAN (Virtual Local Area Network) tag. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 +/** + * QinQ packet type. + * + * Packet format: + * <'ether type'=[0x88A8]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_QINQ 0x00030000 +/** + * Mask of inner layer 2 packet types. + */ +#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and does not contain any header option. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and contains header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and does not contain any extension header. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and may or maynot contain header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and contains extension headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and may or maynot contain extension + * headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 +/** + * Mask of inner layer 3 packet types. + */ +#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_INNER_L4_TCP 0x01000000 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_INNER_L4_UDP 0x02000000 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have layer 4 packet. 
+ * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'frag_offset'!=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have other unknown layer + * 4 packet types. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 +/** + * Mask of inner layer 4 packet types. + */ +#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 +/** + * All valid layer masks. + */ +#define RTE_PTYPE_ALL_MASK 0x0fffffff + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) + +/* Check if it is a tunneling packet */ +#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & \ + (RTE_PTYPE_TUNNEL_MASK | \ + RTE_PTYPE_INNER_L2_MASK | \ + RTE_PTYPE_INNER_L3_MASK | \ + RTE_PTYPE_INNER_L4_MASK)) + +/** + * Get the name of the l2 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l2_name(uint32_t ptype); + +/** + * Get the name of the l3 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l3_name(uint32_t ptype); + +/** + * Get the name of the l4 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_l4_name(uint32_t ptype); + +/** + * Get the name of the tunnel packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_tunnel_name(uint32_t ptype); + +/** + * Get the name of the inner_l2 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l2_name(uint32_t ptype); + +/** + * Get the name of the inner_l3 packet type + * + * @param ptype + * The packet type value. 
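/*
 * Illustrative usage sketch (not part of this patch): classify an mbuf using
 * the RTE_PTYPE_* masks and the RTE_ETH_IS_*_HDR / RTE_ETH_IS_TUNNEL_PKT
 * macros defined above. The "plain TCP over IPv4" predicate is an assumption
 * of the example.
 */
static inline int
example_is_plain_tcp4(const struct rte_mbuf *m)
{
	uint32_t ptype = m->packet_type;

	return RTE_ETH_IS_IPV4_HDR(ptype) &&
	       (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP &&
	       !RTE_ETH_IS_TUNNEL_PKT(ptype);
}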
+ * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l3_name(uint32_t ptype); + +/** + * Get the name of the inner_l4 packet type + * + * @param ptype + * The packet type value. + * @return + * A non-null string describing the packet type. + */ +const char *rte_get_ptype_inner_l4_name(uint32_t ptype); + +/** + * Write the packet type name into the buffer + * + * @param ptype + * The packet type value. + * @param buf + * The buffer where the string is written. + * @param buflen + * The length of the buffer. + * @return + * - 0 on success + * - (-1) if the buffer is too small + */ +int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MBUF_PTYPE_H_ */ diff --git a/src/dpdk/lib/librte_mempool/rte_mempool.c b/src/dpdk/lib/librte_mempool/rte_mempool.c index 2e28e2e8..1c2aed8c 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool.c +++ b/src/dpdk/lib/librte_mempool/rte_mempool.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -429,7 +428,7 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, /* populate with the largest group of contiguous pages */ for (n = 1; (i + n) < pg_num && - paddr[i] + pg_sz == paddr[i+n]; n++) + paddr[i + n - 1] + pg_sz == paddr[i + n]; n++) ; ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz, @@ -579,8 +578,10 @@ rte_mempool_populate_default(struct rte_mempool *mp) mz->len, pg_sz, rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz); - if (ret < 0) + if (ret < 0) { + rte_memzone_free(mz); goto fail; + } } return mp->size; @@ -817,7 +818,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, goto exit_unlock; } mp->mz = mz; - mp->socket_id = socket_id; mp->size = n; mp->flags = flags; mp->socket_id = socket_id; @@ -879,7 +879,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, * Since we have 4 combinations of the SP/SC/MP/MC examine the flags to * set the correct index into the table of ops structs. */ - if (flags & (MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) + if ((flags & MEMPOOL_F_SP_PUT) && (flags & MEMPOOL_F_SC_GET)) rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL); else if (flags & MEMPOOL_F_SP_PUT) rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL); @@ -909,9 +909,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, /* * Create the mempool over already allocated chunk of memory. * That external memory buffer can consists of physically disjoint pages. - * Setting vaddr to NULL, makes mempool to fallback to original behaviour - * and allocate space for mempool and it's elements as one big chunk of - * physically continuos memory. + * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create() + * behavior. */ struct rte_mempool * rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, diff --git a/src/dpdk/lib/librte_mempool/rte_mempool.h b/src/dpdk/lib/librte_mempool/rte_mempool.h index 059ad9e5..d0f5b27a 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool.h +++ b/src/dpdk/lib/librte_mempool/rte_mempool.h @@ -75,6 +75,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -216,6 +217,7 @@ struct rte_mempool { * RTE_MEMPOOL_NAMESIZE next time the ABI changes */ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */ + RTE_STD_C11 union { void *pool_data; /**< Ring or pool to store objects. */ uint64_t pool_id; /**< External mempool identifier. 
*/ @@ -355,7 +357,7 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, * Prototype for implementation specific data provisioning function. * * The function should provide the implementation specific memory for - * for use by the other mempool ops functions in a given mempool ops struct. + * use by the other mempool ops functions in a given mempool ops struct. * E.g. the default ops provides an instance of the rte_ring for this purpose. * it will most likely point to a different type of data structure, and * will be transparent to the application programmer. @@ -549,7 +551,7 @@ int rte_mempool_register_ops(const struct rte_mempool_ops *ops); /** * Macro to statically register the ops of a mempool handler. * Note that the rte_mempool_register_ops fails silently here when - * more then RTE_MEMPOOL_MAX_OPS_IDX is registered. + * more than RTE_MEMPOOL_MAX_OPS_IDX is registered. */ #define MEMPOOL_REGISTER_OPS(ops) \ void mp_hdlr_init_##ops(void); \ @@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); /** * Create a new mempool named *name* in memory. * - * This function uses ``memzone_reserve()`` to allocate memory. The + * This function uses ``rte_memzone_reserve()`` to allocate memory. The * pool contains n elements of elt_size. Its size is set to n. - * All elements of the mempool are allocated together with the mempool header, - * in one physically continuous chunk of memory. * * @param name * The name of the mempool. @@ -610,9 +610,7 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); * never be used. The access to the per-lcore table is of course * faster than the multi-producer/consumer pool. The cache can be * disabled if the cache_size argument is set to 0; it can be useful to - * avoid losing objects in cache. Note that even if not used, the - * memory space for cache is always reserved in a mempool structure, - * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * avoid losing objects in cache. * @param private_data_size * The size of the private data appended after the mempool * structure. This is useful for storing some private data after the @@ -746,7 +744,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, * * The mempool is allocated and initialized, but it is not populated: no * memory is allocated for the mempool elements. The user has to call - * rte_mempool_populate_*() or to add memory chunks to the pool. Once + * rte_mempool_populate_*() to add memory chunks to the pool. Once * populated, the user may also want to initialize each object with * rte_mempool_obj_iter(). * @@ -798,6 +796,10 @@ rte_mempool_free(struct rte_mempool *mp); * Add a virtually and physically contiguous memory chunk in the pool * where objects can be instanciated. * + * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR), + * the chunk doesn't need to be physically contiguous (only virtually), + * and allocated objects may span two pages. + * * @param mp * A pointer to the mempool structure. * @param vaddr @@ -946,7 +948,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp, rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg); /** - * Dump the status of the mempool to the console. + * Dump the status of the mempool to a file. 
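/*
 * Illustrative usage sketch (not part of this patch): with the corrected flag
 * handling in rte_mempool_create() above, requesting both MEMPOOL_F_SP_PUT and
 * MEMPOOL_F_SC_GET now selects the "ring_sp_sc" ops. The element count,
 * element size and cache size below are assumptions of the example.
 */
static struct rte_mempool *
example_create_spsc_pool(void)
{
	return rte_mempool_create("example_pool", 4095, 2048, 64, 0,
				  NULL, NULL, NULL, NULL,
				  SOCKET_ID_ANY,
				  MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET);
}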
* * @param f * A pointer to a file for output @@ -1036,19 +1038,15 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) */ static inline void __attribute__((always_inline)) __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { void **cache_objs; /* increment stat now, adding in mempool always success */ __MEMPOOL_STAT_ADD(mp, put, n); - /* No cache provided or single producer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SP_PUT)) - goto ring_enqueue; - - /* Go straight to ring if put would overflow mem allocated for cache */ - if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) + /* No cache provided or if put would overflow mem allocated for cache */ + if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) goto ring_enqueue; cache_objs = &cache->objs[cache->len]; @@ -1102,10 +1100,11 @@ ring_enqueue: */ static inline void __attribute__((always_inline)) rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache, + __rte_unused int flags) { __mempool_check_cookies(mp, obj_table, n, 0); - __mempool_generic_put(mp, obj_table, n, cache, flags); + __mempool_generic_put(mp, obj_table, n, cache); } /** @@ -1242,15 +1241,14 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) */ static inline int __attribute__((always_inline)) __mempool_generic_get(struct rte_mempool *mp, void **obj_table, - unsigned n, struct rte_mempool_cache *cache, int flags) + unsigned n, struct rte_mempool_cache *cache) { int ret; uint32_t index, len; void **cache_objs; - /* No cache provided or single consumer */ - if (unlikely(cache == NULL || flags & MEMPOOL_F_SC_GET || - n >= cache->size)) + /* No cache provided or cannot be satisfied from cache */ + if (unlikely(cache == NULL || n >= cache->size)) goto ring_dequeue; cache_objs = cache->objs; @@ -1324,10 +1322,10 @@ ring_dequeue: */ static inline int __attribute__((always_inline)) rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n, - struct rte_mempool_cache *cache, int flags) + struct rte_mempool_cache *cache, __rte_unused int flags) { int ret; - ret = __mempool_generic_get(mp, obj_table, n, cache, flags); + ret = __mempool_generic_get(mp, obj_table, n, cache); if (ret == 0) __mempool_check_cookies(mp, obj_table, n, 1); return ret; diff --git a/src/dpdk/lib/librte_mempool/rte_mempool_stack.c b/src/dpdk/lib/librte_mempool/rte_mempool_stack.c index 5fd8af24..817f77e6 100644 --- a/src/dpdk/lib/librte_mempool/rte_mempool_stack.c +++ b/src/dpdk/lib/librte_mempool/rte_mempool_stack.c @@ -118,7 +118,7 @@ stack_dequeue(struct rte_mempool *mp, void **obj_table, s->len -= n; rte_spinlock_unlock(&s->sl); - return n; + return 0; } static unsigned diff --git a/src/dpdk/lib/librte_net/rte_ether.h b/src/dpdk/lib/librte_net/rte_ether.h new file mode 100644 index 00000000..ff3d0654 --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_ether.h @@ -0,0 +1,417 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
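/*
 * Illustrative usage sketch (not part of this patch): bulk get/put through the
 * per-lcore default cache, exercising the cache fast path shown above. The
 * burst size is an assumption; the flags argument is now unused, so 0 is
 * passed. rte_lcore_id() assumes <rte_lcore.h>.
 */
static inline void
example_bulk_cycle(struct rte_mempool *mp)
{
	void *objs[32];
	struct rte_mempool_cache *cache;

	cache = rte_mempool_default_cache(mp, rte_lcore_id());
	if (rte_mempool_generic_get(mp, objs, 32, cache, 0) == 0)
		rte_mempool_generic_put(mp, objs, 32, cache, 0);
}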
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHER_H_ +#define _RTE_ETHER_H_ + +/** + * @file + * + * Ethernet Helpers in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include +#include +#include + +#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ +#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ +#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ +#define ETHER_HDR_LEN \ + (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ +#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ +#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */ +#define ETHER_MTU \ + (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ + +#define ETHER_MAX_VLAN_FRAME_LEN \ + (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */ + +#define ETHER_MAX_JUMBO_FRAME_LEN \ + 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ + +#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ + +#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ + +/** + * Ethernet address: + * A universally administered address is uniquely assigned to a device by its + * manufacturer. The first three octets (in transmission order) contain the + * Organizationally Unique Identifier (OUI). The following three (MAC-48 and + * EUI-48) octets are assigned by that organization with the only constraint + * of uniqueness. + * A locally administered address is assigned to a device by a network + * administrator and does not contain OUIs. + * See http://standards.ieee.org/regauth/groupmac/tutorial.html + */ +struct ether_addr { + uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */ +} __attribute__((__packed__)); + +#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ +#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ + +/** + * Check if two Ethernet addresses are the same. + * + * @param ea1 + * A pointer to the first ether_addr structure containing + * the ethernet address. + * @param ea2 + * A pointer to the second ether_addr structure containing + * the ethernet address. + * + * @return + * True (1) if the given two ethernet address are the same; + * False (0) otherwise. 
+ */ +static inline int is_same_ether_addr(const struct ether_addr *ea1, + const struct ether_addr *ea2) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is filled with zeros; + * false (0) otherwise. + */ +static inline int is_zero_ether_addr(const struct ether_addr *ea) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea->addr_bytes[i] != 0x00) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is a unicast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a unicast address; + * false (0) otherwise. + */ +static inline int is_unicast_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0; +} + +/** + * Check if an Ethernet address is a multicast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a multicast address; + * false (0) otherwise. + */ +static inline int is_multicast_ether_addr(const struct ether_addr *ea) +{ + return ea->addr_bytes[0] & ETHER_GROUP_ADDR; +} + +/** + * Check if an Ethernet address is a broadcast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a broadcast address; + * false (0) otherwise. + */ +static inline int is_broadcast_ether_addr(const struct ether_addr *ea) +{ + const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea; + + return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF && + ea_words[2] == 0xFFFF); +} + +/** + * Check if an Ethernet address is a universally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a universally assigned address; + * false (0) otherwise. + */ +static inline int is_universal_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0; +} + +/** + * Check if an Ethernet address is a locally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a locally assigned address; + * false (0) otherwise. + */ +static inline int is_local_admin_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0; +} + +/** + * Check if an Ethernet address is a valid address. Checks that the address is a + * unicast address and is not filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is valid; + * false (0) otherwise. + */ +static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) +{ + return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea)); +} + +/** + * Generate a random Ethernet address that is locally administered + * and not multicast. + * @param addr + * A pointer to Ethernet address. 
+ */ +static inline void eth_random_addr(uint8_t *addr) +{ + uint64_t rand = rte_rand(); + uint8_t *p = (uint8_t *)&rand; + + rte_memcpy(addr, p, ETHER_ADDR_LEN); + addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ +} + +/** + * Fast copy an Ethernet address. + * + * @param ea_from + * A pointer to a ether_addr structure holding the Ethernet address to copy. + * @param ea_to + * A pointer to a ether_addr structure where to copy the Ethernet address. + */ +static inline void ether_addr_copy(const struct ether_addr *ea_from, + struct ether_addr *ea_to) +{ +#ifdef __INTEL_COMPILER + uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes); + uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes); + + to_words[0] = from_words[0]; + to_words[1] = from_words[1]; + to_words[2] = from_words[2]; +#else + /* + * Use the common way, because of a strange gcc warning. + */ + *ea_to = *ea_from; +#endif +} + +#define ETHER_ADDR_FMT_SIZE 18 +/** + * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx. + * + * @param buf + * A pointer to buffer contains the formatted MAC address. + * @param size + * The format buffer size. + * @param eth_addr + * A pointer to a ether_addr structure. + */ +static inline void +ether_format_addr(char *buf, uint16_t size, + const struct ether_addr *eth_addr) +{ + snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", + eth_addr->addr_bytes[0], + eth_addr->addr_bytes[1], + eth_addr->addr_bytes[2], + eth_addr->addr_bytes[3], + eth_addr->addr_bytes[4], + eth_addr->addr_bytes[5]); +} + +/** + * Ethernet header: Contains the destination address, source address + * and frame type. + */ +struct ether_hdr { + struct ether_addr d_addr; /**< Destination address. */ + struct ether_addr s_addr; /**< Source address. */ + uint16_t ether_type; /**< Frame type. */ +} __attribute__((__packed__)); + +/** + * Ethernet VLAN Header. + * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type + * of the encapsulated frame. + */ +struct vlan_hdr { + uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */ + uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */ +} __attribute__((__packed__)); + +/** + * VXLAN protocol header. + * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and + * Reserved fields (24 bits and 8 bits) + */ +struct vxlan_hdr { + uint32_t vx_flags; /**< flag (8) + Reserved (24). */ + uint32_t vx_vni; /**< VNI (24) + Reserved (8). */ +} __attribute__((__packed__)); + +/* Ethernet frame types */ +#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */ +#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */ +#define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */ +#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ +#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ +#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */ +#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ +#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ + +#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) +/**< VXLAN tunnel header length. */ + +/** + * Extract VLAN tag information into mbuf + * + * Software version of VLAN stripping + * + * @param m + * The packet mbuf. 
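/*
 * Illustrative usage sketch (not part of this patch): validate and format a
 * MAC address with the helpers defined above. printf() assumes <stdio.h>.
 */
static inline void
example_log_mac(const struct ether_addr *ea)
{
	char buf[ETHER_ADDR_FMT_SIZE];

	if (!is_valid_assigned_ether_addr(ea))
		return;		/* zero or non-unicast address */
	ether_format_addr(buf, sizeof(buf), ea);
	printf("MAC: %s\n", buf);
}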
+ * @return + * - 0: Success + * - 1: not a vlan packet + */ +static inline int rte_vlan_strip(struct rte_mbuf *m) +{ + struct ether_hdr *eh + = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) + return -1; + + struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); + m->ol_flags |= PKT_RX_VLAN_PKT; + m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); + + /* Copy ether header over rather than moving whole packet */ + memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)), + eh, 2 * ETHER_ADDR_LEN); + + return 0; +} + +/** + * Insert VLAN tag into mbuf. + * + * Software version of VLAN unstripping + * + * @param m + * The packet mbuf. + * @return + * - 0: On success + * -EPERM: mbuf is is shared overwriting would be unsafe + * -ENOSPC: not enough headroom in mbuf + */ +static inline int rte_vlan_insert(struct rte_mbuf **m) +{ + struct ether_hdr *oh, *nh; + struct vlan_hdr *vh; + + /* Can't insert header if mbuf is shared */ + if (rte_mbuf_refcnt_read(*m) > 1) { + struct rte_mbuf *copy; + + copy = rte_pktmbuf_clone(*m, (*m)->pool); + if (unlikely(copy == NULL)) + return -ENOMEM; + rte_pktmbuf_free(*m); + *m = copy; + } + + oh = rte_pktmbuf_mtod(*m, struct ether_hdr *); + nh = (struct ether_hdr *) + rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr)); + if (nh == NULL) + return -ENOSPC; + + memmove(nh, oh, 2 * ETHER_ADDR_LEN); + nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); + + vh = (struct vlan_hdr *) (nh + 1); + vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); + + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHER_H_ */ diff --git a/src/dpdk/lib/librte_net/rte_gre.h b/src/dpdk/lib/librte_net/rte_gre.h new file mode 100644 index 00000000..46568ff5 --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_gre.h @@ -0,0 +1,71 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
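/*
 * Illustrative usage sketch (not part of this patch): software VLAN handling
 * with rte_vlan_strip()/rte_vlan_insert() defined above. Re-inserting the tag
 * immediately is done here only to show both calls.
 */
static inline int
example_vlan_roundtrip(struct rte_mbuf **m)
{
	if (rte_vlan_strip(*m) != 0)
		return 0;	/* not an 802.1Q tagged frame */
	/* The TCI is now in (*m)->vlan_tci; put the tag back into the data. */
	return rte_vlan_insert(m);
}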
+ */ + +#ifndef _RTE_GRE_H_ +#define _RTE_GRE_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * GRE Header + */ +struct gre_hdr { +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + uint16_t res2:4; /**< Reserved */ + uint16_t s:1; /**< Sequence Number Present bit */ + uint16_t k:1; /**< Key Present bit */ + uint16_t res1:1; /**< Reserved */ + uint16_t c:1; /**< Checksum Present bit */ + uint16_t ver:3; /**< Version Number */ + uint16_t res3:5; /**< Reserved */ +#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN + uint16_t c:1; /**< Checksum Present bit */ + uint16_t res1:1; /**< Reserved */ + uint16_t k:1; /**< Key Present bit */ + uint16_t s:1; /**< Sequence Number Present bit */ + uint16_t res2:4; /**< Reserved */ + uint16_t res3:5; /**< Reserved */ + uint16_t ver:3; /**< Version Number */ +#endif + uint16_t proto; /**< Protocol Type */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_GRE_H_ */ diff --git a/src/dpdk/lib/librte_net/rte_ip.h b/src/dpdk/lib/librte_net/rte_ip.h index 5b7554ab..4491b86e 100644 --- a/src/dpdk/lib/librte_net/rte_ip.h +++ b/src/dpdk/lib/librte_net/rte_ip.h @@ -229,6 +229,77 @@ rte_raw_cksum(const void *buf, size_t len) return __rte_raw_cksum_reduce(sum); } +/** + * Compute the raw (non complemented) checksum of a packet. + * + * @param m + * The pointer to the mbuf. + * @param off + * The offset in bytes to start the checksum. + * @param len + * The length in bytes of the data to ckecksum. + * @param cksum + * A pointer to the checksum, filled on success. + * @return + * 0 on success, -1 on error (bad length or offset). + */ +static inline int +rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len, + uint16_t *cksum) +{ + const struct rte_mbuf *seg; + const char *buf; + uint32_t sum, tmp; + uint32_t seglen, done; + + /* easy case: all data in the first segment */ + if (off + len <= rte_pktmbuf_data_len(m)) { + *cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m, + const char *, off), len); + return 0; + } + + if (unlikely(off + len > rte_pktmbuf_pkt_len(m))) + return -1; /* invalid params, return a dummy value */ + + /* else browse the segment to find offset */ + seglen = 0; + for (seg = m; seg != NULL; seg = seg->next) { + seglen = rte_pktmbuf_data_len(seg); + if (off < seglen) + break; + off -= seglen; + } + seglen -= off; + buf = rte_pktmbuf_mtod_offset(seg, const char *, off); + if (seglen >= len) { + /* all in one segment */ + *cksum = rte_raw_cksum(buf, len); + return 0; + } + + /* hard case: process checksum of several segments */ + sum = 0; + done = 0; + for (;;) { + tmp = __rte_raw_cksum(buf, seglen, 0); + if (done & 1) + tmp = rte_bswap16(tmp); + sum += tmp; + done += seglen; + if (done == len) + break; + seg = seg->next; + buf = rte_pktmbuf_mtod(seg, const char *); + seglen = rte_pktmbuf_data_len(seg); + if (seglen > len - done) + seglen = len - done; + } + + *cksum = __rte_raw_cksum_reduce(sum); + return 0; +} + /** * Process the IPv4 checksum of an IPv4 header. * diff --git a/src/dpdk/lib/librte_net/rte_net.c b/src/dpdk/lib/librte_net/rte_net.c new file mode 100644 index 00000000..a8c7aff9 --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_net.c @@ -0,0 +1,517 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. 
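/*
 * Illustrative usage sketch (not part of this patch): rte_raw_cksum_mbuf(),
 * added above, checksums a byte range that may span mbuf segments. Treating
 * hdr_len as the number of leading bytes to skip is an assumption of the
 * example.
 */
static inline int
example_payload_cksum(const struct rte_mbuf *m, uint32_t hdr_len,
		      uint16_t *csum)
{
	if (hdr_len > rte_pktmbuf_pkt_len(m))
		return -1;
	return rte_raw_cksum_mbuf(m, hdr_len,
				  rte_pktmbuf_pkt_len(m) - hdr_len, csum);
}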
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* get l3 packet type from ip6 next protocol */ +static uint32_t +ptype_l3_ip6(uint8_t ip6_proto) +{ + static const uint32_t ip6_ext_proto_map[256] = { + [IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + [IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6, + }; + + return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto]; +} + +/* get l3 packet type from ip version and header length */ +static uint32_t +ptype_l3_ip(uint8_t ipv_ihl) +{ + static const uint32_t ptype_l3_ip_proto_map[256] = { + [0x45] = RTE_PTYPE_L3_IPV4, + [0x46] = RTE_PTYPE_L3_IPV4_EXT, + [0x47] = RTE_PTYPE_L3_IPV4_EXT, + [0x48] = RTE_PTYPE_L3_IPV4_EXT, + [0x49] = RTE_PTYPE_L3_IPV4_EXT, + [0x4A] = RTE_PTYPE_L3_IPV4_EXT, + [0x4B] = RTE_PTYPE_L3_IPV4_EXT, + [0x4C] = RTE_PTYPE_L3_IPV4_EXT, + [0x4D] = RTE_PTYPE_L3_IPV4_EXT, + [0x4E] = RTE_PTYPE_L3_IPV4_EXT, + [0x4F] = RTE_PTYPE_L3_IPV4_EXT, + }; + + return ptype_l3_ip_proto_map[ipv_ihl]; +} + +/* get l4 packet type from proto */ +static uint32_t +ptype_l4(uint8_t proto) +{ + static const uint32_t ptype_l4_proto[256] = { + [IPPROTO_UDP] = RTE_PTYPE_L4_UDP, + [IPPROTO_TCP] = RTE_PTYPE_L4_TCP, + [IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP, + }; + + return ptype_l4_proto[proto]; +} + +/* get inner l3 packet type from ip6 next protocol */ +static uint32_t +ptype_inner_l3_ip6(uint8_t ip6_proto) +{ + static const uint32_t ptype_inner_ip6_ext_proto_map[256] = { + [IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_ESP] = 
RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + [IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT - + RTE_PTYPE_INNER_L3_IPV6, + }; + + return RTE_PTYPE_INNER_L3_IPV6 + + ptype_inner_ip6_ext_proto_map[ip6_proto]; +} + +/* get inner l3 packet type from ip version and header length */ +static uint32_t +ptype_inner_l3_ip(uint8_t ipv_ihl) +{ + static const uint32_t ptype_inner_l3_ip_proto_map[256] = { + [0x45] = RTE_PTYPE_INNER_L3_IPV4, + [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT, + }; + + return ptype_inner_l3_ip_proto_map[ipv_ihl]; +} + +/* get inner l4 packet type from proto */ +static uint32_t +ptype_inner_l4(uint8_t proto) +{ + static const uint32_t ptype_inner_l4_proto[256] = { + [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP, + [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP, + [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP, + }; + + return ptype_inner_l4_proto[proto]; +} + +/* get the tunnel packet type if any, update proto and off. */ +static uint32_t +ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m, + uint32_t *off) +{ + switch (*proto) { + case IPPROTO_GRE: { + static const uint8_t opt_len[16] = { + [0x0] = 4, + [0x1] = 8, + [0x2] = 8, + [0x8] = 8, + [0x3] = 12, + [0x9] = 12, + [0xa] = 12, + [0xb] = 16, + }; + const struct gre_hdr *gh; + struct gre_hdr gh_copy; + uint16_t flags; + + gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy); + if (unlikely(gh == NULL)) + return 0; + + flags = rte_be_to_cpu_16(*(const uint16_t *)gh); + flags >>= 12; + if (opt_len[flags] == 0) + return 0; + + *off += opt_len[flags]; + *proto = gh->proto; + if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) + return RTE_PTYPE_TUNNEL_NVGRE; + else + return RTE_PTYPE_TUNNEL_GRE; + } + case IPPROTO_IPIP: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + return RTE_PTYPE_TUNNEL_IP; + case IPPROTO_IPV6: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */ + default: + return 0; + } +} + +/* get the ipv4 header length */ +static uint8_t +ip4_hlen(const struct ipv4_hdr *hdr) +{ + return (hdr->version_ihl & 0xf) * 4; +} + +/* parse ipv6 extended headers, update offset and return next proto */ +static uint16_t +skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, + int *frag) +{ + struct ext_hdr { + uint8_t next_hdr; + uint8_t len; + }; + const struct ext_hdr *xh; + struct ext_hdr xh_copy; + unsigned int i; + + *frag = 0; + +#define MAX_EXT_HDRS 5 + for (i = 0; i < MAX_EXT_HDRS; i++) { + switch (proto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += (xh->len + 1) * 8; + proto = xh->next_hdr; + break; + case IPPROTO_FRAGMENT: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += 8; + proto = xh->next_hdr; + *frag = 1; + return proto; /* this is always the last ext hdr */ + case IPPROTO_NONE: + return 0; + default: + return proto; + } + } + return 0; +} + +/* parse mbuf data to get packet type */ +uint32_t rte_net_get_ptype(const struct rte_mbuf *m, + struct 
rte_net_hdr_lens *hdr_lens, uint32_t layers) +{ + struct rte_net_hdr_lens local_hdr_lens; + const struct ether_hdr *eh; + struct ether_hdr eh_copy; + uint32_t pkt_type = RTE_PTYPE_L2_ETHER; + uint32_t off = 0; + uint16_t proto; + + if (hdr_lens == NULL) + hdr_lens = &local_hdr_lens; + + eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return 0; + proto = eh->ether_type; + off = sizeof(*eh); + hdr_lens->l2_len = off; + + if ((layers & RTE_PTYPE_L2_MASK) == 0) + return 0; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + goto l3; /* fast path if packet is IPv4 */ + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + l3: + if ((layers & RTE_PTYPE_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_l3_ip(ip4h->version_ihl); + hdr_lens->l3_len = ip4_hlen(ip4h); + off += hdr_lens->l3_len; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (ip4h->fragment_offset & rte_cpu_to_be_16( + IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr *ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->l3_len = sizeof(*ip6h); + off += hdr_lens->l3_len; + pkt_type |= ptype_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) { + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->l3_len = off - hdr_lens->l2_len; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) { + hdr_lens->l4_len = sizeof(struct udp_hdr); + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK); + hdr_lens->l4_len = (th->data_off & 0xf0) >> 2; + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) { + hdr_lens->l4_len = sizeof(struct sctp_hdr); + return pkt_type; + } else { + uint32_t prev_off = off; + + hdr_lens->l4_len = 0; + + if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0) + return pkt_type; + + pkt_type |= ptype_tunnel(&proto, m, &off); + 
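/* Editor's note, not part of the upstream source: at this point ptype_tunnel()
 * has advanced 'off' past any tunnel header it recognised: a 4-byte base GRE
 * header plus 4 more bytes for each of the checksum, key and sequence options
 * (see the opt_len[] table above; flag combinations with the routing bit set
 * are unsupported and yield 0), and no extra bytes for IP-in-IP. The next
 * statement records that size as hdr_lens->tunnel_len. */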
hdr_lens->tunnel_len = off - prev_off; + } + + /* same job for inner header: we need to duplicate the code + * because the packet types do not have the same value. + */ + if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) { + eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return pkt_type; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER; + proto = eh->ether_type; + off += sizeof(*eh); + hdr_lens->inner_l2_len = sizeof(*eh); + } + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->inner_l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->inner_l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl); + hdr_lens->inner_l3_len = ip4_hlen(ip4h); + off += hdr_lens->inner_l3_len; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + if (ip4h->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK | + IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_inner_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr *ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->inner_l3_len = sizeof(*ip6h); + off += hdr_lens->inner_l3_len; + pkt_type |= ptype_inner_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) == + RTE_PTYPE_INNER_L3_IPV6_EXT) { + uint32_t prev_off; + + prev_off = off; + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->inner_l3_len += off - prev_off; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_inner_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) { + hdr_lens->inner_l4_len = sizeof(struct udp_hdr); + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_INNER_L2_MASK | + RTE_PTYPE_INNER_L3_MASK); + hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2; + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_SCTP) { + hdr_lens->inner_l4_len = sizeof(struct sctp_hdr); + } 
else { + hdr_lens->inner_l4_len = 0; + } + + return pkt_type; +} diff --git a/src/dpdk/lib/librte_net/rte_net.h b/src/dpdk/lib/librte_net/rte_net.h new file mode 100644 index 00000000..548eaedb --- /dev/null +++ b/src/dpdk/lib/librte_net/rte_net.h @@ -0,0 +1,204 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_NET_PTYPE_H_ +#define _RTE_NET_PTYPE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +/** + * Structure containing header lengths associated to a packet, filled + * by rte_net_get_ptype(). + */ +struct rte_net_hdr_lens { + uint8_t l2_len; + uint8_t l3_len; + uint8_t l4_len; + uint8_t tunnel_len; + uint8_t inner_l2_len; + uint8_t inner_l3_len; + uint8_t inner_l4_len; +}; + +/** + * Parse an Ethernet packet to get its packet type. + * + * This function parses the network headers in mbuf data and return its + * packet type. + * + * If it is provided by the user, it also fills a rte_net_hdr_lens + * structure that contains the lengths of the parsed network + * headers. Each length field is valid only if the associated packet + * type is set. For instance, hdr_lens->l2_len is valid only if + * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN. + * + * Supported packet types are: + * L2: Ether, Vlan, QinQ + * L3: IPv4, IPv6 + * L4: TCP, UDP, SCTP + * Tunnels: IPv4, IPv6, Gre, Nvgre + * + * @param m + * The packet mbuf to be parsed. + * @param hdr_lens + * A pointer to a structure where the header lengths will be returned, + * or NULL. + * @param layers + * List of layers to parse. The function will stop at the first + * empty layer. Examples: + * - To parse all known layers, use RTE_PTYPE_ALL_MASK. + * - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK + * @return + * The packet type of the packet. 
+ */ +uint32_t rte_net_get_ptype(const struct rte_mbuf *m, + struct rte_net_hdr_lens *hdr_lens, uint32_t layers); + +/** + * Prepare pseudo header checksum + * + * This function prepares pseudo header checksum for TSO and non-TSO tcp/udp in + * provided mbufs packet data and based on the requested offload flags. + * + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set + * in packet data, + * - for TSO the IP payload length is not included in pseudo header. + * + * This function expects that used headers are in the first data segment of + * mbuf, are not fragmented and can be safely modified. + * + * @param m + * The packet mbuf to be fixed. + * @param ol_flags + * TX offloads flags to use with this packet. + * @return + * 0 if checksum is initialized properly + */ +static inline int +rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + struct tcp_hdr *tcp_hdr; + struct udp_hdr *udp_hdr; + uint64_t inner_l3_offset = m->l2_len; + + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + + if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) { + if (ol_flags & PKT_TX_IPV4) { + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + inner_l3_offset); + + if (ol_flags & PKT_TX_IP_CKSUM) + ipv4_hdr->hdr_checksum = 0; + + udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + + m->l3_len); + udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, + ol_flags); + } else { + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + inner_l3_offset); + /* non-TSO udp */ + udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, + inner_l3_offset + m->l3_len); + udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, + ol_flags); + } + } else if ((ol_flags & PKT_TX_TCP_CKSUM) || + (ol_flags & PKT_TX_TCP_SEG)) { + if (ol_flags & PKT_TX_IPV4) { + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + inner_l3_offset); + + if (ol_flags & PKT_TX_IP_CKSUM) + ipv4_hdr->hdr_checksum = 0; + + /* non-TSO tcp or TSO */ + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + + m->l3_len); + tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, + ol_flags); + } else { + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + inner_l3_offset); + /* non-TSO tcp or TSO */ + tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, + inner_l3_offset + m->l3_len); + tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, + ol_flags); + } + } + + return 0; +} + +/** + * Prepare pseudo header checksum + * + * This function prepares pseudo header checksum for TSO and non-TSO tcp/udp in + * provided mbufs packet data. + * + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set + * in packet data, + * - for TSO the IP payload length is not included in pseudo header. + * + * This function expects that used headers are in the first data segment of + * mbuf, are not fragmented and can be safely modified. + * + * @param m + * The packet mbuf to be fixed. 
+ * @return + * 0 if checksum is initialized properly + */ +static inline int +rte_net_intel_cksum_prepare(struct rte_mbuf *m) +{ + return rte_net_intel_cksum_flags_prepare(m, m->ol_flags); +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_NET_PTYPE_H_ */ diff --git a/src/dpdk/lib/librte_pipeline/rte_pipeline.h b/src/dpdk/lib/librte_pipeline/rte_pipeline.h index 84d18025..f3663483 100644 --- a/src/dpdk/lib/librte_pipeline/rte_pipeline.h +++ b/src/dpdk/lib/librte_pipeline/rte_pipeline.h @@ -87,6 +87,7 @@ extern "C" { #include #include +#include struct rte_mbuf; @@ -244,6 +245,7 @@ struct rte_pipeline_table_entry { /** Reserved action */ enum rte_pipeline_action action; + RTE_STD_C11 union { /** Output port ID (meta-data for "Send packet to output port" action) */ @@ -252,7 +254,7 @@ struct rte_pipeline_table_entry { uint32_t table_id; }; /** Start of table entry area for user defined actions and meta-data */ - uint8_t action_data[0]; + __extension__ uint8_t action_data[0]; }; /** diff --git a/src/dpdk/lib/librte_port/rte_port_fd.c b/src/dpdk/lib/librte_port/rte_port_fd.c new file mode 100644 index 00000000..0d640f34 --- /dev/null +++ b/src/dpdk/lib/librte_port/rte_port_fd.c @@ -0,0 +1,552 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include +#include + +#include +#include + +#include "rte_port_fd.h" + +/* + * Port FD Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_reader { + struct rte_port_in_stats stats; + int fd; + uint32_t mtu; + struct rte_mempool *mempool; +}; + +static void * +rte_port_fd_reader_create(void *params, int socket_id) +{ + struct rte_port_fd_reader_params *conf = + (struct rte_port_fd_reader_params *) params; + struct rte_port_fd_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + if (conf->fd < 0) { + RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__); + return NULL; + } + if (conf->mempool == NULL) { + RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->mtu = conf->mtu; + port->mempool = conf->mempool; + + return port; +} + +static int +rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port; + uint32_t i; + + if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + return 0; + + for (i = 0; i < n_pkts; i++) { + rte_mbuf_refcnt_set(pkts[i], 1); + rte_pktmbuf_reset(pkts[i]); + } + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void *); + ssize_t n_bytes; + + n_bytes = read(p->fd, pkt_data, (size_t) p->mtu); + if (n_bytes <= 0) + break; + + pkt->data_len = n_bytes; + pkt->pkt_len = n_bytes; + } + + for ( ; i < n_pkts; i++) + rte_pktmbuf_free(pkts[i]); + + RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i); + + return n_pkts; +} + +static int +rte_port_fd_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_fd_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_fd_reader *p = + (struct rte_port_fd_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint32_t fd; +}; + +static void * 
+rte_port_fd_writer_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_params *conf = + (struct rte_port_fd_writer_params *) params; + struct rte_port_fd_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + return port; +} + +static inline void +send_burst(struct rte_port_fd_writer *p) +{ + uint32_t i; + + for (i = 0; i < p->tx_buf_count; i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret < 0) + break; + } + + RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_flush(void *port) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_fd_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define 
RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t n_retries; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_nodrop_params *conf = + (struct rte_port_fd_writer_nodrop_params *) params; + struct rte_port_fd_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->fd < 0) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_fd_writer_nodrop *p) +{ + uint64_t n_retries; + uint32_t i; + + n_retries = 0; + for (i = 0; (i < p->tx_buf_count) && (n_retries < p->n_retries); i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + + for ( ; n_retries < p->n_retries; n_retries++) { + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret) + break; + } + } + + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_flush(void *port) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop 
*) port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_nodrop_flush(port); + rte_free(port); + +return 0; +} + +static int rte_port_fd_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_fd_reader_ops = { + .f_create = rte_port_fd_reader_create, + .f_free = rte_port_fd_reader_free, + .f_rx = rte_port_fd_reader_rx, + .f_stats = rte_port_fd_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_ops = { + .f_create = rte_port_fd_writer_create, + .f_free = rte_port_fd_writer_free, + .f_tx = rte_port_fd_writer_tx, + .f_tx_bulk = rte_port_fd_writer_tx_bulk, + .f_flush = rte_port_fd_writer_flush, + .f_stats = rte_port_fd_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = { + .f_create = rte_port_fd_writer_nodrop_create, + .f_free = rte_port_fd_writer_nodrop_free, + .f_tx = rte_port_fd_writer_nodrop_tx, + .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk, + .f_flush = rte_port_fd_writer_nodrop_flush, + .f_stats = rte_port_fd_writer_nodrop_stats_read, +}; diff --git a/src/dpdk/lib/librte_port/rte_port_fd.h b/src/dpdk/lib/librte_port/rte_port_fd.h new file mode 100644 index 00000000..77a2d31b --- /dev/null +++ b/src/dpdk/lib/librte_port/rte_port_fd.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_RTE_PORT_FD_H__ +#define __INCLUDE_RTE_PORT_FD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port FD Device + * + * fd_reader: input port built on top of valid non-blocking file descriptor + * fd_writer: output port built on top of valid non-blocking file descriptor + * + ***/ + +#include + +#include +#include "rte_port.h" + +/** fd_reader port parameters */ +struct rte_port_fd_reader_params { + /** File descriptor */ + int fd; + + /** Maximum Transfer Unit (MTU) */ + uint32_t mtu; + + /** Pre-initialized buffer pool */ + struct rte_mempool *mempool; +}; + +/** fd_reader port operations */ +extern struct rte_port_in_ops rte_port_fd_reader_ops; + +/** fd_writer port parameters */ +struct rte_port_fd_writer_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. + */ + uint32_t tx_burst_sz; +}; + +/** fd_writer port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_ops; + +/** fd_writer_nodrop port parameters */ +struct rte_port_fd_writer_nodrop_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. + */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** fd_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk/lib/librte_port/rte_port_source_sink.h b/src/dpdk/lib/librte_port/rte_port_source_sink.h index 4db8a8a8..be585a77 100644 --- a/src/dpdk/lib/librte_port/rte_port_source_sink.h +++ b/src/dpdk/lib/librte_port/rte_port_source_sink.h @@ -55,7 +55,7 @@ struct rte_port_source_params { struct rte_mempool *mempool; /** The full path of the pcap file to read packets from */ - char *file_name; + const char *file_name; /** The number of bytes to be read from each packet in the * pcap file. If this value is 0, the whole packet is read; * if it is bigger than packet size, the generated packets @@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops; /** sink port parameters */ struct rte_port_sink_params { /** The full path of the pcap file to write the packets to */ - char *file_name; + const char *file_name; /** The maximum number of packets write to the pcap file. * If this value is 0, the "infinite" write will be carried * out. diff --git a/src/dpdk/lib/librte_ring/rte_ring.h b/src/dpdk/lib/librte_ring/rte_ring.h index 0e22e694..e359affc 100644 --- a/src/dpdk/lib/librte_ring/rte_ring.h +++ b/src/dpdk/lib/librte_ring/rte_ring.h @@ -106,7 +106,7 @@ extern "C" { enum rte_ring_queue_behavior { RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */ - RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items a possible from ring */ + RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items as possible from ring */ }; #ifdef RTE_LIBRTE_RING_DEBUG @@ -187,7 +187,7 @@ struct rte_ring { struct rte_ring_debug_stats stats[RTE_MAX_LCORE]; #endif - void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here. + void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here. * not volatile so need to be careful * about compiler re-ordering */ }; @@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r); int rte_ring_set_water_mark(struct rte_ring *r, unsigned count); /** - * Dump the status of the ring to the console. 
+ * Dump the status of the ring to a file. * * @param f * A pointer to a file for output diff --git a/src/dpdk/lib/librte_table/rte_table_acl.c b/src/dpdk/lib/librte_table/rte_table_acl.c index 8f1f8ceb..94b69a98 100644 --- a/src/dpdk/lib/librte_table/rte_table_acl.c +++ b/src/dpdk/lib/librte_table/rte_table_acl.c @@ -792,7 +792,7 @@ rte_table_acl_lookup( pkts_mask &= ~pkt_mask; - if (action_table_pos != RTE_ACL_INVALID_USERDATA) { + if (action_table_pos != 0) { pkts_out_mask |= pkt_mask; entries[pkt_pos] = (void *) &acl->memory[action_table_pos * diff --git a/src/dpdk/lib/librte_table/rte_table_hash.h b/src/dpdk/lib/librte_table/rte_table_hash.h index 9d17516a..57505a6f 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash.h +++ b/src/dpdk/lib/librte_table/rte_table_hash.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params { /** Extendible bucket hash table operations */ extern struct rte_table_ops rte_table_hash_key32_ext_ops; +/** Cuckoo hash table parameters */ +struct rte_table_hash_cuckoo_params { + /** Key size (number of bytes */ + uint32_t key_size; + + /** Maximum number of hash table entries */ + uint32_t n_keys; + + /** Hash function used to calculate hash */ + rte_table_hash_op_hash f_hash; + + /** Seed value or Init value used by f_hash */ + uint32_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Hash table name */ + const char *name; +}; + +/** Cuckoo hash table operations */ +extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops; + #ifdef __cplusplus } #endif diff --git a/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c b/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c new file mode 100644 index 00000000..ff7baee3 --- /dev/null +++ b/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c @@ -0,0 +1,382 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "rte_table_hash.h" + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \ + (table->stats.n_pkts_in += val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \ + (table->stats.n_pkts_lookup_miss += val) + +#else + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t key_size; + uint32_t entry_size; + uint32_t n_keys; + rte_table_hash_op_hash f_hash; + uint32_t seed; + uint32_t signature_offset; + uint32_t key_offset; + const char *name; + + /* cuckoo hash table object */ + struct rte_hash *h_table; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; }; + +static int +check_params_create_hash_cuckoo(const struct +rte_table_hash_cuckoo_params *params) { + /* Check for valid parameters */ + if (params == NULL) { + RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n"); + return -EINVAL; + } + + if (params->key_size == 0) { + RTE_LOG(ERR, TABLE, "Invalid key_size.\n"); + return -EINVAL; + } + + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "Invalid n_keys.\n"); + return -EINVAL; + } + + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "f_hash is NULL.\n"); + return -EINVAL; + } + + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "Table name is NULL.\n"); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_cuckoo_create(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_hash *rte_hash_handle; + struct rte_table_hash *t; + uint32_t total_size, total_cl_size; + + /* Check input parameters */ + struct rte_table_hash_cuckoo_params *p = + (struct rte_table_hash_cuckoo_params *) params; + + if (check_params_create_hash_cuckoo(params)) + return NULL; + + /* Memory allocation */ + total_cl_size = + (sizeof(struct rte_table_hash) + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_cl_size += (p->n_keys * entry_size + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_size = total_cl_size * RTE_CACHE_LINE_SIZE; + + t = rte_zmalloc_socket("TABLE", + total_size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for Cuckoo hash table\n", + __func__, + (uint32_t)sizeof(struct rte_table_hash)); + return NULL; + } + + /* Create cuckoo hash table */ + struct rte_hash_parameters hash_cuckoo_params = { + .entries = p->n_keys, + .key_len = p->key_size, + .hash_func = (rte_hash_function)(p->f_hash), + .hash_func_init_val = p->seed, + .socket_id = socket_id, + .name = p->name + }; + + rte_hash_handle = rte_hash_find_existing(p->name); + if (rte_hash_handle == NULL) { + rte_hash_handle = rte_hash_create(&hash_cuckoo_params); + if (NULL == rte_hash_handle) { + RTE_LOG(ERR, TABLE, + "%s: failed to create cuckoo hash table. 
keysize: %u", + __func__, hash_cuckoo_params.key_len); + rte_free(t); + return NULL; + } + } + + /* initialize the cuckoo hash parameters */ + t->key_size = p->key_size; + t->entry_size = entry_size; + t->n_keys = p->n_keys; + t->f_hash = p->f_hash; + t->seed = p->seed; + t->signature_offset = p->signature_offset; + t->key_offset = p->key_offset; + t->name = p->name; + t->h_table = rte_hash_handle; + + RTE_LOG(INFO, TABLE, + "%s: Cuckoo Hash table memory footprint is %u bytes\n", + __func__, total_size); + return t; +} + +static int +rte_table_hash_cuckoo_free(void *table) { + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + rte_hash_free(t->h_table); + rte_free(t); + + return 0; +} + +static int +rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, + int *key_found, void **entry_ptr) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + /* Find Existing entries */ + pos = rte_hash_lookup(t->h_table, key); + if (pos >= 0) { + uint8_t *existing_entry; + + *key_found = 1; + existing_entry = &t->memory[pos * t->entry_size]; + memcpy(existing_entry, entry, t->entry_size); + *entry_ptr = existing_entry; + + return 0; +} else if (pos == -ENOENT) { + /* Entry not found. Adding new entry */ + uint8_t *new_entry; + + pos = rte_hash_add_key(t->h_table, key); + if (pos < 0) { + RTE_LOG(ERR, TABLE, + "%s: Entry not added, status : %u\n", + __func__, pos); + return pos; + } + + new_entry = &t->memory[pos * t->entry_size]; + memcpy(new_entry, entry, t->entry_size); + + *key_found = 0; + *entry_ptr = new_entry; + return 0; + } + return pos; +} + +static int +rte_table_hash_cuckoo_entry_delete(void *table, void *key, + int *key_found, __rte_unused void *entry) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + pos = rte_hash_del_key(t->h_table, key); + if (pos >= 0) { + *key_found = 1; + uint8_t *entry_ptr = &t->memory[pos * t->entry_size]; + + if (entry) + memcpy(entry, entry_ptr, t->entry_size); + + memset(&t->memory[pos * t->entry_size], 0, t->entry_size); + } + + return pos; +} + + +static int +rte_table_hash_cuckoo_lookup_dosig(void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *)table; + uint64_t pkts_mask_out = 0; + uint32_t i; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in); + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + const uint8_t *keys[64]; + int32_t positions[64], status; + + /* Keys for bulk lookup */ + for (i = 0; i < n_pkts_in; i++) + keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i], + t->key_offset); + + /* Bulk Lookup */ + status = rte_hash_lookup_bulk(t->h_table, + (const void **) keys, + n_pkts_in, + positions); + + if (status == 0) { + for (i = 0; 
i < n_pkts_in; i++) { + if (likely(positions[i] >= 0)) { + uint64_t pkt_mask = 1LLU << i; + + entries[i] = &t->memory[positions[i] + * t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } else { + for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX + - __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + struct rte_mbuf *pkt = pkts[i]; + uint8_t *key = RTE_MBUF_METADATA_UINT8_PTR(pkt, + t->key_offset); + int pos; + + pos = rte_hash_lookup(t->h_table, key); + if (likely(pos >= 0)) { + entries[i] = &t->memory[pos + * t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t, + n_pkts_in - __builtin_popcountll(pkts_mask_out)); + + return 0; + +} + +static int +rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats, + int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = { + .f_create = rte_table_hash_cuckoo_create, + .f_free = rte_table_hash_cuckoo_free, + .f_add = rte_table_hash_cuckoo_entry_add, + .f_delete = rte_table_hash_cuckoo_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_cuckoo_lookup_dosig, + .f_stats = rte_table_hash_cuckoo_stats_read, +}; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key16.c b/src/dpdk/lib/librte_table/rte_table_hash_key16.c index b7e000fd..08d4d77e 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key16.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key16.c @@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; key_size = 16; @@ -344,7 +344,7 @@ rte_table_hash_create_key16_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key32.c b/src/dpdk/lib/librte_table/rte_table_hash_key32.c index a7aba492..161f6b7a 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key32.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key32.c @@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/src/dpdk/lib/librte_table/rte_table_hash_key8.c b/src/dpdk/lib/librte_table/rte_table_hash_key8.c 
index e2e2bdc4..b04f60dc 100644 --- a/src/dpdk/lib/librte_table/rte_table_hash_key8.c +++ b/src/dpdk/lib/librte_table/rte_table_hash_key8.c @@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -332,7 +332,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) return NULL; n_entries_per_bucket = 4; --
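Editor's note (illustrative only, not part of the upstream patch): a minimal sketch of how a transmit path might combine the rte_net_get_ptype() and rte_net_intel_cksum_prepare() helpers added above, assuming an IPv4/TCP packet whose headers live in the first mbuf segment and can be modified. The wrapper name prepare_tcp_tx() is hypothetical; every other identifier comes from this patch or from rte_mbuf.h.

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_net.h>

static int
prepare_tcp_tx(struct rte_mbuf *m)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t ptype;

	/* Parse all known layers; each hdr_lens field is valid only when the
	 * matching layer shows up in the returned packet type. */
	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);

	/* For brevity, only plain IPv4/TCP is handled here; IPv4 with options
	 * (RTE_PTYPE_L3_IPV4_EXT) and IPv6 packets are left untouched. */
	if ((ptype & RTE_PTYPE_L3_MASK) != RTE_PTYPE_L3_IPV4 ||
	    (ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
		return 0;

	/* Tell the PMD where the headers are and which checksums to offload. */
	m->l2_len = hdr_lens.l2_len;
	m->l3_len = hdr_lens.l3_len;
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;

	/* Write the pseudo-header checksum the NIC expects into the TCP header. */
	return rte_net_intel_cksum_prepare(m);
}

Under these assumptions the function would be called on each packet just before rte_eth_tx_burst(); packets it does not recognise are passed through unchanged.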