From b63264c8342e6a1b6971c79550d2af2024b6a4de Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 14 Aug 2018 18:52:30 +0100 Subject: New upstream version 18.08 Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a Signed-off-by: Luca Boccassi --- lib/Makefile | 44 +- lib/librte_bbdev/rte_bbdev.c | 17 +- lib/librte_bbdev/rte_bbdev.h | 8 +- lib/librte_bbdev/rte_bbdev_op.h | 28 +- lib/librte_bitratestats/meson.build | 1 + lib/librte_bitratestats/rte_bitrate.c | 6 + lib/librte_bpf/Makefile | 41 + lib/librte_bpf/bpf.c | 61 + lib/librte_bpf/bpf_def.h | 143 + lib/librte_bpf/bpf_exec.c | 453 + lib/librte_bpf/bpf_impl.h | 55 + lib/librte_bpf/bpf_jit_x86.c | 1356 +++ lib/librte_bpf/bpf_load.c | 148 + lib/librte_bpf/bpf_load_elf.c | 322 + lib/librte_bpf/bpf_pkt.c | 605 ++ lib/librte_bpf/bpf_validate.c | 2248 ++++ lib/librte_bpf/meson.build | 25 + lib/librte_bpf/rte_bpf.h | 203 + lib/librte_bpf/rte_bpf_ethdev.h | 117 + lib/librte_bpf/rte_bpf_version.map | 16 + lib/librte_cmdline/cmdline_parse.c | 13 +- lib/librte_cmdline/cmdline_parse_etheraddr.c | 2 +- lib/librte_cmdline/cmdline_parse_ipaddr.c | 223 +- lib/librte_cmdline/cmdline_parse_portlist.c | 2 +- lib/librte_cmdline/cmdline_parse_string.c | 4 +- lib/librte_compat/Makefile | 33 +- lib/librte_compressdev/Makefile | 31 + lib/librte_compressdev/meson.build | 12 + lib/librte_compressdev/rte_comp.c | 215 + lib/librte_compressdev/rte_comp.h | 485 + lib/librte_compressdev/rte_compressdev.c | 772 ++ lib/librte_compressdev/rte_compressdev.h | 540 + lib/librte_compressdev/rte_compressdev_internal.h | 114 + lib/librte_compressdev/rte_compressdev_pmd.c | 160 + lib/librte_compressdev/rte_compressdev_pmd.h | 390 + lib/librte_compressdev/rte_compressdev_version.map | 39 + lib/librte_cryptodev/Makefile | 1 + lib/librte_cryptodev/meson.build | 5 +- lib/librte_cryptodev/rte_crypto.h | 82 +- lib/librte_cryptodev/rte_crypto_asym.h | 496 + lib/librte_cryptodev/rte_crypto_sym.h | 17 + lib/librte_cryptodev/rte_cryptodev.c | 430 +- lib/librte_cryptodev/rte_cryptodev.h | 407 +- lib/librte_cryptodev/rte_cryptodev_pmd.c | 12 +- lib/librte_cryptodev/rte_cryptodev_pmd.h | 157 +- lib/librte_cryptodev/rte_cryptodev_version.map | 33 +- lib/librte_eal/bsdapp/Makefile | 2 - lib/librte_eal/bsdapp/contigmem/BSDmakefile | 8 - lib/librte_eal/bsdapp/contigmem/Makefile | 24 - lib/librte_eal/bsdapp/contigmem/contigmem.c | 353 - lib/librte_eal/bsdapp/contigmem/meson.build | 4 - lib/librte_eal/bsdapp/eal/Makefile | 11 +- lib/librte_eal/bsdapp/eal/eal.c | 290 +- lib/librte_eal/bsdapp/eal/eal_alarm.c | 299 +- lib/librte_eal/bsdapp/eal/eal_alarm_private.h | 19 + lib/librte_eal/bsdapp/eal/eal_cpuflags.c | 21 + lib/librte_eal/bsdapp/eal/eal_dev.c | 21 + lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 69 +- lib/librte_eal/bsdapp/eal/eal_interrupts.c | 464 +- lib/librte_eal/bsdapp/eal/eal_memalloc.c | 54 + lib/librte_eal/bsdapp/eal/eal_memory.c | 471 +- lib/librte_eal/bsdapp/eal/eal_thread.c | 2 +- lib/librte_eal/bsdapp/eal/meson.build | 5 + lib/librte_eal/bsdapp/nic_uio/BSDmakefile | 8 - lib/librte_eal/bsdapp/nic_uio/Makefile | 24 - lib/librte_eal/bsdapp/nic_uio/meson.build | 4 - lib/librte_eal/bsdapp/nic_uio/nic_uio.c | 350 - lib/librte_eal/common/Makefile | 4 +- lib/librte_eal/common/arch/arm/rte_cpuflags.c | 54 +- lib/librte_eal/common/arch/arm/rte_hypervisor.c | 2 +- lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 15 +- lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c | 2 +- lib/librte_eal/common/arch/x86/rte_hypervisor.c | 2 +- lib/librte_eal/common/eal_common_bus.c | 3 +- 
lib/librte_eal/common/eal_common_class.c | 64 + lib/librte_eal/common/eal_common_dev.c | 443 +- lib/librte_eal/common/eal_common_devargs.c | 228 +- lib/librte_eal/common/eal_common_fbarray.c | 1239 +++ lib/librte_eal/common/eal_common_hypervisor.c | 2 +- lib/librte_eal/common/eal_common_lcore.c | 75 +- lib/librte_eal/common/eal_common_log.c | 121 +- lib/librte_eal/common/eal_common_memalloc.c | 364 + lib/librte_eal/common/eal_common_memory.c | 528 +- lib/librte_eal/common/eal_common_memzone.c | 290 +- lib/librte_eal/common/eal_common_options.c | 192 +- lib/librte_eal/common/eal_common_proc.c | 713 +- lib/librte_eal/common/eal_common_thread.c | 98 +- lib/librte_eal/common/eal_common_uuid.c | 193 + lib/librte_eal/common/eal_filesystem.h | 70 +- lib/librte_eal/common/eal_hugepages.h | 11 +- lib/librte_eal/common/eal_internal_cfg.h | 20 +- lib/librte_eal/common/eal_memalloc.h | 82 + lib/librte_eal/common/eal_options.h | 8 + lib/librte_eal/common/eal_private.h | 99 + .../common/include/arch/arm/rte_atomic.h | 32 +- .../common/include/arch/arm/rte_atomic_32.h | 32 +- .../common/include/arch/arm/rte_byteorder.h | 32 +- .../common/include/arch/arm/rte_cpuflags.h | 32 +- .../common/include/arch/arm/rte_cpuflags_32.h | 32 +- .../common/include/arch/arm/rte_cycles.h | 32 +- .../common/include/arch/arm/rte_cycles_32.h | 32 +- .../common/include/arch/arm/rte_memcpy.h | 32 +- .../common/include/arch/arm/rte_memcpy_32.h | 32 +- .../common/include/arch/arm/rte_prefetch.h | 32 +- .../common/include/arch/arm/rte_prefetch_32.h | 32 +- .../common/include/arch/arm/rte_rwlock.h | 2 + .../common/include/arch/arm/rte_spinlock.h | 32 +- .../common/include/arch/ppc_64/rte_atomic.h | 23 +- .../common/include/arch/ppc_64/rte_rwlock.h | 2 + .../common/include/arch/x86/rte_atomic.h | 24 + .../common/include/arch/x86/rte_atomic_32.h | 12 + .../common/include/arch/x86/rte_atomic_64.h | 12 + .../common/include/arch/x86/rte_memcpy.h | 24 +- .../common/include/arch/x86/rte_spinlock.h | 4 +- lib/librte_eal/common/include/generic/rte_atomic.h | 90 + .../common/include/generic/rte_byteorder.h | 6 +- .../common/include/generic/rte_cpuflags.h | 21 + lib/librte_eal/common/include/generic/rte_rwlock.h | 4 +- lib/librte_eal/common/include/rte_bitmap.h | 8 +- lib/librte_eal/common/include/rte_bus.h | 4 +- lib/librte_eal/common/include/rte_class.h | 134 + lib/librte_eal/common/include/rte_common.h | 160 +- lib/librte_eal/common/include/rte_dev.h | 211 +- lib/librte_eal/common/include/rte_devargs.h | 173 +- lib/librte_eal/common/include/rte_eal.h | 54 +- lib/librte_eal/common/include/rte_eal_interrupts.h | 1 + lib/librte_eal/common/include/rte_eal_memconfig.h | 28 +- lib/librte_eal/common/include/rte_fbarray.h | 470 + lib/librte_eal/common/include/rte_hypervisor.h | 2 +- lib/librte_eal/common/include/rte_lcore.h | 60 +- lib/librte_eal/common/include/rte_log.h | 40 +- lib/librte_eal/common/include/rte_malloc.h | 10 + lib/librte_eal/common/include/rte_malloc_heap.h | 6 + lib/librte_eal/common/include/rte_memory.h | 330 +- lib/librte_eal/common/include/rte_memzone.h | 45 +- .../common/include/rte_pci_dev_feature_defs.h | 58 +- .../common/include/rte_pci_dev_features.h | 58 +- lib/librte_eal/common/include/rte_random.h | 6 +- lib/librte_eal/common/include/rte_service.h | 167 +- .../common/include/rte_service_component.h | 38 +- lib/librte_eal/common/include/rte_string_fns.h | 31 + lib/librte_eal/common/include/rte_tailq.h | 3 +- lib/librte_eal/common/include/rte_uuid.h | 129 + lib/librte_eal/common/include/rte_version.h | 2 +- 
lib/librte_eal/common/include/rte_vfio.h | 243 +- lib/librte_eal/common/malloc_elem.c | 479 +- lib/librte_eal/common/malloc_elem.h | 51 +- lib/librte_eal/common/malloc_heap.c | 868 +- lib/librte_eal/common/malloc_heap.h | 19 +- lib/librte_eal/common/malloc_mp.c | 743 ++ lib/librte_eal/common/malloc_mp.h | 86 + lib/librte_eal/common/meson.build | 8 + lib/librte_eal/common/rte_malloc.c | 85 +- lib/librte_eal/common/rte_service.c | 130 +- lib/librte_eal/linuxapp/Makefile | 2 - lib/librte_eal/linuxapp/eal/Makefile | 12 +- lib/librte_eal/linuxapp/eal/eal.c | 269 +- lib/librte_eal/linuxapp/eal/eal_alarm.c | 9 +- lib/librte_eal/linuxapp/eal/eal_cpuflags.c | 84 + lib/librte_eal/linuxapp/eal/eal_dev.c | 224 + lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 253 +- lib/librte_eal/linuxapp/eal/eal_interrupts.c | 44 +- lib/librte_eal/linuxapp/eal/eal_log.c | 13 +- lib/librte_eal/linuxapp/eal/eal_memalloc.c | 1363 +++ lib/librte_eal/linuxapp/eal/eal_memory.c | 1528 ++- lib/librte_eal/linuxapp/eal/eal_thread.c | 6 +- lib/librte_eal/linuxapp/eal/eal_timer.c | 12 +- lib/librte_eal/linuxapp/eal/eal_vfio.c | 1586 ++- lib/librte_eal/linuxapp/eal/eal_vfio.h | 60 +- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 402 +- lib/librte_eal/linuxapp/eal/meson.build | 4 + lib/librte_eal/linuxapp/igb_uio/Kbuild | 1 - lib/librte_eal/linuxapp/igb_uio/Makefile | 25 - lib/librte_eal/linuxapp/igb_uio/compat.h | 134 - lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 643 -- lib/librte_eal/linuxapp/igb_uio/meson.build | 24 - lib/librte_eal/linuxapp/kni/Makefile | 58 - lib/librte_eal/linuxapp/kni/compat.h | 106 - lib/librte_eal/linuxapp/kni/ethtool/README | 71 - .../linuxapp/kni/ethtool/igb/e1000_82575.c | 3650 ------- .../linuxapp/kni/ethtool/igb/e1000_82575.h | 494 - .../linuxapp/kni/ethtool/igb/e1000_api.c | 1144 -- .../linuxapp/kni/ethtool/igb/e1000_api.h | 142 - .../linuxapp/kni/ethtool/igb/e1000_defines.h | 1365 --- lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h | 778 -- .../linuxapp/kni/ethtool/igb/e1000_i210.c | 894 -- .../linuxapp/kni/ethtool/igb/e1000_i210.h | 76 - .../linuxapp/kni/ethtool/igb/e1000_mac.c | 2081 ---- .../linuxapp/kni/ethtool/igb/e1000_mac.h | 65 - .../linuxapp/kni/ethtool/igb/e1000_manage.c | 539 - .../linuxapp/kni/ethtool/igb/e1000_manage.h | 74 - .../linuxapp/kni/ethtool/igb/e1000_mbx.c | 510 - .../linuxapp/kni/ethtool/igb/e1000_mbx.h | 72 - .../linuxapp/kni/ethtool/igb/e1000_nvm.c | 950 -- .../linuxapp/kni/ethtool/igb/e1000_nvm.h | 60 - .../linuxapp/kni/ethtool/igb/e1000_osdep.h | 121 - .../linuxapp/kni/ethtool/igb/e1000_phy.c | 3392 ------ .../linuxapp/kni/ethtool/igb/e1000_phy.h | 241 - .../linuxapp/kni/ethtool/igb/e1000_regs.h | 631 -- lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h | 844 -- .../linuxapp/kni/ethtool/igb/igb_ethtool.c | 2842 ----- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 10344 ------------------- .../linuxapp/kni/ethtool/igb/igb_param.c | 832 -- .../linuxapp/kni/ethtool/igb/igb_regtest.h | 234 - lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c | 421 - lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h | 31 - lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 3933 ------- lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h | 910 -- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c | 1281 --- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h | 29 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c | 2299 ----- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h | 43 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 1142 -- .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h | 153 - 
.../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 4067 -------- .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h | 125 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h | 153 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c | 2886 ------ .../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h | 76 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c | 2951 ------ .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h | 90 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h | 117 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c | 1832 ---- .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h | 122 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h | 3239 ------ .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c | 922 -- .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h | 43 - .../linuxapp/kni/ethtool/ixgbe/kcompat.c | 1231 --- .../linuxapp/kni/ethtool/ixgbe/kcompat.h | 3140 ------ lib/librte_eal/linuxapp/kni/kni_dev.h | 106 - lib/librte_eal/linuxapp/kni/kni_ethtool.c | 219 - lib/librte_eal/linuxapp/kni/kni_fifo.h | 75 - lib/librte_eal/linuxapp/kni/kni_misc.c | 663 -- lib/librte_eal/linuxapp/kni/kni_net.c | 757 -- lib/librte_eal/meson.build | 27 +- lib/librte_eal/rte_eal_version.map | 120 +- lib/librte_ethdev/Makefile | 44 + lib/librte_ethdev/ethdev_profile.c | 135 + lib/librte_ethdev/ethdev_profile.h | 27 + lib/librte_ethdev/meson.build | 27 + lib/librte_ethdev/rte_dev_info.h | 49 + lib/librte_ethdev/rte_eth_ctrl.h | 828 ++ lib/librte_ethdev/rte_ethdev.c | 4425 ++++++++ lib/librte_ethdev/rte_ethdev.h | 4298 ++++++++ lib/librte_ethdev/rte_ethdev_core.h | 625 ++ lib/librte_ethdev/rte_ethdev_driver.h | 357 + lib/librte_ethdev/rte_ethdev_pci.h | 210 + lib/librte_ethdev/rte_ethdev_vdev.h | 84 + lib/librte_ethdev/rte_ethdev_version.map | 255 + lib/librte_ethdev/rte_flow.c | 635 ++ lib/librte_ethdev/rte_flow.h | 2208 ++++ lib/librte_ethdev/rte_flow_driver.h | 184 + lib/librte_ethdev/rte_mtr.c | 201 + lib/librte_ethdev/rte_mtr.h | 730 ++ lib/librte_ethdev/rte_mtr_driver.h | 192 + lib/librte_ethdev/rte_tm.c | 409 + lib/librte_ethdev/rte_tm.h | 1965 ++++ lib/librte_ethdev/rte_tm_driver.h | 337 + lib/librte_ether/Makefile | 44 - lib/librte_ether/ethdev_profile.c | 135 - lib/librte_ether/ethdev_profile.h | 27 - lib/librte_ether/meson.build | 27 - lib/librte_ether/rte_dev_info.h | 31 - lib/librte_ether/rte_eth_ctrl.h | 827 -- lib/librte_ether/rte_ethdev.c | 4170 -------- lib/librte_ether/rte_ethdev.h | 4112 -------- lib/librte_ether/rte_ethdev_core.h | 613 -- lib/librte_ether/rte_ethdev_driver.h | 132 - lib/librte_ether/rte_ethdev_pci.h | 196 - lib/librte_ether/rte_ethdev_vdev.h | 84 - lib/librte_ether/rte_ethdev_version.map | 232 - lib/librte_ether/rte_flow.c | 420 - lib/librte_ether/rte_flow.h | 1483 --- lib/librte_ether/rte_flow_driver.h | 121 - lib/librte_ether/rte_mtr.c | 201 - lib/librte_ether/rte_mtr.h | 730 -- lib/librte_ether/rte_mtr_driver.h | 192 - lib/librte_ether/rte_tm.c | 409 - lib/librte_ether/rte_tm.h | 1912 ---- lib/librte_ether/rte_tm_driver.h | 337 - lib/librte_eventdev/Makefile | 15 +- lib/librte_eventdev/meson.build | 19 +- lib/librte_eventdev/rte_event_crypto_adapter.c | 1128 ++ lib/librte_eventdev/rte_event_crypto_adapter.h | 575 ++ lib/librte_eventdev/rte_event_eth_rx_adapter.c | 1784 +++- lib/librte_eventdev/rte_event_eth_rx_adapter.h | 129 +- lib/librte_eventdev/rte_event_ring.c | 15 +- lib/librte_eventdev/rte_event_ring.h | 4 +- lib/librte_eventdev/rte_event_timer_adapter.c | 1299 +++ lib/librte_eventdev/rte_event_timer_adapter.h | 766 ++ lib/librte_eventdev/rte_event_timer_adapter_pmd.h | 114 + lib/librte_eventdev/rte_eventdev.c | 79 +- 
lib/librte_eventdev/rte_eventdev.h | 167 +- lib/librte_eventdev/rte_eventdev_pmd.h | 225 + lib/librte_eventdev/rte_eventdev_version.map | 39 +- lib/librte_flow_classify/rte_flow_classify.c | 9 +- lib/librte_flow_classify/rte_flow_classify_parse.c | 24 +- lib/librte_gso/Makefile | 1 + lib/librte_gso/gso_common.h | 3 + lib/librte_gso/gso_udp4.c | 81 + lib/librte_gso/gso_udp4.h | 42 + lib/librte_gso/meson.build | 2 +- lib/librte_gso/rte_gso.c | 24 +- lib/librte_gso/rte_gso.h | 6 +- lib/librte_hash/meson.build | 1 - lib/librte_hash/rte_cuckoo_hash.c | 700 +- lib/librte_hash/rte_cuckoo_hash.h | 22 +- lib/librte_hash/rte_cuckoo_hash_x86.h | 164 - lib/librte_hash/rte_hash.h | 88 +- lib/librte_hash/rte_hash_crc.h | 11 +- lib/librte_hash/rte_hash_version.map | 8 + lib/librte_ip_frag/ip_frag_internal.c | 8 +- lib/librte_ip_frag/rte_ipv4_reassembly.c | 2 + lib/librte_ip_frag/rte_ipv6_reassembly.c | 2 + lib/librte_kni/meson.build | 2 +- lib/librte_kni/rte_kni.c | 7 +- lib/librte_kvargs/Makefile | 34 +- lib/librte_kvargs/meson.build | 5 + lib/librte_kvargs/rte_kvargs.c | 61 +- lib/librte_kvargs/rte_kvargs.h | 58 + lib/librte_kvargs/rte_kvargs_version.map | 8 + lib/librte_latencystats/rte_latencystats.c | 16 +- lib/librte_mbuf/Makefile | 3 +- lib/librte_mbuf/meson.build | 1 - lib/librte_mbuf/rte_mbuf.c | 26 +- lib/librte_mbuf/rte_mbuf.h | 540 +- lib/librte_mbuf/rte_mbuf_pool_ops.c | 14 +- lib/librte_mbuf/rte_mbuf_pool_ops.h | 13 +- lib/librte_mbuf/rte_mbuf_ptype.c | 3 + lib/librte_mbuf/rte_mbuf_ptype.h | 53 +- lib/librte_mbuf/rte_mbuf_version.map | 4 +- lib/librte_member/rte_member.c | 5 +- lib/librte_mempool/Makefile | 7 +- lib/librte_mempool/meson.build | 18 +- lib/librte_mempool/rte_mempool.c | 546 +- lib/librte_mempool/rte_mempool.h | 588 +- lib/librte_mempool/rte_mempool_ops.c | 51 +- lib/librte_mempool/rte_mempool_ops_default.c | 70 + lib/librte_mempool/rte_mempool_version.map | 23 +- lib/librte_meter/Makefile | 2 +- lib/librte_meter/meson.build | 1 + lib/librte_meter/rte_meter.c | 93 +- lib/librte_meter/rte_meter.h | 197 +- lib/librte_meter/rte_meter_version.map | 7 + lib/librte_metrics/rte_metrics.c | 23 +- lib/librte_net/Makefile | 1 + lib/librte_net/meson.build | 1 + lib/librte_net/rte_esp.h | 2 +- lib/librte_net/rte_ether.h | 31 +- lib/librte_net/rte_ip.h | 42 +- lib/librte_net/rte_net.c | 21 +- lib/librte_net/rte_net.h | 27 + lib/librte_net/rte_net_version.map | 3 +- lib/librte_pci/Makefile | 32 +- lib/librte_pci/rte_pci.c | 11 +- lib/librte_pci/rte_pci_version.map | 5 +- lib/librte_pdump/Makefile | 3 +- lib/librte_pdump/meson.build | 2 + lib/librte_pdump/rte_pdump.c | 428 +- lib/librte_pdump/rte_pdump.h | 9 +- lib/librte_pipeline/Makefile | 7 +- lib/librte_pipeline/meson.build | 7 +- lib/librte_pipeline/rte_pipeline_version.map | 28 + lib/librte_pipeline/rte_port_in_action.c | 531 + lib/librte_pipeline/rte_port_in_action.h | 301 + lib/librte_pipeline/rte_table_action.c | 2386 +++++ lib/librte_pipeline/rte_table_action.h | 905 ++ lib/librte_port/meson.build | 12 +- lib/librte_power/channel_commands.h | 3 +- lib/librte_power/power_acpi_cpufreq.c | 21 + lib/librte_power/power_acpi_cpufreq.h | 16 + lib/librte_power/power_kvm_vm.c | 8 + lib/librte_power/power_kvm_vm.h | 17 + lib/librte_power/rte_power.c | 3 + lib/librte_power/rte_power.h | 32 + lib/librte_power/rte_power_version.map | 9 +- lib/librte_rawdev/Makefile | 1 - lib/librte_rawdev/meson.build | 5 + lib/librte_rawdev/rte_rawdev.c | 76 +- lib/librte_rawdev/rte_rawdev.h | 55 +- lib/librte_rawdev/rte_rawdev_pmd.h | 28 +- 
lib/librte_rawdev/rte_rawdev_version.map | 3 +- lib/librte_ring/Makefile | 2 +- lib/librte_ring/rte_ring.h | 27 +- lib/librte_ring/rte_ring_c11_mem.h | 10 +- lib/librte_ring/rte_ring_generic.h | 10 +- lib/librte_sched/rte_sched.c | 239 +- lib/librte_sched/rte_sched.h | 21 + lib/librte_sched/rte_sched_version.map | 6 + lib/librte_security/rte_security.c | 37 +- lib/librte_security/rte_security.h | 50 +- lib/librte_security/rte_security_driver.h | 40 +- lib/librte_table/Makefile | 1 + lib/librte_table/meson.build | 37 +- lib/librte_table/rte_table_acl.c | 6 - lib/librte_table/rte_table_hash.h | 3 - lib/librte_table/rte_table_hash_cuckoo.c | 11 +- lib/librte_table/rte_table_hash_cuckoo.h | 57 + lib/librte_timer/rte_timer.c | 2 +- lib/librte_vhost/Makefile | 13 +- lib/librte_vhost/fd_man.c | 98 +- lib/librte_vhost/fd_man.h | 17 + lib/librte_vhost/iotlb.c | 10 +- lib/librte_vhost/iotlb.h | 2 +- lib/librte_vhost/meson.build | 11 +- lib/librte_vhost/rte_vdpa.h | 87 + lib/librte_vhost/rte_vhost.h | 212 + lib/librte_vhost/rte_vhost_crypto.h | 109 + lib/librte_vhost/rte_vhost_version.map | 24 + lib/librte_vhost/socket.c | 288 +- lib/librte_vhost/vdpa.c | 115 + lib/librte_vhost/vhost.c | 312 +- lib/librte_vhost/vhost.h | 358 +- lib/librte_vhost/vhost_crypto.c | 1372 +++ lib/librte_vhost/vhost_user.c | 581 +- lib/librte_vhost/vhost_user.h | 56 +- lib/librte_vhost/virtio_crypto.h | 422 + lib/librte_vhost/virtio_net.c | 1542 +-- lib/meson.build | 30 +- 419 files changed, 61429 insertions(+), 90612 deletions(-) create mode 100644 lib/librte_bpf/Makefile create mode 100644 lib/librte_bpf/bpf.c create mode 100644 lib/librte_bpf/bpf_def.h create mode 100644 lib/librte_bpf/bpf_exec.c create mode 100644 lib/librte_bpf/bpf_impl.h create mode 100644 lib/librte_bpf/bpf_jit_x86.c create mode 100644 lib/librte_bpf/bpf_load.c create mode 100644 lib/librte_bpf/bpf_load_elf.c create mode 100644 lib/librte_bpf/bpf_pkt.c create mode 100644 lib/librte_bpf/bpf_validate.c create mode 100644 lib/librte_bpf/meson.build create mode 100644 lib/librte_bpf/rte_bpf.h create mode 100644 lib/librte_bpf/rte_bpf_ethdev.h create mode 100644 lib/librte_bpf/rte_bpf_version.map create mode 100644 lib/librte_compressdev/Makefile create mode 100644 lib/librte_compressdev/meson.build create mode 100644 lib/librte_compressdev/rte_comp.c create mode 100644 lib/librte_compressdev/rte_comp.h create mode 100644 lib/librte_compressdev/rte_compressdev.c create mode 100644 lib/librte_compressdev/rte_compressdev.h create mode 100644 lib/librte_compressdev/rte_compressdev_internal.h create mode 100644 lib/librte_compressdev/rte_compressdev_pmd.c create mode 100644 lib/librte_compressdev/rte_compressdev_pmd.h create mode 100644 lib/librte_compressdev/rte_compressdev_version.map create mode 100644 lib/librte_cryptodev/rte_crypto_asym.h delete mode 100644 lib/librte_eal/bsdapp/contigmem/BSDmakefile delete mode 100644 lib/librte_eal/bsdapp/contigmem/Makefile delete mode 100644 lib/librte_eal/bsdapp/contigmem/contigmem.c delete mode 100644 lib/librte_eal/bsdapp/contigmem/meson.build create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm_private.h create mode 100644 lib/librte_eal/bsdapp/eal/eal_cpuflags.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_dev.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_memalloc.c delete mode 100644 lib/librte_eal/bsdapp/nic_uio/BSDmakefile delete mode 100644 lib/librte_eal/bsdapp/nic_uio/Makefile delete mode 100644 lib/librte_eal/bsdapp/nic_uio/meson.build delete mode 100644 lib/librte_eal/bsdapp/nic_uio/nic_uio.c 
create mode 100644 lib/librte_eal/common/eal_common_class.c create mode 100644 lib/librte_eal/common/eal_common_fbarray.c create mode 100644 lib/librte_eal/common/eal_common_memalloc.c create mode 100644 lib/librte_eal/common/eal_common_uuid.c create mode 100644 lib/librte_eal/common/eal_memalloc.h create mode 100644 lib/librte_eal/common/include/rte_class.h create mode 100644 lib/librte_eal/common/include/rte_fbarray.h create mode 100644 lib/librte_eal/common/include/rte_uuid.h create mode 100644 lib/librte_eal/common/malloc_mp.c create mode 100644 lib/librte_eal/common/malloc_mp.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_cpuflags.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_dev.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_memalloc.c delete mode 100644 lib/librte_eal/linuxapp/igb_uio/Kbuild delete mode 100644 lib/librte_eal/linuxapp/igb_uio/Makefile delete mode 100644 lib/librte_eal/linuxapp/igb_uio/compat.h delete mode 100644 lib/librte_eal/linuxapp/igb_uio/igb_uio.c delete mode 100644 lib/librte_eal/linuxapp/igb_uio/meson.build delete mode 100644 lib/librte_eal/linuxapp/kni/Makefile delete mode 100644 lib/librte_eal/linuxapp/kni/compat.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/README delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c delete mode 
100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h delete mode 100644 lib/librte_eal/linuxapp/kni/kni_dev.h delete mode 100644 lib/librte_eal/linuxapp/kni/kni_ethtool.c delete mode 100644 lib/librte_eal/linuxapp/kni/kni_fifo.h delete mode 100644 lib/librte_eal/linuxapp/kni/kni_misc.c delete mode 100644 lib/librte_eal/linuxapp/kni/kni_net.c create mode 100644 lib/librte_ethdev/Makefile create mode 100644 lib/librte_ethdev/ethdev_profile.c create mode 100644 lib/librte_ethdev/ethdev_profile.h create mode 100644 lib/librte_ethdev/meson.build create mode 100644 lib/librte_ethdev/rte_dev_info.h create mode 100644 lib/librte_ethdev/rte_eth_ctrl.h create mode 100644 lib/librte_ethdev/rte_ethdev.c create mode 100644 lib/librte_ethdev/rte_ethdev.h create mode 100644 lib/librte_ethdev/rte_ethdev_core.h create mode 100644 lib/librte_ethdev/rte_ethdev_driver.h create mode 100644 lib/librte_ethdev/rte_ethdev_pci.h create mode 100644 lib/librte_ethdev/rte_ethdev_vdev.h create mode 100644 lib/librte_ethdev/rte_ethdev_version.map create mode 100644 lib/librte_ethdev/rte_flow.c create mode 100644 lib/librte_ethdev/rte_flow.h create mode 100644 lib/librte_ethdev/rte_flow_driver.h create mode 100644 lib/librte_ethdev/rte_mtr.c create mode 100644 lib/librte_ethdev/rte_mtr.h create mode 100644 lib/librte_ethdev/rte_mtr_driver.h create mode 100644 lib/librte_ethdev/rte_tm.c create mode 100644 lib/librte_ethdev/rte_tm.h create mode 100644 lib/librte_ethdev/rte_tm_driver.h delete mode 100644 lib/librte_ether/Makefile delete mode 100644 lib/librte_ether/ethdev_profile.c delete mode 100644 lib/librte_ether/ethdev_profile.h delete mode 100644 lib/librte_ether/meson.build delete mode 100644 lib/librte_ether/rte_dev_info.h delete mode 100644 lib/librte_ether/rte_eth_ctrl.h delete mode 100644 lib/librte_ether/rte_ethdev.c delete mode 100644 lib/librte_ether/rte_ethdev.h delete mode 100644 lib/librte_ether/rte_ethdev_core.h delete mode 100644 lib/librte_ether/rte_ethdev_driver.h delete mode 100644 lib/librte_ether/rte_ethdev_pci.h delete mode 100644 lib/librte_ether/rte_ethdev_vdev.h delete mode 100644 lib/librte_ether/rte_ethdev_version.map delete mode 100644 lib/librte_ether/rte_flow.c delete mode 100644 lib/librte_ether/rte_flow.h delete mode 100644 
lib/librte_ether/rte_flow_driver.h delete mode 100644 lib/librte_ether/rte_mtr.c delete mode 100644 lib/librte_ether/rte_mtr.h delete mode 100644 lib/librte_ether/rte_mtr_driver.h delete mode 100644 lib/librte_ether/rte_tm.c delete mode 100644 lib/librte_ether/rte_tm.h delete mode 100644 lib/librte_ether/rte_tm_driver.h create mode 100644 lib/librte_eventdev/rte_event_crypto_adapter.c create mode 100644 lib/librte_eventdev/rte_event_crypto_adapter.h create mode 100644 lib/librte_eventdev/rte_event_timer_adapter.c create mode 100644 lib/librte_eventdev/rte_event_timer_adapter.h create mode 100644 lib/librte_eventdev/rte_event_timer_adapter_pmd.h create mode 100644 lib/librte_gso/gso_udp4.c create mode 100644 lib/librte_gso/gso_udp4.h delete mode 100644 lib/librte_hash/rte_cuckoo_hash_x86.h create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c create mode 100644 lib/librte_pipeline/rte_port_in_action.c create mode 100644 lib/librte_pipeline/rte_port_in_action.h create mode 100644 lib/librte_pipeline/rte_table_action.c create mode 100644 lib/librte_pipeline/rte_table_action.h create mode 100644 lib/librte_rawdev/meson.build create mode 100644 lib/librte_table/rte_table_hash_cuckoo.h create mode 100644 lib/librte_vhost/rte_vdpa.h create mode 100644 lib/librte_vhost/rte_vhost_crypto.h create mode 100644 lib/librte_vhost/vdpa.c create mode 100644 lib/librte_vhost/vhost_crypto.c create mode 100644 lib/librte_vhost/virtio_crypto.h (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index ec965a60..afa604e2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -4,7 +4,10 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-y += librte_compat +DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs +DEPDIRS-librte_kvargs := librte_compat DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal +DEPDIRS-librte_eal := librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci DEPDIRS-librte_pci := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring @@ -18,9 +21,10 @@ DEPDIRS-librte_timer := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline DEPDIRS-librte_cmdline := librte_eal -DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether -DEPDIRS-librte_ether := librte_net librte_eal librte_mempool librte_ring -DEPDIRS-librte_ether += librte_mbuf +DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ethdev +DEPDIRS-librte_ethdev := librte_net librte_eal librte_mempool librte_ring +DEPDIRS-librte_ethdev += librte_mbuf +DEPDIRS-librte_ethdev += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += librte_bbdev DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev @@ -28,14 +32,18 @@ DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf DEPDIRS-librte_cryptodev += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_SECURITY) += librte_security DEPDIRS-librte_security := librte_eal librte_mempool librte_ring librte_mbuf -DEPDIRS-librte_security += librte_ether +DEPDIRS-librte_security += librte_ethdev DEPDIRS-librte_security += librte_cryptodev +DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += librte_compressdev +DEPDIRS-librte_compressdev := librte_eal librte_mempool librte_ring librte_mbuf +DEPDIRS-librte_compressdev += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev -DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash +DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ethdev librte_hash \ + librte_mempool librte_timer librte_cryptodev DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) 
+= librte_rawdev -DEPDIRS-librte_rawdev := librte_eal librte_ether +DEPDIRS-librte_rawdev := librte_eal librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost -DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \ +DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \ librte_net DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash DEPDIRS-librte_hash := librte_eal librte_ring @@ -50,18 +58,18 @@ DEPDIRS-librte_member := librte_eal librte_hash DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net DEPDIRS-librte_net := librte_mbuf librte_eal DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag -DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ethdev DEPDIRS-librte_ip_frag += librte_hash DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro -DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net +DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ethdev librte_net DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats DEPDIRS-librte_jobstats := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics DEPDIRS-librte_metrics := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_BITRATE) += librte_bitratestats -DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ether +DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += librte_latencystats -DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ether librte_mbuf +DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ethdev librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power DEPDIRS-librte_power := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter @@ -71,12 +79,10 @@ DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net DEPDIRS-librte_sched += librte_timer -DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs -DEPDIRS-librte_kvargs := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor -DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ether +DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port -DEPDIRS-librte_port := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_port := librte_eal librte_mempool librte_mbuf librte_ethdev DEPDIRS-librte_port += librte_ip_frag librte_sched ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) DEPDIRS-librte_port += librte_kni @@ -93,15 +99,17 @@ DEPDIRS-librte_pipeline += librte_table librte_port DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump -DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso -DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net +DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net DEPDIRS-librte_gso += librte_mempool +DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf +DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni endif -DEPDIRS-librte_kni := librte_eal librte_mempool librte_mbuf librte_ether +DEPDIRS-librte_kni := librte_eal librte_mempool 
librte_mbuf librte_ethdev DEPDIRS-librte_kni += librte_pci include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_bbdev/rte_bbdev.c b/lib/librte_bbdev/rte_bbdev.c index 74ecc490..c4cc18d9 100644 --- a/lib/librte_bbdev/rte_bbdev.c +++ b/lib/librte_bbdev/rte_bbdev.c @@ -495,11 +495,20 @@ rte_bbdev_queue_configure(uint16_t dev_id, uint16_t queue_id, conf->queue_size, queue_id, dev_id); return -EINVAL; } - if (conf->priority > dev_info.max_queue_priority) { + if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC && + conf->priority > dev_info.max_ul_queue_priority) { rte_bbdev_log(ERR, "Priority (%u) of queue %u of bdev %u must be <= %u", conf->priority, queue_id, dev_id, - dev_info.max_queue_priority); + dev_info.max_ul_queue_priority); + return -EINVAL; + } + if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC && + conf->priority > dev_info.max_dl_queue_priority) { + rte_bbdev_log(ERR, + "Priority (%u) of queue %u of bdev %u must be <= %u", + conf->priority, queue_id, dev_id, + dev_info.max_dl_queue_priority); return -EINVAL; } } @@ -1116,9 +1125,7 @@ rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type) return NULL; } -RTE_INIT(rte_bbdev_init_log); -static void -rte_bbdev_init_log(void) +RTE_INIT(rte_bbdev_init_log) { bbdev_logtype = rte_log_register("lib.bbdev"); if (bbdev_logtype >= 0) diff --git a/lib/librte_bbdev/rte_bbdev.h b/lib/librte_bbdev/rte_bbdev.h index 5e7e4954..25ef409f 100644 --- a/lib/librte_bbdev/rte_bbdev.h +++ b/lib/librte_bbdev/rte_bbdev.h @@ -239,6 +239,8 @@ struct rte_bbdev_stats { uint64_t enqueue_err_count; /** Total error count on operations dequeued */ uint64_t dequeue_err_count; + /** Offload time */ + uint64_t offload_time; }; /** @@ -279,8 +281,10 @@ struct rte_bbdev_driver_info { uint32_t queue_size_lim; /** Set if device off-loads operation to hardware */ bool hardware_accelerated; - /** Max value supported by queue priority */ - uint8_t max_queue_priority; + /** Max value supported by queue priority for DL */ + uint8_t max_dl_queue_priority; + /** Max value supported by queue priority for UL */ + uint8_t max_ul_queue_priority; /** Set if device supports per-queue interrupts */ bool queue_intr_supported; /** Minimum alignment of buffers, in bytes */ diff --git a/lib/librte_bbdev/rte_bbdev_op.h b/lib/librte_bbdev/rte_bbdev_op.h index 9a80c64a..83f62c2d 100644 --- a/lib/librte_bbdev/rte_bbdev_op.h +++ b/lib/librte_bbdev/rte_bbdev_op.h @@ -25,7 +25,23 @@ extern "C" { #include #include -#define RTE_BBDEV_MAX_CODE_BLOCKS 64 +/* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */ +#define RTE_BBDEV_C_SUBBLOCK (32) +/* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */ +#define RTE_BBDEV_MAX_TB_SIZE (391656) +/* Maximum size of Code Block (36.212, Table 5.1.3-3) */ +#define RTE_BBDEV_MAX_CB_SIZE (6144) +/* Minimum size of Code Block (36.212, Table 5.1.3-3) */ +#define RTE_BBDEV_MIN_CB_SIZE (40) +/* Maximum size of circular buffer */ +#define RTE_BBDEV_MAX_KW (18528) +/* + * Maximum number of Code Blocks in Transport Block. 
It is calculated based on + * maximum size of one Code Block and one Transport Block (considering CRC24A + * and CRC24B): + * (391656 + 24) / (6144 - 24) = 64 + */ +#define RTE_BBDEV_MAX_CODE_BLOCKS (64) /** Flags for turbo decoder operation and capability structure */ enum rte_bbdev_op_td_flag_bitmasks { @@ -86,7 +102,11 @@ enum rte_bbdev_op_td_flag_bitmasks { */ RTE_BBDEV_TURBO_MAP_DEC = (1ULL << 14), /**< Set if a device supports scatter-gather functionality */ - RTE_BBDEV_TURBO_DEC_SCATTER_GATHER = (1ULL << 15) + RTE_BBDEV_TURBO_DEC_SCATTER_GATHER = (1ULL << 15), + /**< Set to keep CRC24B bits appended while decoding. Only usable when + * decoding Transport Blocks (code_block_mode = 0). + */ + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP = (1ULL << 16) }; /** Flags for turbo encoder operation and capability structure */ @@ -363,6 +383,10 @@ struct rte_bbdev_op_turbo_enc { struct rte_bbdev_op_cap_turbo_dec { /**< Flags from rte_bbdev_op_td_flag_bitmasks */ uint32_t capability_flags; + /** Maximal LLR absolute value. Acceptable LLR values lie in range + * [-max_llr_modulus, max_llr_modulus]. + */ + int8_t max_llr_modulus; uint8_t num_buffers_src; /**< Num input code block buffers */ /**< Num hard output code block buffers */ uint8_t num_buffers_hard_out; diff --git a/lib/librte_bitratestats/meson.build b/lib/librte_bitratestats/meson.build index ede7e0a5..c35b62b3 100644 --- a/lib/librte_bitratestats/meson.build +++ b/lib/librte_bitratestats/meson.build @@ -1,6 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 2 sources = files('rte_bitrate.c') headers = files('rte_bitrate.h') deps += ['ethdev', 'metrics'] diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c index 964e3c39..c4b28f62 100644 --- a/lib/librte_bitratestats/rte_bitrate.c +++ b/lib/librte_bitratestats/rte_bitrate.c @@ -47,6 +47,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data) }; int return_value; + if (bitrate_data == NULL) + return -EINVAL; + return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names)); if (return_value >= 0) bitrate_data->id_stats_set = return_value; @@ -65,6 +68,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, const int64_t alpha_percent = 20; uint64_t values[6]; + if (bitrate_data == NULL) + return -EINVAL; + ret_code = rte_eth_stats_get(port_id, ð_stats); if (ret_code != 0) return ret_code; diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile new file mode 100644 index 00000000..c0e8aaa6 --- /dev/null +++ b/lib/librte_bpf/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_bpf.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +CFLAGS += -DALLOW_EXPERIMENTAL_API +LDLIBS += -lrte_net -lrte_eal +LDLIBS += -lrte_mempool -lrte_ring +LDLIBS += -lrte_mbuf -lrte_ethdev +ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y) +LDLIBS += -lelf +endif + +EXPORT_MAP := rte_bpf_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c +ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y) +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load_elf.c +endif +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c +endif + +# install header files 
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h +SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h +SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c new file mode 100644 index 00000000..f590c8c3 --- /dev/null +++ b/lib/librte_bpf/bpf.c @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf_impl.h" + +int rte_bpf_logtype; + +__rte_experimental void +rte_bpf_destroy(struct rte_bpf *bpf) +{ + if (bpf != NULL) { + if (bpf->jit.func != NULL) + munmap(bpf->jit.func, bpf->jit.sz); + munmap(bpf, bpf->sz); + } +} + +__rte_experimental int +rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit) +{ + if (bpf == NULL || jit == NULL) + return -EINVAL; + + jit[0] = bpf->jit; + return 0; +} + +int +bpf_jit(struct rte_bpf *bpf) +{ + int32_t rc; + +#ifdef RTE_ARCH_X86_64 + rc = bpf_jit_x86(bpf); +#else + rc = -ENOTSUP; +#endif + + if (rc != 0) + RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n", + __func__, bpf, rc); + return rc; +} + +RTE_INIT(rte_bpf_init_log) +{ + rte_bpf_logtype = rte_log_register("lib.bpf"); + if (rte_bpf_logtype >= 0) + rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO); +} diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h new file mode 100644 index 00000000..c10f3aec --- /dev/null +++ b/lib/librte_bpf/bpf_def.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. + * Copyright(c) 2018 Intel Corporation. + */ + +#ifndef _RTE_BPF_DEF_H_ +#define _RTE_BPF_DEF_H_ + +/** + * @file + * + * classic BPF (cBPF) and extended BPF (eBPF) related defines. + * For more information regarding cBPF and eBPF ISA and their differences, + * please refer to: + * https://www.kernel.org/doc/Documentation/networking/filter.txt. + * As a rule of thumb for that file: + * all definitions used by both cBPF and eBPF start with bpf(BPF)_ prefix, + * while eBPF only ones start with ebpf(EBPF)) prefix. + */ + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The instruction encodings. 
+ */ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +#define EBPF_ALU64 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 +#define BPF_H 0x08 +#define BPF_B 0x10 +#define EBPF_DW 0x18 + +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +#define EBPF_XADD 0xc0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define EBPF_MOV 0xb0 +#define EBPF_ARSH 0xc0 +#define EBPF_END 0xd0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 + +#define EBPF_JNE 0x50 +#define EBPF_JSGT 0x60 +#define EBPF_JSGE 0x70 +#define EBPF_CALL 0x80 +#define EBPF_EXIT 0x90 +#define EBPF_JLT 0xa0 +#define EBPF_JLE 0xb0 +#define EBPF_JSLT 0xc0 +#define EBPF_JSLE 0xd0 + +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +/* if BPF_OP(code) == EBPF_END */ +#define EBPF_TO_LE 0x00 /* convert to little-endian */ +#define EBPF_TO_BE 0x08 /* convert to big-endian */ + +/* + * eBPF registers + */ +enum { + EBPF_REG_0, /* return value from internal function/for eBPF program */ + EBPF_REG_1, /* 0-th argument to internal function */ + EBPF_REG_2, /* 1-th argument to internal function */ + EBPF_REG_3, /* 2-th argument to internal function */ + EBPF_REG_4, /* 3-th argument to internal function */ + EBPF_REG_5, /* 4-th argument to internal function */ + EBPF_REG_6, /* callee saved register */ + EBPF_REG_7, /* callee saved register */ + EBPF_REG_8, /* callee saved register */ + EBPF_REG_9, /* callee saved register */ + EBPF_REG_10, /* stack pointer (read-only) */ + EBPF_REG_NUM, +}; + +/* + * eBPF instruction format + */ +struct ebpf_insn { + uint8_t code; + uint8_t dst_reg:4; + uint8_t src_reg:4; + int16_t off; + int32_t imm; +}; + +/* + * eBPF allows functions with R1-R5 as arguments. + */ +#define EBPF_FUNC_MAX_ARGS (EBPF_REG_6 - EBPF_REG_1) + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_BPF_DEF_H_ */ diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c new file mode 100644 index 00000000..6a79139c --- /dev/null +++ b/lib/librte_bpf/bpf_exec.c @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +#define BPF_JMP_UNC(ins) ((ins) += (ins)->off) + +#define BPF_JMP_CND_REG(reg, ins, op, type) \ + ((ins) += \ + ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \ + (ins)->off : 0) + +#define BPF_JMP_CND_IMM(reg, ins, op, type) \ + ((ins) += \ + ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? 
\ + (ins)->off : 0) + +#define BPF_NEG_ALU(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg])) + +#define EBPF_MOV_ALU_REG(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg]) + +#define BPF_OP_ALU_REG(reg, ins, op, type) \ + ((reg)[(ins)->dst_reg] = \ + (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) + +#define EBPF_MOV_ALU_IMM(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(ins)->imm) + +#define BPF_OP_ALU_IMM(reg, ins, op, type) \ + ((reg)[(ins)->dst_reg] = \ + (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) + +#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \ + if ((type)(reg)[(ins)->src_reg] == 0) { \ + RTE_BPF_LOG(ERR, \ + "%s(%p): division by 0 at pc: %#zx;\n", \ + __func__, bpf, \ + (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \ + return 0; \ + } \ +} while (0) + +#define BPF_LD_REG(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = \ + *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off)) + +#define BPF_ST_IMM(reg, ins, type) \ + (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \ + (type)(ins)->imm) + +#define BPF_ST_REG(reg, ins, type) \ + (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \ + (type)(reg)[(ins)->src_reg]) + +#define BPF_ST_XADD_REG(reg, ins, tp) \ + (rte_atomic##tp##_add((rte_atomic##tp##_t *) \ + (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \ + reg[ins->src_reg])) + +static inline void +bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins) +{ + uint64_t *v; + + v = reg + ins->dst_reg; + switch (ins->imm) { + case 16: + *v = rte_cpu_to_be_16(*v); + break; + case 32: + *v = rte_cpu_to_be_32(*v); + break; + case 64: + *v = rte_cpu_to_be_64(*v); + break; + } +} + +static inline void +bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins) +{ + uint64_t *v; + + v = reg + ins->dst_reg; + switch (ins->imm) { + case 16: + *v = rte_cpu_to_le_16(*v); + break; + case 32: + *v = rte_cpu_to_le_32(*v); + break; + case 64: + *v = rte_cpu_to_le_64(*v); + break; + } +} + +static inline uint64_t +bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM]) +{ + const struct ebpf_insn *ins; + + for (ins = bpf->prm.ins; ; ins++) { + switch (ins->code) { + /* 32 bit ALU IMM operations */ + case (BPF_ALU | BPF_ADD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, +, uint32_t); + break; + case (BPF_ALU | BPF_SUB | BPF_K): + BPF_OP_ALU_IMM(reg, ins, -, uint32_t); + break; + case (BPF_ALU | BPF_AND | BPF_K): + BPF_OP_ALU_IMM(reg, ins, &, uint32_t); + break; + case (BPF_ALU | BPF_OR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, |, uint32_t); + break; + case (BPF_ALU | BPF_LSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, <<, uint32_t); + break; + case (BPF_ALU | BPF_RSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, uint32_t); + break; + case (BPF_ALU | BPF_XOR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, ^, uint32_t); + break; + case (BPF_ALU | BPF_MUL | BPF_K): + BPF_OP_ALU_IMM(reg, ins, *, uint32_t); + break; + case (BPF_ALU | BPF_DIV | BPF_K): + BPF_OP_ALU_IMM(reg, ins, /, uint32_t); + break; + case (BPF_ALU | BPF_MOD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, %, uint32_t); + break; + case (BPF_ALU | EBPF_MOV | BPF_K): + EBPF_MOV_ALU_IMM(reg, ins, uint32_t); + break; + /* 32 bit ALU REG operations */ + case (BPF_ALU | BPF_ADD | BPF_X): + BPF_OP_ALU_REG(reg, ins, +, uint32_t); + break; + case (BPF_ALU | BPF_SUB | BPF_X): + BPF_OP_ALU_REG(reg, ins, -, uint32_t); + break; + case (BPF_ALU | BPF_AND | BPF_X): + BPF_OP_ALU_REG(reg, ins, &, uint32_t); + break; + case (BPF_ALU | BPF_OR | BPF_X): + BPF_OP_ALU_REG(reg, ins, 
|, uint32_t); + break; + case (BPF_ALU | BPF_LSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, <<, uint32_t); + break; + case (BPF_ALU | BPF_RSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, uint32_t); + break; + case (BPF_ALU | BPF_XOR | BPF_X): + BPF_OP_ALU_REG(reg, ins, ^, uint32_t); + break; + case (BPF_ALU | BPF_MUL | BPF_X): + BPF_OP_ALU_REG(reg, ins, *, uint32_t); + break; + case (BPF_ALU | BPF_DIV | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t); + BPF_OP_ALU_REG(reg, ins, /, uint32_t); + break; + case (BPF_ALU | BPF_MOD | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t); + BPF_OP_ALU_REG(reg, ins, %, uint32_t); + break; + case (BPF_ALU | EBPF_MOV | BPF_X): + EBPF_MOV_ALU_REG(reg, ins, uint32_t); + break; + case (BPF_ALU | BPF_NEG): + BPF_NEG_ALU(reg, ins, uint32_t); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_BE): + bpf_alu_be(reg, ins); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_LE): + bpf_alu_le(reg, ins); + break; + /* 64 bit ALU IMM operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, +, uint64_t); + break; + case (EBPF_ALU64 | BPF_SUB | BPF_K): + BPF_OP_ALU_IMM(reg, ins, -, uint64_t); + break; + case (EBPF_ALU64 | BPF_AND | BPF_K): + BPF_OP_ALU_IMM(reg, ins, &, uint64_t); + break; + case (EBPF_ALU64 | BPF_OR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, |, uint64_t); + break; + case (EBPF_ALU64 | BPF_LSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, <<, uint64_t); + break; + case (EBPF_ALU64 | BPF_RSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, uint64_t); + break; + case (EBPF_ALU64 | EBPF_ARSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, int64_t); + break; + case (EBPF_ALU64 | BPF_XOR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, ^, uint64_t); + break; + case (EBPF_ALU64 | BPF_MUL | BPF_K): + BPF_OP_ALU_IMM(reg, ins, *, uint64_t); + break; + case (EBPF_ALU64 | BPF_DIV | BPF_K): + BPF_OP_ALU_IMM(reg, ins, /, uint64_t); + break; + case (EBPF_ALU64 | BPF_MOD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, %, uint64_t); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_K): + EBPF_MOV_ALU_IMM(reg, ins, uint64_t); + break; + /* 64 bit ALU REG operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_X): + BPF_OP_ALU_REG(reg, ins, +, uint64_t); + break; + case (EBPF_ALU64 | BPF_SUB | BPF_X): + BPF_OP_ALU_REG(reg, ins, -, uint64_t); + break; + case (EBPF_ALU64 | BPF_AND | BPF_X): + BPF_OP_ALU_REG(reg, ins, &, uint64_t); + break; + case (EBPF_ALU64 | BPF_OR | BPF_X): + BPF_OP_ALU_REG(reg, ins, |, uint64_t); + break; + case (EBPF_ALU64 | BPF_LSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, <<, uint64_t); + break; + case (EBPF_ALU64 | BPF_RSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, uint64_t); + break; + case (EBPF_ALU64 | EBPF_ARSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, int64_t); + break; + case (EBPF_ALU64 | BPF_XOR | BPF_X): + BPF_OP_ALU_REG(reg, ins, ^, uint64_t); + break; + case (EBPF_ALU64 | BPF_MUL | BPF_X): + BPF_OP_ALU_REG(reg, ins, *, uint64_t); + break; + case (EBPF_ALU64 | BPF_DIV | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t); + BPF_OP_ALU_REG(reg, ins, /, uint64_t); + break; + case (EBPF_ALU64 | BPF_MOD | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t); + BPF_OP_ALU_REG(reg, ins, %, uint64_t); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_X): + EBPF_MOV_ALU_REG(reg, ins, uint64_t); + break; + case (EBPF_ALU64 | BPF_NEG): + BPF_NEG_ALU(reg, ins, uint64_t); + break; + /* load instructions */ + case (BPF_LDX | BPF_MEM | BPF_B): + BPF_LD_REG(reg, ins, uint8_t); + break; + case (BPF_LDX | BPF_MEM | BPF_H): + BPF_LD_REG(reg, ins, uint16_t); + break; + case (BPF_LDX | BPF_MEM | BPF_W): + 
BPF_LD_REG(reg, ins, uint32_t); + break; + case (BPF_LDX | BPF_MEM | EBPF_DW): + BPF_LD_REG(reg, ins, uint64_t); + break; + /* load 64 bit immediate value */ + case (BPF_LD | BPF_IMM | EBPF_DW): + reg[ins->dst_reg] = (uint32_t)ins[0].imm | + (uint64_t)(uint32_t)ins[1].imm << 32; + ins++; + break; + /* store instructions */ + case (BPF_STX | BPF_MEM | BPF_B): + BPF_ST_REG(reg, ins, uint8_t); + break; + case (BPF_STX | BPF_MEM | BPF_H): + BPF_ST_REG(reg, ins, uint16_t); + break; + case (BPF_STX | BPF_MEM | BPF_W): + BPF_ST_REG(reg, ins, uint32_t); + break; + case (BPF_STX | BPF_MEM | EBPF_DW): + BPF_ST_REG(reg, ins, uint64_t); + break; + case (BPF_ST | BPF_MEM | BPF_B): + BPF_ST_IMM(reg, ins, uint8_t); + break; + case (BPF_ST | BPF_MEM | BPF_H): + BPF_ST_IMM(reg, ins, uint16_t); + break; + case (BPF_ST | BPF_MEM | BPF_W): + BPF_ST_IMM(reg, ins, uint32_t); + break; + case (BPF_ST | BPF_MEM | EBPF_DW): + BPF_ST_IMM(reg, ins, uint64_t); + break; + /* atomic add instructions */ + case (BPF_STX | EBPF_XADD | BPF_W): + BPF_ST_XADD_REG(reg, ins, 32); + break; + case (BPF_STX | EBPF_XADD | EBPF_DW): + BPF_ST_XADD_REG(reg, ins, 64); + break; + /* jump instructions */ + case (BPF_JMP | BPF_JA): + BPF_JMP_UNC(ins); + break; + /* jump IMM instructions */ + case (BPF_JMP | BPF_JEQ | BPF_K): + BPF_JMP_CND_IMM(reg, ins, ==, uint64_t); + break; + case (BPF_JMP | EBPF_JNE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, !=, uint64_t); + break; + case (BPF_JMP | BPF_JGT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >, uint64_t); + break; + case (BPF_JMP | EBPF_JLT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <, uint64_t); + break; + case (BPF_JMP | BPF_JGE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >=, uint64_t); + break; + case (BPF_JMP | EBPF_JLE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <=, uint64_t); + break; + case (BPF_JMP | EBPF_JSGT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >, int64_t); + break; + case (BPF_JMP | EBPF_JSLT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <, int64_t); + break; + case (BPF_JMP | EBPF_JSGE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >=, int64_t); + break; + case (BPF_JMP | EBPF_JSLE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <=, int64_t); + break; + case (BPF_JMP | BPF_JSET | BPF_K): + BPF_JMP_CND_IMM(reg, ins, &, uint64_t); + break; + /* jump REG instructions */ + case (BPF_JMP | BPF_JEQ | BPF_X): + BPF_JMP_CND_REG(reg, ins, ==, uint64_t); + break; + case (BPF_JMP | EBPF_JNE | BPF_X): + BPF_JMP_CND_REG(reg, ins, !=, uint64_t); + break; + case (BPF_JMP | BPF_JGT | BPF_X): + BPF_JMP_CND_REG(reg, ins, >, uint64_t); + break; + case (BPF_JMP | EBPF_JLT | BPF_X): + BPF_JMP_CND_REG(reg, ins, <, uint64_t); + break; + case (BPF_JMP | BPF_JGE | BPF_X): + BPF_JMP_CND_REG(reg, ins, >=, uint64_t); + break; + case (BPF_JMP | EBPF_JLE | BPF_X): + BPF_JMP_CND_REG(reg, ins, <=, uint64_t); + break; + case (BPF_JMP | EBPF_JSGT | BPF_X): + BPF_JMP_CND_REG(reg, ins, >, int64_t); + break; + case (BPF_JMP | EBPF_JSLT | BPF_X): + BPF_JMP_CND_REG(reg, ins, <, int64_t); + break; + case (BPF_JMP | EBPF_JSGE | BPF_X): + BPF_JMP_CND_REG(reg, ins, >=, int64_t); + break; + case (BPF_JMP | EBPF_JSLE | BPF_X): + BPF_JMP_CND_REG(reg, ins, <=, int64_t); + break; + case (BPF_JMP | BPF_JSET | BPF_X): + BPF_JMP_CND_REG(reg, ins, &, uint64_t); + break; + /* call instructions */ + case (BPF_JMP | EBPF_CALL): + reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func.val( + reg[EBPF_REG_1], reg[EBPF_REG_2], + reg[EBPF_REG_3], reg[EBPF_REG_4], + reg[EBPF_REG_5]); + break; + /* return instruction */ + case (BPF_JMP | EBPF_EXIT): + return reg[EBPF_REG_0]; + default: + 
RTE_BPF_LOG(ERR, + "%s(%p): invalid opcode %#x at pc: %#zx;\n", + __func__, bpf, ins->code, + (uintptr_t)ins - (uintptr_t)bpf->prm.ins); + return 0; + } + } + + /* should never be reached */ + RTE_VERIFY(0); + return 0; +} + +__rte_experimental uint32_t +rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[], + uint32_t num) +{ + uint32_t i; + uint64_t reg[EBPF_REG_NUM]; + uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)]; + + for (i = 0; i != num; i++) { + + reg[EBPF_REG_1] = (uintptr_t)ctx[i]; + reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack)); + + rc[i] = bpf_exec(bpf, reg); + } + + return i; +} + +__rte_experimental uint64_t +rte_bpf_exec(const struct rte_bpf *bpf, void *ctx) +{ + uint64_t rc; + + rte_bpf_exec_burst(bpf, &ctx, &rc, 1); + return rc; +} diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h new file mode 100644 index 00000000..b577e2cb --- /dev/null +++ b/lib/librte_bpf/bpf_impl.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _BPF_H_ +#define _BPF_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_BPF_STACK_SIZE 0x200 + +struct rte_bpf { + struct rte_bpf_prm prm; + struct rte_bpf_jit jit; + size_t sz; + uint32_t stack_sz; +}; + +extern int bpf_validate(struct rte_bpf *bpf); + +extern int bpf_jit(struct rte_bpf *bpf); + +#ifdef RTE_ARCH_X86_64 +extern int bpf_jit_x86(struct rte_bpf *); +#endif + +extern int rte_bpf_logtype; + +#define RTE_BPF_LOG(lvl, fmt, args...) \ + rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args) + +static inline size_t +bpf_size(uint32_t bpf_op_sz) +{ + if (bpf_op_sz == BPF_B) + return sizeof(uint8_t); + else if (bpf_op_sz == BPF_H) + return sizeof(uint16_t); + else if (bpf_op_sz == BPF_W) + return sizeof(uint32_t); + else if (bpf_op_sz == EBPF_DW) + return sizeof(uint64_t); + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _BPF_H_ */ diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c new file mode 100644 index 00000000..68ea389f --- /dev/null +++ b/lib/librte_bpf/bpf_jit_x86.c @@ -0,0 +1,1356 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +#define GET_BPF_OP(op) (BPF_OP(op) >> 4) + +enum { + RAX = 0, /* scratch, return value */ + RCX = 1, /* scratch, 4th arg */ + RDX = 2, /* scratch, 3rd arg */ + RBX = 3, /* callee saved */ + RSP = 4, /* stack pointer */ + RBP = 5, /* frame pointer, callee saved */ + RSI = 6, /* scratch, 2nd arg */ + RDI = 7, /* scratch, 1st arg */ + R8 = 8, /* scratch, 5th arg */ + R9 = 9, /* scratch, 6th arg */ + R10 = 10, /* scratch */ + R11 = 11, /* scratch */ + R12 = 12, /* callee saved */ + R13 = 13, /* callee saved */ + R14 = 14, /* callee saved */ + R15 = 15, /* callee saved */ +}; + +#define IS_EXT_REG(r) ((r) >= R8) + +enum { + REX_PREFIX = 0x40, /* fixed value 0100 */ + REX_W = 0x8, /* 64bit operand size */ + REX_R = 0x4, /* extension of the ModRM.reg field */ + REX_X = 0x2, /* extension of the SIB.index field */ + REX_B = 0x1, /* extension of the ModRM.rm field */ +}; + +enum { + MOD_INDIRECT = 0, + MOD_IDISP8 = 1, + MOD_IDISP32 = 2, + MOD_DIRECT = 3, +}; + +enum { + SIB_SCALE_1 = 0, + SIB_SCALE_2 = 1, + SIB_SCALE_4 = 2, + SIB_SCALE_8 = 3, +}; + +/* + * eBPF to x86_64 register mappings. 
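 *
 * A minimal usage sketch for the rte_bpf_exec()/rte_bpf_exec_burst() entry
 * points defined above; the helper name and its caller-supplied buffers are
 * illustrative only, and `bpf` is assumed to come from rte_bpf_load() or
 * rte_bpf_elf_load():
 *
 *	static uint32_t
 *	count_matches(const struct rte_bpf *bpf, void *ctx[], uint32_t num)
 *	{
 *		uint32_t i, n;
 *		uint64_t rc[num];
 *
 *		// one run of the program per context, one result per rc[] slot
 *		rte_bpf_exec_burst(bpf, ctx, rc, num);
 *
 *		n = 0;
 *		for (i = 0; i != num; i++)
 *			n += (rc[i] != 0);	// non-zero return value == match
 *		return n;
 *	}
 *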
+ */ +static const uint32_t ebpf2x86[] = { + [EBPF_REG_0] = RAX, + [EBPF_REG_1] = RDI, + [EBPF_REG_2] = RSI, + [EBPF_REG_3] = RDX, + [EBPF_REG_4] = RCX, + [EBPF_REG_5] = R8, + [EBPF_REG_6] = RBX, + [EBPF_REG_7] = R13, + [EBPF_REG_8] = R14, + [EBPF_REG_9] = R15, + [EBPF_REG_10] = RBP, +}; + +/* + * r10 and r11 are used as a scratch temporary registers. + */ +enum { + REG_DIV_IMM = R9, + REG_TMP0 = R11, + REG_TMP1 = R10, +}; + +/* + * callee saved registers list. + * keep RBP as the last one. + */ +static const uint32_t save_regs[] = {RBX, R12, R13, R14, R15, RBP}; + +struct bpf_jit_state { + uint32_t idx; + size_t sz; + struct { + uint32_t num; + int32_t off; + } exit; + uint32_t reguse; + int32_t *off; + uint8_t *ins; +}; + +#define INUSE(v, r) (((v) >> (r)) & 1) +#define USED(v, r) ((v) |= 1 << (r)) + +union bpf_jit_imm { + uint32_t u32; + uint8_t u8[4]; +}; + +/* + * In many cases for imm8 we can produce shorter code. + */ +static size_t +imm_size(int32_t v) +{ + if (v == (int8_t)v) + return sizeof(int8_t); + return sizeof(int32_t); +} + +static void +emit_bytes(struct bpf_jit_state *st, const uint8_t ins[], uint32_t sz) +{ + uint32_t i; + + if (st->ins != NULL) { + for (i = 0; i != sz; i++) + st->ins[st->sz + i] = ins[i]; + } + st->sz += sz; +} + +static void +emit_imm(struct bpf_jit_state *st, const uint32_t imm, uint32_t sz) +{ + union bpf_jit_imm v; + + v.u32 = imm; + emit_bytes(st, v.u8, sz); +} + +/* + * emit REX byte + */ +static void +emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm) +{ + uint8_t rex; + + /* mark operand registers as used*/ + USED(st->reguse, reg); + USED(st->reguse, rm); + + rex = 0; + if (BPF_CLASS(op) == EBPF_ALU64 || + op == (BPF_ST | BPF_MEM | EBPF_DW) || + op == (BPF_STX | BPF_MEM | EBPF_DW) || + op == (BPF_STX | EBPF_XADD | EBPF_DW) || + op == (BPF_LD | BPF_IMM | EBPF_DW) || + (BPF_CLASS(op) == BPF_LDX && + BPF_MODE(op) == BPF_MEM && + BPF_SIZE(op) != BPF_W)) + rex |= REX_W; + + if (IS_EXT_REG(reg)) + rex |= REX_R; + + if (IS_EXT_REG(rm)) + rex |= REX_B; + + /* store using SIL, DIL */ + if (op == (BPF_STX | BPF_MEM | BPF_B) && (reg == RDI || reg == RSI)) + rex |= REX_PREFIX; + + if (rex != 0) { + rex |= REX_PREFIX; + emit_bytes(st, &rex, sizeof(rex)); + } +} + +/* + * emit MODRegRM byte + */ +static void +emit_modregrm(struct bpf_jit_state *st, uint32_t mod, uint32_t reg, uint32_t rm) +{ + uint8_t v; + + v = mod << 6 | (reg & 7) << 3 | (rm & 7); + emit_bytes(st, &v, sizeof(v)); +} + +/* + * emit SIB byte + */ +static void +emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base) +{ + uint8_t v; + + v = scale << 6 | (idx & 7) << 3 | (base & 7); + emit_bytes(st, &v, sizeof(v)); +} + +/* + * emit xchg %, % + */ +static void +emit_xchg_reg(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg) +{ + const uint8_t ops = 0x87; + + emit_rex(st, EBPF_ALU64, sreg, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); +} + +/* + * emit neg % + */ +static void +emit_neg(struct bpf_jit_state *st, uint32_t op, uint32_t dreg) +{ + const uint8_t ops = 0xF7; + const uint8_t mods = 3; + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, dreg); +} + +/* + * emit mov %, % + */ +static void +emit_mov_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + const uint8_t ops = 0x89; + + /* if operands are 32-bit, then it can be used to clear upper 32-bit */ + if (sreg != dreg || BPF_CLASS(op) == BPF_ALU) { + emit_rex(st, 
op, sreg, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); + } +} + +/* + * emit movzwl %, % + */ +static void +emit_movzwl(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg) +{ + static const uint8_t ops[] = {0x0F, 0xB7}; + + emit_rex(st, BPF_ALU, sreg, dreg); + emit_bytes(st, ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); +} + +/* + * emit ror , % + */ +static void +emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm) +{ + const uint8_t prfx = 0x66; + const uint8_t ops = 0xC1; + const uint8_t mods = 1; + + emit_bytes(st, &prfx, sizeof(prfx)); + emit_rex(st, BPF_ALU, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, dreg); + emit_imm(st, imm, imm_size(imm)); +} + +/* + * emit bswap % + */ +static void +emit_be2le_48(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm) +{ + uint32_t rop; + + const uint8_t ops = 0x0F; + const uint8_t mods = 1; + + rop = (imm == 64) ? EBPF_ALU64 : BPF_ALU; + emit_rex(st, rop, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, dreg); +} + +static void +emit_be2le(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm) +{ + if (imm == 16) { + emit_ror_imm(st, dreg, 8); + emit_movzwl(st, dreg, dreg); + } else + emit_be2le_48(st, dreg, imm); +} + +/* + * In general it is NOP for x86. + * Just clear the upper bits. + */ +static void +emit_le2be(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm) +{ + if (imm == 16) + emit_movzwl(st, dreg, dreg); + else if (imm == 32) + emit_mov_reg(st, BPF_ALU | EBPF_MOV | BPF_X, dreg, dreg); +} + +/* + * emit one of: + * add , % + * and , % + * or , % + * sub , % + * xor , % + */ +static void +emit_alu_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm) +{ + uint8_t mod, opcode; + uint32_t bop, imsz; + + const uint8_t op8 = 0x83; + const uint8_t op32 = 0x81; + static const uint8_t mods[] = { + [GET_BPF_OP(BPF_ADD)] = 0, + [GET_BPF_OP(BPF_AND)] = 4, + [GET_BPF_OP(BPF_OR)] = 1, + [GET_BPF_OP(BPF_SUB)] = 5, + [GET_BPF_OP(BPF_XOR)] = 6, + }; + + bop = GET_BPF_OP(op); + mod = mods[bop]; + + imsz = imm_size(imm); + opcode = (imsz == 1) ? 
op8 : op32; + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &opcode, sizeof(opcode)); + emit_modregrm(st, MOD_DIRECT, mod, dreg); + emit_imm(st, imm, imsz); +} + +/* + * emit one of: + * add %, % + * and %, % + * or %, % + * sub %, % + * xor %, % + */ +static void +emit_alu_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + uint32_t bop; + + static const uint8_t ops[] = { + [GET_BPF_OP(BPF_ADD)] = 0x01, + [GET_BPF_OP(BPF_AND)] = 0x21, + [GET_BPF_OP(BPF_OR)] = 0x09, + [GET_BPF_OP(BPF_SUB)] = 0x29, + [GET_BPF_OP(BPF_XOR)] = 0x31, + }; + + bop = GET_BPF_OP(op); + + emit_rex(st, op, sreg, dreg); + emit_bytes(st, &ops[bop], sizeof(ops[bop])); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); +} + +static void +emit_shift(struct bpf_jit_state *st, uint32_t op, uint32_t dreg) +{ + uint8_t mod; + uint32_t bop, opx; + + static const uint8_t ops[] = {0xC1, 0xD3}; + static const uint8_t mods[] = { + [GET_BPF_OP(BPF_LSH)] = 4, + [GET_BPF_OP(BPF_RSH)] = 5, + [GET_BPF_OP(EBPF_ARSH)] = 7, + }; + + bop = GET_BPF_OP(op); + mod = mods[bop]; + opx = (BPF_SRC(op) == BPF_X); + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &ops[opx], sizeof(ops[opx])); + emit_modregrm(st, MOD_DIRECT, mod, dreg); +} + +/* + * emit one of: + * shl , % + * shr , % + * sar , % + */ +static void +emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, + uint32_t imm) +{ + emit_shift(st, op, dreg); + emit_imm(st, imm, imm_size(imm)); +} + +/* + * emit one of: + * shl % + * shr % + * sar % + * note that rcx is implicitly used as a source register, so few extra + * instructions for register spillage might be necessary. + */ +static void +emit_shift_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + if (sreg != RCX) + emit_xchg_reg(st, RCX, sreg); + + emit_shift(st, op, (dreg == RCX) ? sreg : dreg); + + if (sreg != RCX) + emit_xchg_reg(st, RCX, sreg); +} + +/* + * emit mov , % + */ +static void +emit_mov_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm) +{ + const uint8_t ops = 0xC7; + + if (imm == 0) { + /* replace 'mov 0, %' with 'xor %, %' */ + op = BPF_CLASS(op) | BPF_XOR | BPF_X; + emit_alu_reg(st, op, dreg, dreg); + return; + } + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, 0, dreg); + emit_imm(st, imm, sizeof(imm)); +} + +/* + * emit mov , % + */ +static void +emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0, + uint32_t imm1) +{ + const uint8_t ops = 0xB8; + + if (imm1 == 0) { + emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, dreg, imm0); + return; + } + + emit_rex(st, EBPF_ALU64, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, 0, dreg); + + emit_imm(st, imm0, sizeof(imm0)); + emit_imm(st, imm1, sizeof(imm1)); +} + +/* + * note that rax:rdx are implicitly used as source/destination registers, + * so some reg spillage is necessary. 
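 *
 * Worked encoding example for the emitters above (illustrative, derived from
 * the formulas in emit_rex()/emit_modregrm()): EBPF_ALU64 | BPF_ADD | BPF_X
 * with sreg = R13 and dreg = RBX produces
 *	REX    = REX_PREFIX | REX_W | REX_R              = 0x4c  (64-bit op, R13 >= R8)
 *	opcode = 0x01                                            (add r/m64, r64)
 *	ModRM  = MOD_DIRECT << 6 | (R13 & 7) << 3 | RBX  = 0xeb
 * i.e. the three bytes 4c 01 eb, which disassemble to `add %r13, %rbx`.
 *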
+ * emit: + * mov %rax, %r11 + * mov %rdx, %r10 + * mov %, %rax + * either: + * mov %, %rdx + * OR + * mov , %rdx + * mul %rdx + * mov %r10, %rdx + * mov %rax, % + * mov %r11, %rax + */ +static void +emit_mul(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg, + uint32_t imm) +{ + const uint8_t ops = 0xF7; + const uint8_t mods = 4; + + /* save rax & rdx */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0); + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1); + + /* rax = dreg */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX); + + if (BPF_SRC(op) == BPF_X) + /* rdx = sreg */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, + sreg == RAX ? REG_TMP0 : sreg, RDX); + else + /* rdx = imm */ + emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, imm); + + emit_rex(st, op, RAX, RDX); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, RDX); + + if (dreg != RDX) + /* restore rdx */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX); + + if (dreg != RAX) { + /* dreg = rax */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg); + /* restore rax */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX); + } +} + +/* + * emit mov (%), % + * note that for non 64-bit ops, higher bits have to be cleared. + */ +static void +emit_ld_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg, + int32_t ofs) +{ + uint32_t mods, opsz; + const uint8_t op32 = 0x8B; + const uint8_t op16[] = {0x0F, 0xB7}; + const uint8_t op8[] = {0x0F, 0xB6}; + + emit_rex(st, op, dreg, sreg); + + opsz = BPF_SIZE(op); + if (opsz == BPF_B) + emit_bytes(st, op8, sizeof(op8)); + else if (opsz == BPF_H) + emit_bytes(st, op16, sizeof(op16)); + else + emit_bytes(st, &op32, sizeof(op32)); + + mods = (imm_size(ofs) == 1) ? MOD_IDISP8 : MOD_IDISP32; + + emit_modregrm(st, mods, dreg, sreg); + if (sreg == RSP || sreg == R12) + emit_sib(st, SIB_SCALE_1, sreg, sreg); + emit_imm(st, ofs, imm_size(ofs)); +} + +/* + * emit one of: + * mov %, (%) + * mov , (%) + */ +static void +emit_st_common(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg, uint32_t imm, int32_t ofs) +{ + uint32_t mods, imsz, opsz, opx; + const uint8_t prfx16 = 0x66; + + /* 8 bit instruction opcodes */ + static const uint8_t op8[] = {0xC6, 0x88}; + + /* 16/32/64 bit instruction opcodes */ + static const uint8_t ops[] = {0xC7, 0x89}; + + /* is the instruction has immediate value or src reg? */ + opx = (BPF_CLASS(op) == BPF_STX); + + opsz = BPF_SIZE(op); + if (opsz == BPF_H) + emit_bytes(st, &prfx16, sizeof(prfx16)); + + emit_rex(st, op, sreg, dreg); + + if (opsz == BPF_B) + emit_bytes(st, &op8[opx], sizeof(op8[opx])); + else + emit_bytes(st, &ops[opx], sizeof(ops[opx])); + + imsz = imm_size(ofs); + mods = (imsz == 1) ? 
MOD_IDISP8 : MOD_IDISP32; + + emit_modregrm(st, mods, sreg, dreg); + + if (dreg == RSP || dreg == R12) + emit_sib(st, SIB_SCALE_1, dreg, dreg); + + emit_imm(st, ofs, imsz); + + if (opx == 0) { + imsz = RTE_MIN(bpf_size(opsz), sizeof(imm)); + emit_imm(st, imm, imsz); + } +} + +static void +emit_st_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm, + int32_t ofs) +{ + emit_st_common(st, op, 0, dreg, imm, ofs); +} + +static void +emit_st_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg, + int32_t ofs) +{ + emit_st_common(st, op, sreg, dreg, 0, ofs); +} + +/* + * emit lock add %, (%) + */ +static void +emit_st_xadd(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg, int32_t ofs) +{ + uint32_t imsz, mods; + + const uint8_t lck = 0xF0; /* lock prefix */ + const uint8_t ops = 0x01; /* add opcode */ + + imsz = imm_size(ofs); + mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32; + + emit_bytes(st, &lck, sizeof(lck)); + emit_rex(st, op, sreg, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, mods, sreg, dreg); + emit_imm(st, ofs, imsz); +} + +/* + * emit: + * mov , (%rax) + * call *%rax + */ +static void +emit_call(struct bpf_jit_state *st, uintptr_t trg) +{ + const uint8_t ops = 0xFF; + const uint8_t mods = 2; + + emit_ld_imm64(st, RAX, trg, trg >> 32); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, RAX); +} + +/* + * emit jmp + * where 'ofs' is the target offset for the native code. + */ +static void +emit_abs_jmp(struct bpf_jit_state *st, int32_t ofs) +{ + int32_t joff; + uint32_t imsz; + + const uint8_t op8 = 0xEB; + const uint8_t op32 = 0xE9; + + const int32_t sz8 = sizeof(op8) + sizeof(uint8_t); + const int32_t sz32 = sizeof(op32) + sizeof(uint32_t); + + /* max possible jmp instruction size */ + const int32_t iszm = RTE_MAX(sz8, sz32); + + joff = ofs - st->sz; + imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm)); + + if (imsz == 1) { + emit_bytes(st, &op8, sizeof(op8)); + joff -= sz8; + } else { + emit_bytes(st, &op32, sizeof(op32)); + joff -= sz32; + } + + emit_imm(st, joff, imsz); +} + +/* + * emit jmp + * where 'ofs' is the target offset for the BPF bytecode. 
+ */ +static void +emit_jmp(struct bpf_jit_state *st, int32_t ofs) +{ + emit_abs_jmp(st, st->off[st->idx + ofs]); +} + +/* + * emit one of: + * cmovz %, <%dreg> + * cmovne %, <%dreg> + * cmova %, <%dreg> + * cmovb %, <%dreg> + * cmovae %, <%dreg> + * cmovbe %, <%dreg> + * cmovg %, <%dreg> + * cmovl %, <%dreg> + * cmovge %, <%dreg> + * cmovle %, <%dreg> + */ +static void +emit_movcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + uint32_t bop; + + static const uint8_t ops[][2] = { + [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x44}, /* CMOVZ */ + [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x45}, /* CMOVNE */ + [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x47}, /* CMOVA */ + [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x42}, /* CMOVB */ + [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x43}, /* CMOVAE */ + [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x46}, /* CMOVBE */ + [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x4F}, /* CMOVG */ + [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x4C}, /* CMOVL */ + [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x4D}, /* CMOVGE */ + [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x4E}, /* CMOVLE */ + [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x45}, /* CMOVNE */ + }; + + bop = GET_BPF_OP(op); + + emit_rex(st, op, dreg, sreg); + emit_bytes(st, ops[bop], sizeof(ops[bop])); + emit_modregrm(st, MOD_DIRECT, dreg, sreg); +} + +/* + * emit one of: + * je + * jne + * ja + * jb + * jae + * jbe + * jg + * jl + * jge + * jle + * where 'ofs' is the target offset for the native code. + */ +static void +emit_abs_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs) +{ + uint32_t bop, imsz; + int32_t joff; + + static const uint8_t op8[] = { + [GET_BPF_OP(BPF_JEQ)] = 0x74, /* JE */ + [GET_BPF_OP(EBPF_JNE)] = 0x75, /* JNE */ + [GET_BPF_OP(BPF_JGT)] = 0x77, /* JA */ + [GET_BPF_OP(EBPF_JLT)] = 0x72, /* JB */ + [GET_BPF_OP(BPF_JGE)] = 0x73, /* JAE */ + [GET_BPF_OP(EBPF_JLE)] = 0x76, /* JBE */ + [GET_BPF_OP(EBPF_JSGT)] = 0x7F, /* JG */ + [GET_BPF_OP(EBPF_JSLT)] = 0x7C, /* JL */ + [GET_BPF_OP(EBPF_JSGE)] = 0x7D, /*JGE */ + [GET_BPF_OP(EBPF_JSLE)] = 0x7E, /* JLE */ + [GET_BPF_OP(BPF_JSET)] = 0x75, /*JNE */ + }; + + static const uint8_t op32[][2] = { + [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x84}, /* JE */ + [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x85}, /* JNE */ + [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x87}, /* JA */ + [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x82}, /* JB */ + [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x83}, /* JAE */ + [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x86}, /* JBE */ + [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x8F}, /* JG */ + [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x8C}, /* JL */ + [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x8D}, /*JGE */ + [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x8E}, /* JLE */ + [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x85}, /*JNE */ + }; + + const int32_t sz8 = sizeof(op8[0]) + sizeof(uint8_t); + const int32_t sz32 = sizeof(op32[0]) + sizeof(uint32_t); + + /* max possible jcc instruction size */ + const int32_t iszm = RTE_MAX(sz8, sz32); + + joff = ofs - st->sz; + imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm)); + + bop = GET_BPF_OP(op); + + if (imsz == 1) { + emit_bytes(st, &op8[bop], sizeof(op8[bop])); + joff -= sz8; + } else { + emit_bytes(st, op32[bop], sizeof(op32[bop])); + joff -= sz32; + } + + emit_imm(st, joff, imsz); +} + +/* + * emit one of: + * je + * jne + * ja + * jb + * jae + * jbe + * jg + * jl + * jge + * jle + * where 'ofs' is the target offset for the BPF bytecode. 
+ */ +static void +emit_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs) +{ + emit_abs_jcc(st, op, st->off[st->idx + ofs]); +} + + +/* + * emit cmp , % + */ +static void +emit_cmp_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm) +{ + uint8_t ops; + uint32_t imsz; + + const uint8_t op8 = 0x83; + const uint8_t op32 = 0x81; + const uint8_t mods = 7; + + imsz = imm_size(imm); + ops = (imsz == 1) ? op8 : op32; + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, dreg); + emit_imm(st, imm, imsz); +} + +/* + * emit test , % + */ +static void +emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm) +{ + const uint8_t ops = 0xF7; + const uint8_t mods = 0; + + emit_rex(st, op, 0, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, dreg); + emit_imm(st, imm, imm_size(imm)); +} + +static void +emit_jcc_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, + uint32_t imm, int32_t ofs) +{ + if (BPF_OP(op) == BPF_JSET) + emit_tst_imm(st, EBPF_ALU64, dreg, imm); + else + emit_cmp_imm(st, EBPF_ALU64, dreg, imm); + + emit_jcc(st, op, ofs); +} + +/* + * emit test %, % + */ +static void +emit_tst_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + const uint8_t ops = 0x85; + + emit_rex(st, op, sreg, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); +} + +/* + * emit cmp %, % + */ +static void +emit_cmp_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg) +{ + const uint8_t ops = 0x39; + + emit_rex(st, op, sreg, dreg); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, sreg, dreg); + +} + +static void +emit_jcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, + uint32_t dreg, int32_t ofs) +{ + if (BPF_OP(op) == BPF_JSET) + emit_tst_reg(st, EBPF_ALU64, sreg, dreg); + else + emit_cmp_reg(st, EBPF_ALU64, sreg, dreg); + + emit_jcc(st, op, ofs); +} + +/* + * note that rax:rdx are implicitly used as source/destination registers, + * so some reg spillage is necessary. 
+ * emit: + * mov %rax, %r11 + * mov %rdx, %r10 + * mov %, %rax + * xor %rdx, %rdx + * for divisor as immediate value: + * mov , %r9 + * div % + * mov %r10, %rdx + * mov %rax, % + * mov %r11, %rax + * either: + * mov %rax, % + * OR + * mov %rdx, % + * mov %r11, %rax + * mov %r10, %rdx + */ +static void +emit_div(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg, + uint32_t imm) +{ + uint32_t sr; + + const uint8_t ops = 0xF7; + const uint8_t mods = 6; + + if (BPF_SRC(op) == BPF_X) { + + /* check that src divisor is not zero */ + emit_tst_reg(st, BPF_CLASS(op), sreg, sreg); + + /* exit with return value zero */ + emit_movcc_reg(st, BPF_CLASS(op) | BPF_JEQ | BPF_X, sreg, RAX); + emit_abs_jcc(st, BPF_JMP | BPF_JEQ | BPF_K, st->exit.off); + } + + /* save rax & rdx */ + if (dreg != RAX) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0); + if (dreg != RDX) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1); + + /* fill rax & rdx */ + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX); + emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, 0); + + if (BPF_SRC(op) == BPF_X) { + sr = sreg; + if (sr == RAX) + sr = REG_TMP0; + else if (sr == RDX) + sr = REG_TMP1; + } else { + sr = REG_DIV_IMM; + emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, sr, imm); + } + + emit_rex(st, op, 0, sr); + emit_bytes(st, &ops, sizeof(ops)); + emit_modregrm(st, MOD_DIRECT, mods, sr); + + if (BPF_OP(op) == BPF_DIV) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg); + else + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, dreg); + + if (dreg != RAX) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX); + if (dreg != RDX) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX); +} + +static void +emit_prolog(struct bpf_jit_state *st, int32_t stack_size) +{ + uint32_t i; + int32_t spil, ofs; + + spil = 0; + for (i = 0; i != RTE_DIM(save_regs); i++) + spil += INUSE(st->reguse, save_regs[i]); + + /* we can avoid touching the stack at all */ + if (spil == 0) + return; + + + emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP, + spil * sizeof(uint64_t)); + + ofs = 0; + for (i = 0; i != RTE_DIM(save_regs); i++) { + if (INUSE(st->reguse, save_regs[i]) != 0) { + emit_st_reg(st, BPF_STX | BPF_MEM | EBPF_DW, + save_regs[i], RSP, ofs); + ofs += sizeof(uint64_t); + } + } + + if (INUSE(st->reguse, RBP) != 0) { + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RSP, RBP); + emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP, stack_size); + } +} + +/* + * emit ret + */ +static void +emit_ret(struct bpf_jit_state *st) +{ + const uint8_t ops = 0xC3; + + emit_bytes(st, &ops, sizeof(ops)); +} + +static void +emit_epilog(struct bpf_jit_state *st) +{ + uint32_t i; + int32_t spil, ofs; + + /* if we allready have an epilog generate a jump to it */ + if (st->exit.num++ != 0) { + emit_abs_jmp(st, st->exit.off); + return; + } + + /* store offset of epilog block */ + st->exit.off = st->sz; + + spil = 0; + for (i = 0; i != RTE_DIM(save_regs); i++) + spil += INUSE(st->reguse, save_regs[i]); + + if (spil != 0) { + + if (INUSE(st->reguse, RBP) != 0) + emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, + RBP, RSP); + + ofs = 0; + for (i = 0; i != RTE_DIM(save_regs); i++) { + if (INUSE(st->reguse, save_regs[i]) != 0) { + emit_ld_reg(st, BPF_LDX | BPF_MEM | EBPF_DW, + RSP, save_regs[i], ofs); + ofs += sizeof(uint64_t); + } + } + + emit_alu_imm(st, EBPF_ALU64 | BPF_ADD | BPF_K, RSP, + spil * sizeof(uint64_t)); + } + + emit_ret(st); +} + +/* + * walk through bpf code and 
translate them x86_64 one. + */ +static int +emit(struct bpf_jit_state *st, const struct rte_bpf *bpf) +{ + uint32_t i, dr, op, sr; + const struct ebpf_insn *ins; + + /* reset state fields */ + st->sz = 0; + st->exit.num = 0; + + emit_prolog(st, bpf->stack_sz); + + for (i = 0; i != bpf->prm.nb_ins; i++) { + + st->idx = i; + st->off[i] = st->sz; + + ins = bpf->prm.ins + i; + + dr = ebpf2x86[ins->dst_reg]; + sr = ebpf2x86[ins->src_reg]; + op = ins->code; + + switch (op) { + /* 32 bit ALU IMM operations */ + case (BPF_ALU | BPF_ADD | BPF_K): + case (BPF_ALU | BPF_SUB | BPF_K): + case (BPF_ALU | BPF_AND | BPF_K): + case (BPF_ALU | BPF_OR | BPF_K): + case (BPF_ALU | BPF_XOR | BPF_K): + emit_alu_imm(st, op, dr, ins->imm); + break; + case (BPF_ALU | BPF_LSH | BPF_K): + case (BPF_ALU | BPF_RSH | BPF_K): + emit_shift_imm(st, op, dr, ins->imm); + break; + case (BPF_ALU | EBPF_MOV | BPF_K): + emit_mov_imm(st, op, dr, ins->imm); + break; + /* 32 bit ALU REG operations */ + case (BPF_ALU | BPF_ADD | BPF_X): + case (BPF_ALU | BPF_SUB | BPF_X): + case (BPF_ALU | BPF_AND | BPF_X): + case (BPF_ALU | BPF_OR | BPF_X): + case (BPF_ALU | BPF_XOR | BPF_X): + emit_alu_reg(st, op, sr, dr); + break; + case (BPF_ALU | BPF_LSH | BPF_X): + case (BPF_ALU | BPF_RSH | BPF_X): + emit_shift_reg(st, op, sr, dr); + break; + case (BPF_ALU | EBPF_MOV | BPF_X): + emit_mov_reg(st, op, sr, dr); + break; + case (BPF_ALU | BPF_NEG): + emit_neg(st, op, dr); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_BE): + emit_be2le(st, dr, ins->imm); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_LE): + emit_le2be(st, dr, ins->imm); + break; + /* 64 bit ALU IMM operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_K): + case (EBPF_ALU64 | BPF_SUB | BPF_K): + case (EBPF_ALU64 | BPF_AND | BPF_K): + case (EBPF_ALU64 | BPF_OR | BPF_K): + case (EBPF_ALU64 | BPF_XOR | BPF_K): + emit_alu_imm(st, op, dr, ins->imm); + break; + case (EBPF_ALU64 | BPF_LSH | BPF_K): + case (EBPF_ALU64 | BPF_RSH | BPF_K): + case (EBPF_ALU64 | EBPF_ARSH | BPF_K): + emit_shift_imm(st, op, dr, ins->imm); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_K): + emit_mov_imm(st, op, dr, ins->imm); + break; + /* 64 bit ALU REG operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_X): + case (EBPF_ALU64 | BPF_SUB | BPF_X): + case (EBPF_ALU64 | BPF_AND | BPF_X): + case (EBPF_ALU64 | BPF_OR | BPF_X): + case (EBPF_ALU64 | BPF_XOR | BPF_X): + emit_alu_reg(st, op, sr, dr); + break; + case (EBPF_ALU64 | BPF_LSH | BPF_X): + case (EBPF_ALU64 | BPF_RSH | BPF_X): + case (EBPF_ALU64 | EBPF_ARSH | BPF_X): + emit_shift_reg(st, op, sr, dr); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_X): + emit_mov_reg(st, op, sr, dr); + break; + case (EBPF_ALU64 | BPF_NEG): + emit_neg(st, op, dr); + break; + /* multiply instructions */ + case (BPF_ALU | BPF_MUL | BPF_K): + case (BPF_ALU | BPF_MUL | BPF_X): + case (EBPF_ALU64 | BPF_MUL | BPF_K): + case (EBPF_ALU64 | BPF_MUL | BPF_X): + emit_mul(st, op, sr, dr, ins->imm); + break; + /* divide instructions */ + case (BPF_ALU | BPF_DIV | BPF_K): + case (BPF_ALU | BPF_MOD | BPF_K): + case (BPF_ALU | BPF_DIV | BPF_X): + case (BPF_ALU | BPF_MOD | BPF_X): + case (EBPF_ALU64 | BPF_DIV | BPF_K): + case (EBPF_ALU64 | BPF_MOD | BPF_K): + case (EBPF_ALU64 | BPF_DIV | BPF_X): + case (EBPF_ALU64 | BPF_MOD | BPF_X): + emit_div(st, op, sr, dr, ins->imm); + break; + /* load instructions */ + case (BPF_LDX | BPF_MEM | BPF_B): + case (BPF_LDX | BPF_MEM | BPF_H): + case (BPF_LDX | BPF_MEM | BPF_W): + case (BPF_LDX | BPF_MEM | EBPF_DW): + emit_ld_reg(st, op, sr, dr, ins->off); + break; + /* load 
64 bit immediate value */ + case (BPF_LD | BPF_IMM | EBPF_DW): + emit_ld_imm64(st, dr, ins[0].imm, ins[1].imm); + i++; + break; + /* store instructions */ + case (BPF_STX | BPF_MEM | BPF_B): + case (BPF_STX | BPF_MEM | BPF_H): + case (BPF_STX | BPF_MEM | BPF_W): + case (BPF_STX | BPF_MEM | EBPF_DW): + emit_st_reg(st, op, sr, dr, ins->off); + break; + case (BPF_ST | BPF_MEM | BPF_B): + case (BPF_ST | BPF_MEM | BPF_H): + case (BPF_ST | BPF_MEM | BPF_W): + case (BPF_ST | BPF_MEM | EBPF_DW): + emit_st_imm(st, op, dr, ins->imm, ins->off); + break; + /* atomic add instructions */ + case (BPF_STX | EBPF_XADD | BPF_W): + case (BPF_STX | EBPF_XADD | EBPF_DW): + emit_st_xadd(st, op, sr, dr, ins->off); + break; + /* jump instructions */ + case (BPF_JMP | BPF_JA): + emit_jmp(st, ins->off + 1); + break; + /* jump IMM instructions */ + case (BPF_JMP | BPF_JEQ | BPF_K): + case (BPF_JMP | EBPF_JNE | BPF_K): + case (BPF_JMP | BPF_JGT | BPF_K): + case (BPF_JMP | EBPF_JLT | BPF_K): + case (BPF_JMP | BPF_JGE | BPF_K): + case (BPF_JMP | EBPF_JLE | BPF_K): + case (BPF_JMP | EBPF_JSGT | BPF_K): + case (BPF_JMP | EBPF_JSLT | BPF_K): + case (BPF_JMP | EBPF_JSGE | BPF_K): + case (BPF_JMP | EBPF_JSLE | BPF_K): + case (BPF_JMP | BPF_JSET | BPF_K): + emit_jcc_imm(st, op, dr, ins->imm, ins->off + 1); + break; + /* jump REG instructions */ + case (BPF_JMP | BPF_JEQ | BPF_X): + case (BPF_JMP | EBPF_JNE | BPF_X): + case (BPF_JMP | BPF_JGT | BPF_X): + case (BPF_JMP | EBPF_JLT | BPF_X): + case (BPF_JMP | BPF_JGE | BPF_X): + case (BPF_JMP | EBPF_JLE | BPF_X): + case (BPF_JMP | EBPF_JSGT | BPF_X): + case (BPF_JMP | EBPF_JSLT | BPF_X): + case (BPF_JMP | EBPF_JSGE | BPF_X): + case (BPF_JMP | EBPF_JSLE | BPF_X): + case (BPF_JMP | BPF_JSET | BPF_X): + emit_jcc_reg(st, op, sr, dr, ins->off + 1); + break; + /* call instructions */ + case (BPF_JMP | EBPF_CALL): + emit_call(st, + (uintptr_t)bpf->prm.xsym[ins->imm].func.val); + break; + /* return instruction */ + case (BPF_JMP | EBPF_EXIT): + emit_epilog(st); + break; + default: + RTE_BPF_LOG(ERR, + "%s(%p): invalid opcode %#x at pc: %u;\n", + __func__, bpf, ins->code, i); + return -EINVAL; + } + } + + return 0; +} + +/* + * produce a native ISA version of the given BPF code. + */ +int +bpf_jit_x86(struct rte_bpf *bpf) +{ + int32_t rc; + uint32_t i; + size_t sz; + struct bpf_jit_state st; + + /* init state */ + memset(&st, 0, sizeof(st)); + st.off = malloc(bpf->prm.nb_ins * sizeof(st.off[0])); + if (st.off == NULL) + return -ENOMEM; + + /* fill with fake offsets */ + st.exit.off = INT32_MAX; + for (i = 0; i != bpf->prm.nb_ins; i++) + st.off[i] = INT32_MAX; + + /* + * dry runs, used to calculate total code size and valid jump offsets. 
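 * Each pass of emit() records the native offset of every BPF instruction in
 * st.off[] and sizes jumps against the offsets recorded by the previous pass.
 * Forward jumps are first measured against the INT32_MAX placeholders and so
 * use the long imm32 encodings; once real offsets are known some of them fit
 * into imm8 and shrink, which moves every later st.off[] entry down.  Since
 * displacements can only shrink between passes, the loop below converges.
 *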
+ * stop when we get minimal possible size + */ + do { + sz = st.sz; + rc = emit(&st, bpf); + } while (rc == 0 && sz != st.sz); + + if (rc == 0) { + + /* allocate memory needed */ + st.ins = mmap(NULL, st.sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (st.ins == MAP_FAILED) + rc = -ENOMEM; + else + /* generate code */ + rc = emit(&st, bpf); + } + + if (rc == 0 && mprotect(st.ins, st.sz, PROT_READ | PROT_EXEC) != 0) + rc = -ENOMEM; + + if (rc != 0) + munmap(st.ins, st.sz); + else { + bpf->jit.func = (void *)st.ins; + bpf->jit.sz = st.sz; + } + + free(st.off); + return rc; +} diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c new file mode 100644 index 00000000..2b84fe72 --- /dev/null +++ b/lib/librte_bpf/bpf_load.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +static struct rte_bpf * +bpf_load(const struct rte_bpf_prm *prm) +{ + uint8_t *buf; + struct rte_bpf *bpf; + size_t sz, bsz, insz, xsz; + + xsz = prm->nb_xsym * sizeof(prm->xsym[0]); + insz = prm->nb_ins * sizeof(prm->ins[0]); + bsz = sizeof(bpf[0]); + sz = insz + xsz + bsz; + + buf = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) + return NULL; + + bpf = (void *)buf; + bpf->sz = sz; + + memcpy(&bpf->prm, prm, sizeof(bpf->prm)); + + memcpy(buf + bsz, prm->xsym, xsz); + memcpy(buf + bsz + xsz, prm->ins, insz); + + bpf->prm.xsym = (void *)(buf + bsz); + bpf->prm.ins = (void *)(buf + bsz + xsz); + + return bpf; +} + +/* + * Check that user provided external symbol. + */ +static int +bpf_check_xsym(const struct rte_bpf_xsym *xsym) +{ + uint32_t i; + + if (xsym->name == NULL) + return -EINVAL; + + if (xsym->type == RTE_BPF_XTYPE_VAR) { + if (xsym->var.desc.type == RTE_BPF_ARG_UNDEF) + return -EINVAL; + } else if (xsym->type == RTE_BPF_XTYPE_FUNC) { + + if (xsym->func.nb_args > EBPF_FUNC_MAX_ARGS) + return -EINVAL; + + /* check function arguments */ + for (i = 0; i != xsym->func.nb_args; i++) { + if (xsym->func.args[i].type == RTE_BPF_ARG_UNDEF) + return -EINVAL; + } + + /* check return value info */ + if (xsym->func.ret.type != RTE_BPF_ARG_UNDEF && + xsym->func.ret.size == 0) + return -EINVAL; + } else + return -EINVAL; + + return 0; +} + +__rte_experimental struct rte_bpf * +rte_bpf_load(const struct rte_bpf_prm *prm) +{ + struct rte_bpf *bpf; + int32_t rc; + uint32_t i; + + if (prm == NULL || prm->ins == NULL || + (prm->nb_xsym != 0 && prm->xsym == NULL)) { + rte_errno = EINVAL; + return NULL; + } + + rc = 0; + for (i = 0; i != prm->nb_xsym && rc == 0; i++) + rc = bpf_check_xsym(prm->xsym + i); + + if (rc != 0) { + rte_errno = -rc; + RTE_BPF_LOG(ERR, "%s: %d-th xsym is invalid\n", __func__, i); + return NULL; + } + + bpf = bpf_load(prm); + if (bpf == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + rc = bpf_validate(bpf); + if (rc == 0) { + bpf_jit(bpf); + if (mprotect(bpf, bpf->sz, PROT_READ) != 0) + rc = -ENOMEM; + } + + if (rc != 0) { + rte_bpf_destroy(bpf); + rte_errno = -rc; + return NULL; + } + + return bpf; +} + +__rte_experimental __attribute__ ((weak)) struct rte_bpf * +rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname, + const char *sname) +{ + if (prm == NULL || fname == NULL || sname == NULL) { + rte_errno = EINVAL; + return NULL; + } + + RTE_BPF_LOG(ERR, "%s() is not supported with 
current config\n" + "rebuild with libelf installed\n", + __func__); + rte_errno = ENOTSUP; + return NULL; +} diff --git a/lib/librte_bpf/bpf_load_elf.c b/lib/librte_bpf/bpf_load_elf.c new file mode 100644 index 00000000..96d3630f --- /dev/null +++ b/lib/librte_bpf/bpf_load_elf.c @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +/* To overcome compatibility issue */ +#ifndef EM_BPF +#define EM_BPF 247 +#endif + +static uint32_t +bpf_find_xsym(const char *sn, enum rte_bpf_xtype type, + const struct rte_bpf_xsym fp[], uint32_t fn) +{ + uint32_t i; + + if (sn == NULL || fp == NULL) + return UINT32_MAX; + + for (i = 0; i != fn; i++) { + if (fp[i].type == type && strcmp(sn, fp[i].name) == 0) + break; + } + + return (i != fn) ? i : UINT32_MAX; +} + +/* + * update BPF code at offset *ofs* with a proper address(index) for external + * symbol *sn* + */ +static int +resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz, + const struct rte_bpf_prm *prm) +{ + uint32_t idx, fidx; + enum rte_bpf_xtype type; + + if (ofs % sizeof(ins[0]) != 0 || ofs >= ins_sz) + return -EINVAL; + + idx = ofs / sizeof(ins[0]); + if (ins[idx].code == (BPF_JMP | EBPF_CALL)) + type = RTE_BPF_XTYPE_FUNC; + else if (ins[idx].code == (BPF_LD | BPF_IMM | EBPF_DW) && + ofs < ins_sz - sizeof(ins[idx])) + type = RTE_BPF_XTYPE_VAR; + else + return -EINVAL; + + fidx = bpf_find_xsym(sn, type, prm->xsym, prm->nb_xsym); + if (fidx == UINT32_MAX) + return -ENOENT; + + /* for function we just need an index in our xsym table */ + if (type == RTE_BPF_XTYPE_FUNC) + ins[idx].imm = fidx; + /* for variable we need to store its absolute address */ + else { + ins[idx].imm = (uintptr_t)prm->xsym[fidx].var.val; + ins[idx + 1].imm = + (uint64_t)(uintptr_t)prm->xsym[fidx].var.val >> 32; + } + + return 0; +} + +static int +check_elf_header(const Elf64_Ehdr *eh) +{ + const char *err; + + err = NULL; + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + if (eh->e_ident[EI_DATA] != ELFDATA2LSB) +#else + if (eh->e_ident[EI_DATA] != ELFDATA2MSB) +#endif + err = "not native byte order"; + else if (eh->e_ident[EI_OSABI] != ELFOSABI_NONE) + err = "unexpected OS ABI"; + else if (eh->e_type != ET_REL) + err = "unexpected ELF type"; + else if (eh->e_machine != EM_NONE && eh->e_machine != EM_BPF) + err = "unexpected machine type"; + + if (err != NULL) { + RTE_BPF_LOG(ERR, "%s(): %s\n", __func__, err); + return -EINVAL; + } + + return 0; +} + +/* + * helper function, find executable section by name. 
+ */ +static int +find_elf_code(Elf *elf, const char *section, Elf_Data **psd, size_t *pidx) +{ + Elf_Scn *sc; + const Elf64_Ehdr *eh; + const Elf64_Shdr *sh; + Elf_Data *sd; + const char *sn; + int32_t rc; + + eh = elf64_getehdr(elf); + if (eh == NULL) { + rc = elf_errno(); + RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n", + __func__, elf, section, rc, elf_errmsg(rc)); + return -EINVAL; + } + + if (check_elf_header(eh) != 0) + return -EINVAL; + + /* find given section by name */ + for (sc = elf_nextscn(elf, NULL); sc != NULL; + sc = elf_nextscn(elf, sc)) { + sh = elf64_getshdr(sc); + sn = elf_strptr(elf, eh->e_shstrndx, sh->sh_name); + if (sn != NULL && strcmp(section, sn) == 0 && + sh->sh_type == SHT_PROGBITS && + sh->sh_flags == (SHF_ALLOC | SHF_EXECINSTR)) + break; + } + + sd = elf_getdata(sc, NULL); + if (sd == NULL || sd->d_size == 0 || + sd->d_size % sizeof(struct ebpf_insn) != 0) { + rc = elf_errno(); + RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n", + __func__, elf, section, rc, elf_errmsg(rc)); + return -EINVAL; + } + + *psd = sd; + *pidx = elf_ndxscn(sc); + return 0; +} + +/* + * helper function to process data from relocation table. + */ +static int +process_reloc(Elf *elf, size_t sym_idx, Elf64_Rel *re, size_t re_sz, + struct ebpf_insn *ins, size_t ins_sz, const struct rte_bpf_prm *prm) +{ + int32_t rc; + uint32_t i, n; + size_t ofs, sym; + const char *sn; + const Elf64_Ehdr *eh; + Elf_Scn *sc; + const Elf_Data *sd; + Elf64_Sym *sm; + + eh = elf64_getehdr(elf); + + /* get symtable by section index */ + sc = elf_getscn(elf, sym_idx); + sd = elf_getdata(sc, NULL); + if (sd == NULL) + return -EINVAL; + sm = sd->d_buf; + + n = re_sz / sizeof(re[0]); + for (i = 0; i != n; i++) { + + ofs = re[i].r_offset; + + /* retrieve index in the symtable */ + sym = ELF64_R_SYM(re[i].r_info); + if (sym * sizeof(sm[0]) >= sd->d_size) + return -EINVAL; + + sn = elf_strptr(elf, eh->e_shstrndx, sm[sym].st_name); + + rc = resolve_xsym(sn, ofs, ins, ins_sz, prm); + if (rc != 0) { + RTE_BPF_LOG(ERR, + "resolve_xsym(%s, %zu) error code: %d\n", + sn, ofs, rc); + return rc; + } + } + + return 0; +} + +/* + * helper function, find relocation information (if any) + * and update bpf code. 
+ */ +static int +elf_reloc_code(Elf *elf, Elf_Data *ed, size_t sidx, + const struct rte_bpf_prm *prm) +{ + Elf64_Rel *re; + Elf_Scn *sc; + const Elf64_Shdr *sh; + const Elf_Data *sd; + int32_t rc; + + rc = 0; + + /* walk through all sections */ + for (sc = elf_nextscn(elf, NULL); sc != NULL && rc == 0; + sc = elf_nextscn(elf, sc)) { + + sh = elf64_getshdr(sc); + + /* relocation data for our code section */ + if (sh->sh_type == SHT_REL && sh->sh_info == sidx) { + sd = elf_getdata(sc, NULL); + if (sd == NULL || sd->d_size == 0 || + sd->d_size % sizeof(re[0]) != 0) + return -EINVAL; + rc = process_reloc(elf, sh->sh_link, + sd->d_buf, sd->d_size, ed->d_buf, ed->d_size, + prm); + } + } + + return rc; +} + +static struct rte_bpf * +bpf_load_elf(const struct rte_bpf_prm *prm, int32_t fd, const char *section) +{ + Elf *elf; + Elf_Data *sd; + size_t sidx; + int32_t rc; + struct rte_bpf *bpf; + struct rte_bpf_prm np; + + elf_version(EV_CURRENT); + elf = elf_begin(fd, ELF_C_READ, NULL); + + rc = find_elf_code(elf, section, &sd, &sidx); + if (rc == 0) + rc = elf_reloc_code(elf, sd, sidx, prm); + + if (rc == 0) { + np = prm[0]; + np.ins = sd->d_buf; + np.nb_ins = sd->d_size / sizeof(struct ebpf_insn); + bpf = rte_bpf_load(&np); + } else { + bpf = NULL; + rte_errno = -rc; + } + + elf_end(elf); + return bpf; +} + +__rte_experimental struct rte_bpf * +rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname, + const char *sname) +{ + int32_t fd, rc; + struct rte_bpf *bpf; + + if (prm == NULL || fname == NULL || sname == NULL) { + rte_errno = EINVAL; + return NULL; + } + + fd = open(fname, O_RDONLY); + if (fd < 0) { + rc = errno; + RTE_BPF_LOG(ERR, "%s(%s) error code: %d(%s)\n", + __func__, fname, rc, strerror(rc)); + rte_errno = EINVAL; + return NULL; + } + + bpf = bpf_load_elf(prm, fd, sname); + close(fd); + + if (bpf == NULL) { + RTE_BPF_LOG(ERR, + "%s(fname=\"%s\", sname=\"%s\") failed, " + "error code: %d\n", + __func__, fname, sname, rte_errno); + return NULL; + } + + RTE_BPF_LOG(INFO, "%s(fname=\"%s\", sname=\"%s\") " + "successfully creates %p(jit={.func=%p,.sz=%zu});\n", + __func__, fname, sname, bpf, bpf->jit.func, bpf->jit.sz); + return bpf; +} diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c new file mode 100644 index 00000000..ab9daa52 --- /dev/null +++ b/lib/librte_bpf/bpf_pkt.c @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "bpf_impl.h" + +/* + * information about installed BPF rx/tx callback + */ + +struct bpf_eth_cbi { + /* used by both data & control path */ + uint32_t use; /*usage counter */ + const struct rte_eth_rxtx_callback *cb; /* callback handle */ + struct rte_bpf *bpf; + struct rte_bpf_jit jit; + /* used by control path only */ + LIST_ENTRY(bpf_eth_cbi) link; + uint16_t port; + uint16_t queue; +} __rte_cache_aligned; + +/* + * Odd number means that callback is used by datapath. + * Even number means that callback is not used by datapath. + */ +#define BPF_ETH_CBI_INUSE 1 + +/* + * List to manage RX/TX installed callbacks. 
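 *
 * Minimal load-and-run sketch for rte_bpf_elf_load()/rte_bpf_get_jit() above;
 * the object file name, section name and prm contents are illustrative and
 * error handling is trimmed:
 *
 *	struct rte_bpf *bpf;
 *	struct rte_bpf_jit jit;
 *	uint64_t rc;
 *
 *	bpf = rte_bpf_elf_load(&prm, "filter.o", ".text");
 *	if (bpf == NULL)
 *		return -rte_errno;
 *
 *	rte_bpf_get_jit(bpf, &jit);
 *	rc = (jit.func != NULL) ?
 *		jit.func(pkt_data) :		// run the generated native code
 *		rte_bpf_exec(bpf, pkt_data);	// fall back to the interpreter
 *
 *	rte_bpf_destroy(bpf);
 *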
+ */ +LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi); + +enum { + BPF_ETH_RX, + BPF_ETH_TX, + BPF_ETH_NUM, +}; + +/* + * information about all installed BPF rx/tx callbacks + */ +struct bpf_eth_cbh { + rte_spinlock_t lock; + struct bpf_eth_cbi_list list; + uint32_t type; +}; + +static struct bpf_eth_cbh rx_cbh = { + .lock = RTE_SPINLOCK_INITIALIZER, + .list = LIST_HEAD_INITIALIZER(list), + .type = BPF_ETH_RX, +}; + +static struct bpf_eth_cbh tx_cbh = { + .lock = RTE_SPINLOCK_INITIALIZER, + .list = LIST_HEAD_INITIALIZER(list), + .type = BPF_ETH_TX, +}; + +/* + * Marks given callback as used by datapath. + */ +static __rte_always_inline void +bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi) +{ + cbi->use++; + /* make sure no store/load reordering could happen */ + rte_smp_mb(); +} + +/* + * Marks given callback list as not used by datapath. + */ +static __rte_always_inline void +bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi) +{ + /* make sure all previous loads are completed */ + rte_smp_rmb(); + cbi->use++; +} + +/* + * Waits till datapath finished using given callback. + */ +static void +bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) +{ + uint32_t nuse, puse; + + /* make sure all previous loads and stores are completed */ + rte_smp_mb(); + + puse = cbi->use; + + /* in use, busy wait till current RX/TX iteration is finished */ + if ((puse & BPF_ETH_CBI_INUSE) != 0) { + do { + rte_pause(); + rte_compiler_barrier(); + nuse = cbi->use; + } while (nuse == puse); + } +} + +static void +bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc) +{ + bc->bpf = NULL; + memset(&bc->jit, 0, sizeof(bc->jit)); +} + +static struct bpf_eth_cbi * +bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue) +{ + struct bpf_eth_cbi *cbi; + + LIST_FOREACH(cbi, &cbh->list, link) { + if (cbi->port == port && cbi->queue == queue) + break; + } + return cbi; +} + +static struct bpf_eth_cbi * +bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue) +{ + struct bpf_eth_cbi *cbi; + + /* return an existing one */ + cbi = bpf_eth_cbh_find(cbh, port, queue); + if (cbi != NULL) + return cbi; + + cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE); + if (cbi != NULL) { + cbi->port = port; + cbi->queue = queue; + LIST_INSERT_HEAD(&cbh->list, cbi, link); + } + return cbi; +} + +/* + * BPF packet processing routinies. 
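 *
 * Note on the use counter above (illustrative summary): the datapath bumps
 * cbi->use on entry to a callback (making it odd) and again on exit (making
 * it even).  bpf_eth_cbi_wait() samples the counter once; an even value means
 * no burst is inside the callback, while an odd value makes the control path
 * spin until the value changes, i.e. until the in-flight RX/TX burst has left
 * the callback.  Only then is it safe to destroy the rte_bpf it was using.
 *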
+ */ + +static inline uint32_t +apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num, + uint32_t drop) +{ + uint32_t i, j, k; + struct rte_mbuf *dr[num]; + + for (i = 0, j = 0, k = 0; i != num; i++) { + + /* filter matches */ + if (rc[i] != 0) + mb[j++] = mb[i]; + /* no match */ + else + dr[k++] = mb[i]; + } + + if (drop != 0) { + /* free filtered out mbufs */ + for (i = 0; i != k; i++) + rte_pktmbuf_free(dr[i]); + } else { + /* copy filtered out mbufs beyond good ones */ + for (i = 0; i != k; i++) + mb[j + i] = dr[i]; + } + + return j; +} + +static inline uint32_t +pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num, + uint32_t drop) +{ + uint32_t i; + void *dp[num]; + uint64_t rc[num]; + + for (i = 0; i != num; i++) + dp[i] = rte_pktmbuf_mtod(mb[i], void *); + + rte_bpf_exec_burst(bpf, dp, rc, num); + return apply_filter(mb, rc, num, drop); +} + +static inline uint32_t +pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[], + uint32_t num, uint32_t drop) +{ + uint32_t i, n; + void *dp; + uint64_t rc[num]; + + n = 0; + for (i = 0; i != num; i++) { + dp = rte_pktmbuf_mtod(mb[i], void *); + rc[i] = jit->func(dp); + n += (rc[i] == 0); + } + + if (n != 0) + num = apply_filter(mb, rc, num, drop); + + return num; +} + +static inline uint32_t +pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num, + uint32_t drop) +{ + uint64_t rc[num]; + + rte_bpf_exec_burst(bpf, (void **)mb, rc, num); + return apply_filter(mb, rc, num, drop); +} + +static inline uint32_t +pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[], + uint32_t num, uint32_t drop) +{ + uint32_t i, n; + uint64_t rc[num]; + + n = 0; + for (i = 0; i != num; i++) { + rc[i] = jit->func(mb[i]); + n += (rc[i] == 0); + } + + if (n != 0) + num = apply_filter(mb, rc, num, drop); + + return num; +} + +/* + * RX/TX callbacks for raw data bpf. + */ + +static uint16_t +bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +/* + * RX/TX callbacks for mbuf. 
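 *
 * Worked example for apply_filter() above (illustrative): with num == 4 and
 * rc[] == {1, 0, 1, 0}, packets 0 and 2 matched.  The loop compacts matching
 * mbufs to the front, so mb[] becomes {m0, m2, m1, m3} and 2 is returned;
 * with drop != 0 the non-matching m1 and m3 are freed instead of being kept
 * beyond the returned count.
 *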
+ */ + +static uint16_t +bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static uint16_t +bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param) +{ + struct bpf_eth_cbi *cbi; + uint16_t rc; + + cbi = user_param; + bpf_eth_cbi_inuse(cbi); + rc = (cbi->cb != NULL) ? + pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) : + nb_pkts; + bpf_eth_cbi_unuse(cbi); + return rc; +} + +static rte_rx_callback_fn +select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags) +{ + if (flags & RTE_BPF_ETH_F_JIT) { + if (type == RTE_BPF_ARG_PTR) + return bpf_rx_callback_jit; + else if (type == RTE_BPF_ARG_PTR_MBUF) + return bpf_rx_callback_mb_jit; + } else if (type == RTE_BPF_ARG_PTR) + return bpf_rx_callback_vm; + else if (type == RTE_BPF_ARG_PTR_MBUF) + return bpf_rx_callback_mb_vm; + + return NULL; +} + +static rte_tx_callback_fn +select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags) +{ + if (flags & RTE_BPF_ETH_F_JIT) { + if (type == RTE_BPF_ARG_PTR) + return bpf_tx_callback_jit; + else if (type == RTE_BPF_ARG_PTR_MBUF) + return bpf_tx_callback_mb_jit; + } else if (type == RTE_BPF_ARG_PTR) + return bpf_tx_callback_vm; + else if (type == RTE_BPF_ARG_PTR_MBUF) + return bpf_tx_callback_mb_vm; + + return NULL; +} + +/* + * helper function to perform BPF unload for given port/queue. + * have to introduce extra complexity (and possible slowdown) here, + * as right now there is no safe generic way to remove RX/TX callback + * while IO is active. + * Still don't free memory allocated for callback handle itself, + * again right now there is no safe way to do that without stopping RX/TX + * on given port/queue first. 
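 *
 * Minimal install/uninstall sketch for the ethdev hooks in this file, using
 * rte_bpf_eth_rx_elf_load()/rte_bpf_eth_rx_unload() defined below; the file
 * and section names and the prog_arg sizing are illustrative:
 *
 *	static const struct rte_bpf_xsym xsym[] = { /- as sketched earlier -/ };
 *
 *	const struct rte_bpf_prm prm = {
 *		.xsym = xsym,
 *		.nb_xsym = RTE_DIM(xsym),
 *		.prog_arg = {
 *			.type = RTE_BPF_ARG_PTR,
 *			.size = 64,	// bytes of packet data the program may read
 *		},
 *	};
 *
 *	// install a JIT-ed RX filter on port 0, queue 0
 *	rc = rte_bpf_eth_rx_elf_load(0, 0, &prm, "filter.o", ".text",
 *		RTE_BPF_ETH_F_JIT);
 *
 *	// later, detach it and release the BPF program
 *	rte_bpf_eth_rx_unload(0, 0);
 *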
+ */ +static void +bpf_eth_cbi_unload(struct bpf_eth_cbi *bc) +{ + /* mark this cbi as empty */ + bc->cb = NULL; + rte_smp_mb(); + + /* make sure datapath doesn't use bpf anymore, then destroy bpf */ + bpf_eth_cbi_wait(bc); + rte_bpf_destroy(bc->bpf); + bpf_eth_cbi_cleanup(bc); +} + +static void +bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue) +{ + struct bpf_eth_cbi *bc; + + bc = bpf_eth_cbh_find(cbh, port, queue); + if (bc == NULL || bc->cb == NULL) + return; + + if (cbh->type == BPF_ETH_RX) + rte_eth_remove_rx_callback(port, queue, bc->cb); + else + rte_eth_remove_tx_callback(port, queue, bc->cb); + + bpf_eth_cbi_unload(bc); +} + + +__rte_experimental void +rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue) +{ + struct bpf_eth_cbh *cbh; + + cbh = &rx_cbh; + rte_spinlock_lock(&cbh->lock); + bpf_eth_unload(cbh, port, queue); + rte_spinlock_unlock(&cbh->lock); +} + +__rte_experimental void +rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue) +{ + struct bpf_eth_cbh *cbh; + + cbh = &tx_cbh; + rte_spinlock_lock(&cbh->lock); + bpf_eth_unload(cbh, port, queue); + rte_spinlock_unlock(&cbh->lock); +} + +static int +bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm, const char *fname, const char *sname, + uint32_t flags) +{ + int32_t rc; + struct bpf_eth_cbi *bc; + struct rte_bpf *bpf; + rte_rx_callback_fn frx; + rte_tx_callback_fn ftx; + struct rte_bpf_jit jit; + + frx = NULL; + ftx = NULL; + + if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 || + queue >= RTE_MAX_QUEUES_PER_PORT) + return -EINVAL; + + if (cbh->type == BPF_ETH_RX) + frx = select_rx_callback(prm->prog_arg.type, flags); + else + ftx = select_tx_callback(prm->prog_arg.type, flags); + + if (frx == NULL && ftx == NULL) { + RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n", + __func__, port, queue); + return -EINVAL; + } + + bpf = rte_bpf_elf_load(prm, fname, sname); + if (bpf == NULL) + return -rte_errno; + + rte_bpf_get_jit(bpf, &jit); + + if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) { + RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n", + __func__, port, queue); + rte_bpf_destroy(bpf); + return -ENOTSUP; + } + + /* setup/update global callback info */ + bc = bpf_eth_cbh_add(cbh, port, queue); + if (bc == NULL) + return -ENOMEM; + + /* remove old one, if any */ + if (bc->cb != NULL) + bpf_eth_unload(cbh, port, queue); + + bc->bpf = bpf; + bc->jit = jit; + + if (cbh->type == BPF_ETH_RX) + bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc); + else + bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc); + + if (bc->cb == NULL) { + rc = -rte_errno; + rte_bpf_destroy(bpf); + bpf_eth_cbi_cleanup(bc); + } else + rc = 0; + + return rc; +} + +__rte_experimental int +rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm, const char *fname, const char *sname, + uint32_t flags) +{ + int32_t rc; + struct bpf_eth_cbh *cbh; + + cbh = &rx_cbh; + rte_spinlock_lock(&cbh->lock); + rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags); + rte_spinlock_unlock(&cbh->lock); + + return rc; +} + +__rte_experimental int +rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm, const char *fname, const char *sname, + uint32_t flags) +{ + int32_t rc; + struct bpf_eth_cbh *cbh; + + cbh = &tx_cbh; + rte_spinlock_lock(&cbh->lock); + rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags); + rte_spinlock_unlock(&cbh->lock); + + return rc; +} diff --git 
a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c new file mode 100644 index 00000000..83983efc --- /dev/null +++ b/lib/librte_bpf/bpf_validate.c @@ -0,0 +1,2248 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_impl.h" + +struct bpf_reg_val { + struct rte_bpf_arg v; + uint64_t mask; + struct { + int64_t min; + int64_t max; + } s; + struct { + uint64_t min; + uint64_t max; + } u; +}; + +struct bpf_eval_state { + struct bpf_reg_val rv[EBPF_REG_NUM]; + struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)]; +}; + +/* possible instruction node colour */ +enum { + WHITE, + GREY, + BLACK, + MAX_NODE_COLOUR +}; + +/* possible edge types */ +enum { + UNKNOWN_EDGE, + TREE_EDGE, + BACK_EDGE, + CROSS_EDGE, + MAX_EDGE_TYPE +}; + +#define MAX_EDGES 2 + +struct inst_node { + uint8_t colour; + uint8_t nb_edge:4; + uint8_t cur_edge:4; + uint8_t edge_type[MAX_EDGES]; + uint32_t edge_dest[MAX_EDGES]; + uint32_t prev_node; + struct bpf_eval_state *evst; +}; + +struct bpf_verifier { + const struct rte_bpf_prm *prm; + struct inst_node *in; + uint64_t stack_sz; + uint32_t nb_nodes; + uint32_t nb_jcc_nodes; + uint32_t node_colour[MAX_NODE_COLOUR]; + uint32_t edge_type[MAX_EDGE_TYPE]; + struct bpf_eval_state *evst; + struct inst_node *evin; + struct { + uint32_t num; + uint32_t cur; + struct bpf_eval_state *ent; + } evst_pool; +}; + +struct bpf_ins_check { + struct { + uint16_t dreg; + uint16_t sreg; + } mask; + struct { + uint16_t min; + uint16_t max; + } off; + struct { + uint32_t min; + uint32_t max; + } imm; + const char * (*check)(const struct ebpf_insn *); + const char * (*eval)(struct bpf_verifier *, const struct ebpf_insn *); +}; + +#define ALL_REGS RTE_LEN2MASK(EBPF_REG_NUM, uint16_t) +#define WRT_REGS RTE_LEN2MASK(EBPF_REG_10, uint16_t) +#define ZERO_REG RTE_LEN2MASK(EBPF_REG_1, uint16_t) + +/* + * check and evaluate functions for particular instruction types. 
+ */ + +static const char * +check_alu_bele(const struct ebpf_insn *ins) +{ + if (ins->imm != 16 && ins->imm != 32 && ins->imm != 64) + return "invalid imm field"; + return NULL; +} + +static const char * +eval_exit(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + RTE_SET_USED(ins); + if (bvf->evst->rv[EBPF_REG_0].v.type == RTE_BPF_ARG_UNDEF) + return "undefined return value"; + return NULL; +} + +/* setup max possible with this mask bounds */ +static void +eval_umax_bound(struct bpf_reg_val *rv, uint64_t mask) +{ + rv->u.max = mask; + rv->u.min = 0; +} + +static void +eval_smax_bound(struct bpf_reg_val *rv, uint64_t mask) +{ + rv->s.max = mask >> 1; + rv->s.min = rv->s.max ^ UINT64_MAX; +} + +static void +eval_max_bound(struct bpf_reg_val *rv, uint64_t mask) +{ + eval_umax_bound(rv, mask); + eval_smax_bound(rv, mask); +} + +static void +eval_fill_max_bound(struct bpf_reg_val *rv, uint64_t mask) +{ + eval_max_bound(rv, mask); + rv->v.type = RTE_BPF_ARG_RAW; + rv->mask = mask; +} + +static void +eval_fill_imm64(struct bpf_reg_val *rv, uint64_t mask, uint64_t val) +{ + rv->mask = mask; + rv->s.min = val; + rv->s.max = val; + rv->u.min = val; + rv->u.max = val; +} + +static void +eval_fill_imm(struct bpf_reg_val *rv, uint64_t mask, int32_t imm) +{ + uint64_t v; + + v = (uint64_t)imm & mask; + + rv->v.type = RTE_BPF_ARG_RAW; + eval_fill_imm64(rv, mask, v); +} + +static const char * +eval_ld_imm64(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint32_t i; + uint64_t val; + struct bpf_reg_val *rd; + + val = (uint32_t)ins[0].imm | (uint64_t)(uint32_t)ins[1].imm << 32; + + rd = bvf->evst->rv + ins->dst_reg; + rd->v.type = RTE_BPF_ARG_RAW; + eval_fill_imm64(rd, UINT64_MAX, val); + + for (i = 0; i != bvf->prm->nb_xsym; i++) { + + /* load of external variable */ + if (bvf->prm->xsym[i].type == RTE_BPF_XTYPE_VAR && + (uintptr_t)bvf->prm->xsym[i].var.val == val) { + rd->v = bvf->prm->xsym[i].var.desc; + eval_fill_imm64(rd, UINT64_MAX, 0); + break; + } + } + + return NULL; +} + +static void +eval_apply_mask(struct bpf_reg_val *rv, uint64_t mask) +{ + struct bpf_reg_val rt; + + rt.u.min = rv->u.min & mask; + rt.u.max = rv->u.max & mask; + if (rt.u.min != rv->u.min || rt.u.max != rv->u.max) { + rv->u.max = RTE_MAX(rt.u.max, mask); + rv->u.min = 0; + } + + eval_smax_bound(&rt, mask); + rv->s.max = RTE_MIN(rt.s.max, rv->s.max); + rv->s.min = RTE_MAX(rt.s.min, rv->s.min); + + rv->mask = mask; +} + +static void +eval_add(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk) +{ + struct bpf_reg_val rv; + + rv.u.min = (rd->u.min + rs->u.min) & msk; + rv.u.max = (rd->u.min + rs->u.max) & msk; + rv.s.min = (rd->s.min + rs->s.min) & msk; + rv.s.max = (rd->s.max + rs->s.max) & msk; + + /* + * if at least one of the operands is not constant, + * then check for overflow + */ + if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) && + (rv.u.min < rd->u.min || rv.u.max < rd->u.max)) + eval_umax_bound(&rv, msk); + + if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) && + (((rs->s.min < 0 && rv.s.min > rd->s.min) || + rv.s.min < rd->s.min) || + ((rs->s.max < 0 && rv.s.max > rd->s.max) || + rv.s.max < rd->s.max))) + eval_smax_bound(&rv, msk); + + rd->s = rv.s; + rd->u = rv.u; +} + +static void +eval_sub(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk) +{ + struct bpf_reg_val rv; + + rv.u.min = (rd->u.min - rs->u.min) & msk; + rv.u.max = (rd->u.min - rs->u.max) & msk; + rv.s.min = (rd->s.min - rs->s.min) & msk; + rv.s.max = (rd->s.max - rs->s.max) & msk; + + /* 
+ * if at least one of the operands is not constant, + * then check for overflow + */ + if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) && + (rv.u.min > rd->u.min || rv.u.max > rd->u.max)) + eval_umax_bound(&rv, msk); + + if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) && + (((rs->s.min < 0 && rv.s.min < rd->s.min) || + rv.s.min > rd->s.min) || + ((rs->s.max < 0 && rv.s.max < rd->s.max) || + rv.s.max > rd->s.max))) + eval_smax_bound(&rv, msk); + + rd->s = rv.s; + rd->u = rv.u; +} + +static void +eval_lsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* check if shift value is less then max result bits */ + if (rs->u.max >= opsz) { + eval_max_bound(rd, msk); + return; + } + + /* check for overflow */ + if (rd->u.max > RTE_LEN2MASK(opsz - rs->u.max, uint64_t)) + eval_umax_bound(rd, msk); + else { + rd->u.max <<= rs->u.max; + rd->u.min <<= rs->u.min; + } + + /* check that dreg values are and would remain always positive */ + if ((uint64_t)rd->s.min >> (opsz - 1) != 0 || rd->s.max >= + RTE_LEN2MASK(opsz - rs->u.max - 1, int64_t)) + eval_smax_bound(rd, msk); + else { + rd->s.max <<= rs->u.max; + rd->s.min <<= rs->u.min; + } +} + +static void +eval_rsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* check if shift value is less then max result bits */ + if (rs->u.max >= opsz) { + eval_max_bound(rd, msk); + return; + } + + rd->u.max >>= rs->u.min; + rd->u.min >>= rs->u.max; + + /* check that dreg values are always positive */ + if ((uint64_t)rd->s.min >> (opsz - 1) != 0) + eval_smax_bound(rd, msk); + else { + rd->s.max >>= rs->u.min; + rd->s.min >>= rs->u.max; + } +} + +static void +eval_arsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + uint32_t shv; + + /* check if shift value is less then max result bits */ + if (rs->u.max >= opsz) { + eval_max_bound(rd, msk); + return; + } + + rd->u.max = (int64_t)rd->u.max >> rs->u.min; + rd->u.min = (int64_t)rd->u.min >> rs->u.max; + + /* if we have 32-bit values - extend them to 64-bit */ + if (opsz == sizeof(uint32_t) * CHAR_BIT) { + rd->s.min <<= opsz; + rd->s.max <<= opsz; + shv = opsz; + } else + shv = 0; + + if (rd->s.min < 0) + rd->s.min = (rd->s.min >> (rs->u.min + shv)) & msk; + else + rd->s.min = (rd->s.min >> (rs->u.max + shv)) & msk; + + if (rd->s.max < 0) + rd->s.max = (rd->s.max >> (rs->u.max + shv)) & msk; + else + rd->s.max = (rd->s.max >> (rs->u.min + shv)) & msk; +} + +static uint64_t +eval_umax_bits(uint64_t v, size_t opsz) +{ + if (v == 0) + return 0; + + v = __builtin_clzll(v); + return RTE_LEN2MASK(opsz - v, uint64_t); +} + +/* estimate max possible value for (v1 & v2) */ +static uint64_t +eval_uand_max(uint64_t v1, uint64_t v2, size_t opsz) +{ + v1 = eval_umax_bits(v1, opsz); + v2 = eval_umax_bits(v2, opsz); + return (v1 & v2); +} + +/* estimate max possible value for (v1 | v2) */ +static uint64_t +eval_uor_max(uint64_t v1, uint64_t v2, size_t opsz) +{ + v1 = eval_umax_bits(v1, opsz); + v2 = eval_umax_bits(v2, opsz); + return (v1 | v2); +} + +static void +eval_and(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* both operands are constants */ + if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) { + rd->u.min &= rs->u.min; + rd->u.max &= rs->u.max; + } else { + rd->u.max = eval_uand_max(rd->u.max, rs->u.max, opsz); + rd->u.min &= rs->u.min; + } + + /* both operands are constants */ + if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) { + rd->s.min &= 
rs->s.min; + rd->s.max &= rs->s.max; + /* at least one of operand is non-negative */ + } else if (rd->s.min >= 0 || rs->s.min >= 0) { + rd->s.max = eval_uand_max(rd->s.max & (msk >> 1), + rs->s.max & (msk >> 1), opsz); + rd->s.min &= rs->s.min; + } else + eval_smax_bound(rd, msk); +} + +static void +eval_or(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* both operands are constants */ + if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) { + rd->u.min |= rs->u.min; + rd->u.max |= rs->u.max; + } else { + rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz); + rd->u.min |= rs->u.min; + } + + /* both operands are constants */ + if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) { + rd->s.min |= rs->s.min; + rd->s.max |= rs->s.max; + + /* both operands are non-negative */ + } else if (rd->s.min >= 0 || rs->s.min >= 0) { + rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz); + rd->s.min |= rs->s.min; + } else + eval_smax_bound(rd, msk); +} + +static void +eval_xor(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* both operands are constants */ + if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) { + rd->u.min ^= rs->u.min; + rd->u.max ^= rs->u.max; + } else { + rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz); + rd->u.min = 0; + } + + /* both operands are constants */ + if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) { + rd->s.min ^= rs->s.min; + rd->s.max ^= rs->s.max; + + /* both operands are non-negative */ + } else if (rd->s.min >= 0 || rs->s.min >= 0) { + rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz); + rd->s.min = 0; + } else + eval_smax_bound(rd, msk); +} + +static void +eval_mul(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz, + uint64_t msk) +{ + /* both operands are constants */ + if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) { + rd->u.min = (rd->u.min * rs->u.min) & msk; + rd->u.max = (rd->u.max * rs->u.max) & msk; + /* check for overflow */ + } else if (rd->u.max <= msk >> opsz / 2 && rs->u.max <= msk >> opsz) { + rd->u.max *= rs->u.max; + rd->u.min *= rd->u.min; + } else + eval_umax_bound(rd, msk); + + /* both operands are constants */ + if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) { + rd->s.min = (rd->s.min * rs->s.min) & msk; + rd->s.max = (rd->s.max * rs->s.max) & msk; + /* check that both operands are positive and no overflow */ + } else if (rd->s.min >= 0 && rs->s.min >= 0) { + rd->s.max *= rs->s.max; + rd->s.min *= rd->s.min; + } else + eval_smax_bound(rd, msk); +} + +static const char * +eval_divmod(uint32_t op, struct bpf_reg_val *rd, struct bpf_reg_val *rs, + size_t opsz, uint64_t msk) +{ + /* both operands are constants */ + if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) { + if (rs->u.max == 0) + return "division by 0"; + if (op == BPF_DIV) { + rd->u.min /= rs->u.min; + rd->u.max /= rs->u.max; + } else { + rd->u.min %= rs->u.min; + rd->u.max %= rs->u.max; + } + } else { + if (op == BPF_MOD) + rd->u.max = RTE_MIN(rd->u.max, rs->u.max - 1); + else + rd->u.max = rd->u.max; + rd->u.min = 0; + } + + /* if we have 32-bit values - extend them to 64-bit */ + if (opsz == sizeof(uint32_t) * CHAR_BIT) { + rd->s.min = (int32_t)rd->s.min; + rd->s.max = (int32_t)rd->s.max; + rs->s.min = (int32_t)rs->s.min; + rs->s.max = (int32_t)rs->s.max; + } + + /* both operands are constants */ + if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) { + if (rs->s.max == 0) + return "division by 0"; + if (op == BPF_DIV) { + rd->s.min /= rs->s.min; + 
rd->s.max /= rs->s.max; + } else { + rd->s.min %= rs->s.min; + rd->s.max %= rs->s.max; + } + } else if (op == BPF_MOD) { + rd->s.min = RTE_MAX(rd->s.max, 0); + rd->s.min = RTE_MIN(rd->s.min, 0); + } else + eval_smax_bound(rd, msk); + + rd->s.max &= msk; + rd->s.min &= msk; + + return NULL; +} + +static void +eval_neg(struct bpf_reg_val *rd, size_t opsz, uint64_t msk) +{ + uint64_t ux, uy; + int64_t sx, sy; + + /* if we have 32-bit values - extend them to 64-bit */ + if (opsz == sizeof(uint32_t) * CHAR_BIT) { + rd->u.min = (int32_t)rd->u.min; + rd->u.max = (int32_t)rd->u.max; + } + + ux = -(int64_t)rd->u.min & msk; + uy = -(int64_t)rd->u.max & msk; + + rd->u.max = RTE_MAX(ux, uy); + rd->u.min = RTE_MIN(ux, uy); + + /* if we have 32-bit values - extend them to 64-bit */ + if (opsz == sizeof(uint32_t) * CHAR_BIT) { + rd->s.min = (int32_t)rd->s.min; + rd->s.max = (int32_t)rd->s.max; + } + + sx = -rd->s.min & msk; + sy = -rd->s.max & msk; + + rd->s.max = RTE_MAX(sx, sy); + rd->s.min = RTE_MIN(sx, sy); +} + +/* + * check that destination and source operand are in defined state. + */ +static const char * +eval_defined(const struct bpf_reg_val *dst, const struct bpf_reg_val *src) +{ + if (dst != NULL && dst->v.type == RTE_BPF_ARG_UNDEF) + return "dest reg value is undefined"; + if (src != NULL && src->v.type == RTE_BPF_ARG_UNDEF) + return "src reg value is undefined"; + return NULL; +} + +static const char * +eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint64_t msk; + uint32_t op; + size_t opsz; + const char *err; + struct bpf_eval_state *st; + struct bpf_reg_val *rd, rs; + + opsz = (BPF_CLASS(ins->code) == BPF_ALU) ? + sizeof(uint32_t) : sizeof(uint64_t); + opsz = opsz * CHAR_BIT; + msk = RTE_LEN2MASK(opsz, uint64_t); + + st = bvf->evst; + rd = st->rv + ins->dst_reg; + + if (BPF_SRC(ins->code) == BPF_X) { + rs = st->rv[ins->src_reg]; + eval_apply_mask(&rs, msk); + } else + eval_fill_imm(&rs, msk, ins->imm); + + eval_apply_mask(rd, msk); + + op = BPF_OP(ins->code); + + err = eval_defined((op != EBPF_MOV) ? rd : NULL, + (op != BPF_NEG) ? 
&rs : NULL); + if (err != NULL) + return err; + + if (op == BPF_ADD) + eval_add(rd, &rs, msk); + else if (op == BPF_SUB) + eval_sub(rd, &rs, msk); + else if (op == BPF_LSH) + eval_lsh(rd, &rs, opsz, msk); + else if (op == BPF_RSH) + eval_rsh(rd, &rs, opsz, msk); + else if (op == EBPF_ARSH) + eval_arsh(rd, &rs, opsz, msk); + else if (op == BPF_AND) + eval_and(rd, &rs, opsz, msk); + else if (op == BPF_OR) + eval_or(rd, &rs, opsz, msk); + else if (op == BPF_XOR) + eval_xor(rd, &rs, opsz, msk); + else if (op == BPF_MUL) + eval_mul(rd, &rs, opsz, msk); + else if (op == BPF_DIV || op == BPF_MOD) + err = eval_divmod(op, rd, &rs, opsz, msk); + else if (op == BPF_NEG) + eval_neg(rd, opsz, msk); + else if (op == EBPF_MOV) + *rd = rs; + else + eval_max_bound(rd, msk); + + return err; +} + +static const char * +eval_bele(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint64_t msk; + struct bpf_eval_state *st; + struct bpf_reg_val *rd; + const char *err; + + msk = RTE_LEN2MASK(ins->imm, uint64_t); + + st = bvf->evst; + rd = st->rv + ins->dst_reg; + + err = eval_defined(rd, NULL); + if (err != NULL) + return err; + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_BE)) + eval_max_bound(rd, msk); + else + eval_apply_mask(rd, msk); +#else + if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_LE)) + eval_max_bound(rd, msk); + else + eval_apply_mask(rd, msk); +#endif + + return NULL; +} + +static const char * +eval_ptr(struct bpf_verifier *bvf, struct bpf_reg_val *rm, uint32_t opsz, + uint32_t align, int16_t off) +{ + struct bpf_reg_val rv; + + /* calculate reg + offset */ + eval_fill_imm(&rv, rm->mask, off); + eval_add(rm, &rv, rm->mask); + + if (RTE_BPF_ARG_PTR_TYPE(rm->v.type) == 0) + return "destination is not a pointer"; + + if (rm->mask != UINT64_MAX) + return "pointer truncation"; + + if (rm->u.max + opsz > rm->v.size || + (uint64_t)rm->s.max + opsz > rm->v.size || + rm->s.min < 0) + return "memory boundary violation"; + + if (rm->u.max % align != 0) + return "unaligned memory access"; + + if (rm->v.type == RTE_BPF_ARG_PTR_STACK) { + + if (rm->u.max != rm->u.min || rm->s.max != rm->s.min || + rm->u.max != (uint64_t)rm->s.max) + return "stack access with variable offset"; + + bvf->stack_sz = RTE_MAX(bvf->stack_sz, rm->v.size - rm->u.max); + + /* pointer to mbuf */ + } else if (rm->v.type == RTE_BPF_ARG_PTR_MBUF) { + + if (rm->u.max != rm->u.min || rm->s.max != rm->s.min || + rm->u.max != (uint64_t)rm->s.max) + return "mbuf access with variable offset"; + } + + return NULL; +} + +static void +eval_max_load(struct bpf_reg_val *rv, uint64_t mask) +{ + eval_umax_bound(rv, mask); + + /* full 64-bit load */ + if (mask == UINT64_MAX) + eval_smax_bound(rv, mask); + + /* zero-extend load */ + rv->s.min = rv->u.min; + rv->s.max = rv->u.max; +} + + +static const char * +eval_load(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint32_t opsz; + uint64_t msk; + const char *err; + struct bpf_eval_state *st; + struct bpf_reg_val *rd, rs; + const struct bpf_reg_val *sv; + + st = bvf->evst; + rd = st->rv + ins->dst_reg; + rs = st->rv[ins->src_reg]; + opsz = bpf_size(BPF_SIZE(ins->code)); + msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t); + + err = eval_ptr(bvf, &rs, opsz, 1, ins->off); + if (err != NULL) + return err; + + if (rs.v.type == RTE_BPF_ARG_PTR_STACK) { + + sv = st->sv + rs.u.max / sizeof(uint64_t); + if (sv->v.type == RTE_BPF_ARG_UNDEF || sv->mask < msk) + return "undefined value on the stack"; + + *rd = *sv; + + /* pointer to mbuf */ + } else if 
(rs.v.type == RTE_BPF_ARG_PTR_MBUF) { + + if (rs.u.max == offsetof(struct rte_mbuf, next)) { + eval_fill_imm(rd, msk, 0); + rd->v = rs.v; + } else if (rs.u.max == offsetof(struct rte_mbuf, buf_addr)) { + eval_fill_imm(rd, msk, 0); + rd->v.type = RTE_BPF_ARG_PTR; + rd->v.size = rs.v.buf_size; + } else if (rs.u.max == offsetof(struct rte_mbuf, data_off)) { + eval_fill_imm(rd, msk, RTE_PKTMBUF_HEADROOM); + rd->v.type = RTE_BPF_ARG_RAW; + } else { + eval_max_load(rd, msk); + rd->v.type = RTE_BPF_ARG_RAW; + } + + /* pointer to raw data */ + } else { + eval_max_load(rd, msk); + rd->v.type = RTE_BPF_ARG_RAW; + } + + return NULL; +} + +static const char * +eval_mbuf_store(const struct bpf_reg_val *rv, uint32_t opsz) +{ + uint32_t i; + + static const struct { + size_t off; + size_t sz; + } mbuf_ro_fileds[] = { + { .off = offsetof(struct rte_mbuf, buf_addr), }, + { .off = offsetof(struct rte_mbuf, refcnt), }, + { .off = offsetof(struct rte_mbuf, nb_segs), }, + { .off = offsetof(struct rte_mbuf, buf_len), }, + { .off = offsetof(struct rte_mbuf, pool), }, + { .off = offsetof(struct rte_mbuf, next), }, + { .off = offsetof(struct rte_mbuf, priv_size), }, + }; + + for (i = 0; i != RTE_DIM(mbuf_ro_fileds) && + (mbuf_ro_fileds[i].off + mbuf_ro_fileds[i].sz <= + rv->u.max || rv->u.max + opsz <= mbuf_ro_fileds[i].off); + i++) + ; + + if (i != RTE_DIM(mbuf_ro_fileds)) + return "store to the read-only mbuf field"; + + return NULL; + +} + +static const char * +eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint32_t opsz; + uint64_t msk; + const char *err; + struct bpf_eval_state *st; + struct bpf_reg_val rd, rs, *sv; + + opsz = bpf_size(BPF_SIZE(ins->code)); + msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t); + + st = bvf->evst; + rd = st->rv[ins->dst_reg]; + + if (BPF_CLASS(ins->code) == BPF_STX) { + rs = st->rv[ins->src_reg]; + eval_apply_mask(&rs, msk); + } else + eval_fill_imm(&rs, msk, ins->imm); + + err = eval_defined(NULL, &rs); + if (err != NULL) + return err; + + err = eval_ptr(bvf, &rd, opsz, 1, ins->off); + if (err != NULL) + return err; + + if (rd.v.type == RTE_BPF_ARG_PTR_STACK) { + + sv = st->sv + rd.u.max / sizeof(uint64_t); + if (BPF_CLASS(ins->code) == BPF_STX && + BPF_MODE(ins->code) == EBPF_XADD) + eval_max_bound(sv, msk); + else + *sv = rs; + + /* pointer to mbuf */ + } else if (rd.v.type == RTE_BPF_ARG_PTR_MBUF) { + err = eval_mbuf_store(&rd, opsz); + if (err != NULL) + return err; + } + + return NULL; +} + +static const char * +eval_func_arg(struct bpf_verifier *bvf, const struct rte_bpf_arg *arg, + struct bpf_reg_val *rv) +{ + uint32_t i, n; + struct bpf_eval_state *st; + const char *err; + + st = bvf->evst; + + if (rv->v.type == RTE_BPF_ARG_UNDEF) + return "Undefined argument type"; + + if (arg->type != rv->v.type && + arg->type != RTE_BPF_ARG_RAW && + (arg->type != RTE_BPF_ARG_PTR || + RTE_BPF_ARG_PTR_TYPE(rv->v.type) == 0)) + return "Invalid argument type"; + + err = NULL; + + /* argument is a pointer */ + if (RTE_BPF_ARG_PTR_TYPE(arg->type) != 0) { + + err = eval_ptr(bvf, rv, arg->size, 1, 0); + + /* + * pointer to the variable on the stack is passed + * as an argument, mark stack space it occupies as initialized. 
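A short worked trace of the stack bookkeeping performed by eval_ptr() and eval_store() above; the numbers assume the conventional 512-byte eBPF stack (MAX_BPF_STACK_SIZE == 512).

/*
 * Trace for "*(uint64_t *)(r10 - 8) = r1", assuming MAX_BPF_STACK_SIZE == 512:
 *   r10 starts as RTE_BPF_ARG_PTR_STACK with u.min == u.max == 512
 *   (see the rvfp initialiser in evaluate() below);
 *   eval_ptr() folds in off == -8, giving u.min == u.max == 504;
 *   the bounds check passes, since 504 + 8 <= 512;
 *   eval_store() writes stack slot sv[504 / 8] == sv[63];
 *   bvf->stack_sz is raised to 512 - 504 == 8 bytes.
 */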
+ */ + if (err == NULL && rv->v.type == RTE_BPF_ARG_PTR_STACK) { + + i = rv->u.max / sizeof(uint64_t); + n = i + arg->size / sizeof(uint64_t); + while (i != n) { + eval_fill_max_bound(st->sv + i, UINT64_MAX); + i++; + }; + } + } + + return err; +} + +static const char * +eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint64_t msk; + uint32_t i, idx; + struct bpf_reg_val *rv; + const struct rte_bpf_xsym *xsym; + const char *err; + + idx = ins->imm; + + if (idx >= bvf->prm->nb_xsym || + bvf->prm->xsym[idx].type != RTE_BPF_XTYPE_FUNC) + return "invalid external function index"; + + /* for now don't support function calls on 32 bit platform */ + if (sizeof(uint64_t) != sizeof(uintptr_t)) + return "function calls are supported only for 64 bit apps"; + + xsym = bvf->prm->xsym + idx; + + /* evaluate function arguments */ + err = NULL; + for (i = 0; i != xsym->func.nb_args && err == NULL; i++) { + err = eval_func_arg(bvf, xsym->func.args + i, + bvf->evst->rv + EBPF_REG_1 + i); + } + + /* R1-R5 argument/scratch registers */ + for (i = EBPF_REG_1; i != EBPF_REG_6; i++) + bvf->evst->rv[i].v.type = RTE_BPF_ARG_UNDEF; + + /* update return value */ + + rv = bvf->evst->rv + EBPF_REG_0; + rv->v = xsym->func.ret; + msk = (rv->v.type == RTE_BPF_ARG_RAW) ? + RTE_LEN2MASK(rv->v.size * CHAR_BIT, uint64_t) : UINTPTR_MAX; + eval_max_bound(rv, msk); + rv->mask = msk; + + return err; +} + +static void +eval_jeq_jne(struct bpf_reg_val *trd, struct bpf_reg_val *trs) +{ + /* sreg is constant */ + if (trs->u.min == trs->u.max) { + trd->u = trs->u; + /* dreg is constant */ + } else if (trd->u.min == trd->u.max) { + trs->u = trd->u; + } else { + trd->u.max = RTE_MIN(trd->u.max, trs->u.max); + trd->u.min = RTE_MAX(trd->u.min, trs->u.min); + trs->u = trd->u; + } + + /* sreg is constant */ + if (trs->s.min == trs->s.max) { + trd->s = trs->s; + /* dreg is constant */ + } else if (trd->s.min == trd->s.max) { + trs->s = trd->s; + } else { + trd->s.max = RTE_MIN(trd->s.max, trs->s.max); + trd->s.min = RTE_MAX(trd->s.min, trs->s.min); + trs->s = trd->s; + } +} + +static void +eval_jgt_jle(struct bpf_reg_val *trd, struct bpf_reg_val *trs, + struct bpf_reg_val *frd, struct bpf_reg_val *frs) +{ + frd->u.max = RTE_MIN(frd->u.max, frs->u.min); + trd->u.min = RTE_MAX(trd->u.min, trs->u.min + 1); +} + +static void +eval_jlt_jge(struct bpf_reg_val *trd, struct bpf_reg_val *trs, + struct bpf_reg_val *frd, struct bpf_reg_val *frs) +{ + frd->u.min = RTE_MAX(frd->u.min, frs->u.min); + trd->u.max = RTE_MIN(trd->u.max, trs->u.max - 1); +} + +static void +eval_jsgt_jsle(struct bpf_reg_val *trd, struct bpf_reg_val *trs, + struct bpf_reg_val *frd, struct bpf_reg_val *frs) +{ + frd->s.max = RTE_MIN(frd->s.max, frs->s.min); + trd->s.min = RTE_MAX(trd->s.min, trs->s.min + 1); +} + +static void +eval_jslt_jsge(struct bpf_reg_val *trd, struct bpf_reg_val *trs, + struct bpf_reg_val *frd, struct bpf_reg_val *frs) +{ + frd->s.min = RTE_MAX(frd->s.min, frs->s.min); + trd->s.max = RTE_MIN(trd->s.max, trs->s.max - 1); +} + +static const char * +eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins) +{ + uint32_t op; + const char *err; + struct bpf_eval_state *fst, *tst; + struct bpf_reg_val *frd, *frs, *trd, *trs; + struct bpf_reg_val rvf, rvt; + + tst = bvf->evst; + fst = bvf->evin->evst; + + frd = fst->rv + ins->dst_reg; + trd = tst->rv + ins->dst_reg; + + if (BPF_SRC(ins->code) == BPF_X) { + frs = fst->rv + ins->src_reg; + trs = tst->rv + ins->src_reg; + } else { + frs = &rvf; + trs = &rvt; + eval_fill_imm(frs, 
UINT64_MAX, ins->imm); + eval_fill_imm(trs, UINT64_MAX, ins->imm); + } + + err = eval_defined(trd, trs); + if (err != NULL) + return err; + + op = BPF_OP(ins->code); + + if (op == BPF_JEQ) + eval_jeq_jne(trd, trs); + else if (op == EBPF_JNE) + eval_jeq_jne(frd, frs); + else if (op == BPF_JGT) + eval_jgt_jle(trd, trs, frd, frs); + else if (op == EBPF_JLE) + eval_jgt_jle(frd, frs, trd, trs); + else if (op == EBPF_JLT) + eval_jlt_jge(trd, trs, frd, frs); + else if (op == BPF_JGE) + eval_jlt_jge(frd, frs, trd, trs); + else if (op == EBPF_JSGT) + eval_jsgt_jsle(trd, trs, frd, frs); + else if (op == EBPF_JSLE) + eval_jsgt_jsle(frd, frs, trd, trs); + else if (op == EBPF_JLT) + eval_jslt_jsge(trd, trs, frd, frs); + else if (op == EBPF_JSGE) + eval_jslt_jsge(frd, frs, trd, trs); + + return NULL; +} + +/* + * validate parameters for each instruction type. + */ +static const struct bpf_ins_check ins_chk[UINT8_MAX] = { + /* ALU IMM 32-bit instructions */ + [(BPF_ALU | BPF_ADD | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_SUB | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_AND | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_OR | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_LSH | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_RSH | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_XOR | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_MUL | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | EBPF_MOV | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_DIV | BPF_K)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 1, .max = UINT32_MAX}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_MOD | BPF_K)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 1, .max = UINT32_MAX}, + .eval = eval_alu, + }, + /* ALU IMM 64-bit instructions */ + [(EBPF_ALU64 | BPF_ADD | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_SUB | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_AND | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = 
eval_alu, + }, + [(EBPF_ALU64 | BPF_OR | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_LSH | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_RSH | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | EBPF_ARSH | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_XOR | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_MUL | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | EBPF_MOV | BPF_K)] = { + .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX,}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_DIV | BPF_K)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 1, .max = UINT32_MAX}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_MOD | BPF_K)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 1, .max = UINT32_MAX}, + .eval = eval_alu, + }, + /* ALU REG 32-bit instructions */ + [(BPF_ALU | BPF_ADD | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_SUB | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_AND | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_OR | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_LSH | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_RSH | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_XOR | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_MUL | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_DIV | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_MOD | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | EBPF_MOV | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + 
.off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | BPF_NEG)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(BPF_ALU | EBPF_END | EBPF_TO_BE)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 16, .max = 64}, + .check = check_alu_bele, + .eval = eval_bele, + }, + [(BPF_ALU | EBPF_END | EBPF_TO_LE)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 16, .max = 64}, + .check = check_alu_bele, + .eval = eval_bele, + }, + /* ALU REG 64-bit instructions */ + [(EBPF_ALU64 | BPF_ADD | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_SUB | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_AND | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_OR | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_LSH | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_RSH | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | EBPF_ARSH | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_XOR | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_MUL | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_DIV | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_MOD | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | EBPF_MOV | BPF_X)] = { + .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + [(EBPF_ALU64 | BPF_NEG)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_alu, + }, + /* load instructions */ + [(BPF_LDX | BPF_MEM | BPF_B)] = { + .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_load, + }, + [(BPF_LDX | BPF_MEM | BPF_H)] = { + .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_load, + }, + [(BPF_LDX | BPF_MEM | BPF_W)] = { + .mask = {. 
dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_load, + }, + [(BPF_LDX | BPF_MEM | EBPF_DW)] = { + .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_load, + }, + /* load 64 bit immediate value */ + [(BPF_LD | BPF_IMM | EBPF_DW)] = { + .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_ld_imm64, + }, + /* store REG instructions */ + [(BPF_STX | BPF_MEM | BPF_B)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + [(BPF_STX | BPF_MEM | BPF_H)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + [(BPF_STX | BPF_MEM | BPF_W)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + [(BPF_STX | BPF_MEM | EBPF_DW)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + /* atomic add instructions */ + [(BPF_STX | EBPF_XADD | BPF_W)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + [(BPF_STX | EBPF_XADD | EBPF_DW)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_store, + }, + /* store IMM instructions */ + [(BPF_ST | BPF_MEM | BPF_B)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_store, + }, + [(BPF_ST | BPF_MEM | BPF_H)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_store, + }, + [(BPF_ST | BPF_MEM | BPF_W)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_store, + }, + [(BPF_ST | BPF_MEM | EBPF_DW)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_store, + }, + /* jump instruction */ + [(BPF_JMP | BPF_JA)] = { + .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + }, + /* jcc IMM instructions */ + [(BPF_JMP | BPF_JEQ | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JNE | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JGT | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JLT | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JGE | BPF_K)] = { + .mask 
= { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JLE | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSGT | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSLT | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSGE | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSLE | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JSET | BPF_K)] = { + .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_jcc, + }, + /* jcc REG instructions */ + [(BPF_JMP | BPF_JEQ | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JNE | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JGT | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JLT | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JGE | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JLE | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSGT | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSLT | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + }, + [(BPF_JMP | EBPF_JSGE | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | EBPF_JSLE | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + [(BPF_JMP | BPF_JSET | BPF_X)] = { + .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS}, + .off = { .min = 0, .max = UINT16_MAX}, + .imm = { .min = 0, .max = 0}, + .eval = eval_jcc, + }, + /* call instruction */ + [(BPF_JMP | EBPF_CALL)] = { + .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = UINT32_MAX}, + .eval = eval_call, + }, + /* 
ret instruction */ + [(BPF_JMP | EBPF_EXIT)] = { + .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG}, + .off = { .min = 0, .max = 0}, + .imm = { .min = 0, .max = 0}, + .eval = eval_exit, + }, +}; + +/* + * make sure that instruction syntax is valid, + * and it fields don't violate partciular instrcution type restrictions. + */ +static const char * +check_syntax(const struct ebpf_insn *ins) +{ + + uint8_t op; + uint16_t off; + uint32_t imm; + + op = ins->code; + + if (ins_chk[op].mask.dreg == 0) + return "invalid opcode"; + + if ((ins_chk[op].mask.dreg & 1 << ins->dst_reg) == 0) + return "invalid dst-reg field"; + + if ((ins_chk[op].mask.sreg & 1 << ins->src_reg) == 0) + return "invalid src-reg field"; + + off = ins->off; + if (ins_chk[op].off.min > off || ins_chk[op].off.max < off) + return "invalid off field"; + + imm = ins->imm; + if (ins_chk[op].imm.min > imm || ins_chk[op].imm.max < imm) + return "invalid imm field"; + + if (ins_chk[op].check != NULL) + return ins_chk[op].check(ins); + + return NULL; +} + +/* + * helper function, return instruction index for the given node. + */ +static uint32_t +get_node_idx(const struct bpf_verifier *bvf, const struct inst_node *node) +{ + return node - bvf->in; +} + +/* + * helper function, used to walk through constructed CFG. + */ +static struct inst_node * +get_next_node(struct bpf_verifier *bvf, struct inst_node *node) +{ + uint32_t ce, ne, dst; + + ne = node->nb_edge; + ce = node->cur_edge; + if (ce == ne) + return NULL; + + node->cur_edge++; + dst = node->edge_dest[ce]; + return bvf->in + dst; +} + +static void +set_node_colour(struct bpf_verifier *bvf, struct inst_node *node, + uint32_t new) +{ + uint32_t prev; + + prev = node->colour; + node->colour = new; + + bvf->node_colour[prev]--; + bvf->node_colour[new]++; +} + +/* + * helper function, add new edge between two nodes. + */ +static int +add_edge(struct bpf_verifier *bvf, struct inst_node *node, uint32_t nidx) +{ + uint32_t ne; + + if (nidx > bvf->prm->nb_ins) { + RTE_BPF_LOG(ERR, "%s: program boundary violation at pc: %u, " + "next pc: %u\n", + __func__, get_node_idx(bvf, node), nidx); + return -EINVAL; + } + + ne = node->nb_edge; + if (ne >= RTE_DIM(node->edge_dest)) { + RTE_BPF_LOG(ERR, "%s: internal error at pc: %u\n", + __func__, get_node_idx(bvf, node)); + return -EINVAL; + } + + node->edge_dest[ne] = nidx; + node->nb_edge = ne + 1; + return 0; +} + +/* + * helper function, determine type of edge between two nodes. + */ +static void +set_edge_type(struct bpf_verifier *bvf, struct inst_node *node, + const struct inst_node *next) +{ + uint32_t ce, clr, type; + + ce = node->cur_edge - 1; + clr = next->colour; + + type = UNKNOWN_EDGE; + + if (clr == WHITE) + type = TREE_EDGE; + else if (clr == GREY) + type = BACK_EDGE; + else if (clr == BLACK) + /* + * in fact it could be either direct or cross edge, + * but for now, we don't need to distinguish between them. + */ + type = CROSS_EDGE; + + node->edge_type[ce] = type; + bvf->edge_type[type]++; +} + +static struct inst_node * +get_prev_node(struct bpf_verifier *bvf, struct inst_node *node) +{ + return bvf->in + node->prev_node; +} + +/* + * Depth-First Search (DFS) through previously constructed + * Control Flow Graph (CFG). + * Information collected at this path would be used later + * to determine is there any loops, and/or unreachable instructions. 
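An example of how the colouring and edge classification above catch loops; the three-instruction program is written in pseudo-assembly and is not part of the patch.

/*
 * Example program:
 *   0: jeq r1, 0, +1     edges to pc 2 (taken) and pc 1 (fall-through)
 *   1: ja -2             edge back to pc 0
 *   2: exit
 * DFS colours pc 0 GREY, walks to pc 2 and pc 1; following the edge 1 -> 0
 * finds a GREY destination, so set_edge_type() records a BACK_EDGE and
 * validate() rejects the program with "loops detected".
 */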
+ */ +static void +dfs(struct bpf_verifier *bvf) +{ + struct inst_node *next, *node; + + node = bvf->in; + while (node != NULL) { + + if (node->colour == WHITE) + set_node_colour(bvf, node, GREY); + + if (node->colour == GREY) { + + /* find next unprocessed child node */ + do { + next = get_next_node(bvf, node); + if (next == NULL) + break; + set_edge_type(bvf, node, next); + } while (next->colour != WHITE); + + if (next != NULL) { + /* proceed with next child */ + next->prev_node = get_node_idx(bvf, node); + node = next; + } else { + /* + * finished with current node and all its kids, + * proceed with parent + */ + set_node_colour(bvf, node, BLACK); + node->cur_edge = 0; + node = get_prev_node(bvf, node); + } + } else + node = NULL; + } +} + +/* + * report unreachable instructions. + */ +static void +log_unreachable(const struct bpf_verifier *bvf) +{ + uint32_t i; + struct inst_node *node; + const struct ebpf_insn *ins; + + for (i = 0; i != bvf->prm->nb_ins; i++) { + + node = bvf->in + i; + ins = bvf->prm->ins + i; + + if (node->colour == WHITE && + ins->code != (BPF_LD | BPF_IMM | EBPF_DW)) + RTE_BPF_LOG(ERR, "unreachable code at pc: %u;\n", i); + } +} + +/* + * report loops detected. + */ +static void +log_loop(const struct bpf_verifier *bvf) +{ + uint32_t i, j; + struct inst_node *node; + + for (i = 0; i != bvf->prm->nb_ins; i++) { + + node = bvf->in + i; + if (node->colour != BLACK) + continue; + + for (j = 0; j != node->nb_edge; j++) { + if (node->edge_type[j] == BACK_EDGE) + RTE_BPF_LOG(ERR, + "loop at pc:%u --> pc:%u;\n", + i, node->edge_dest[j]); + } + } +} + +/* + * First pass goes through all instructions in the set, checks that each + * instruction is a valid one (correct syntax, valid field values, etc.) + * and constructs control flow graph (CFG). + * Then depth-first search is performed over the constructed graph. + * Programs with unreachable instructions and/or loops will be rejected. + */ +static int +validate(struct bpf_verifier *bvf) +{ + int32_t rc; + uint32_t i; + struct inst_node *node; + const struct ebpf_insn *ins; + const char *err; + + rc = 0; + for (i = 0; i < bvf->prm->nb_ins; i++) { + + ins = bvf->prm->ins + i; + node = bvf->in + i; + + err = check_syntax(ins); + if (err != 0) { + RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n", + __func__, err, i); + rc |= -EINVAL; + } + + /* + * construct CFG, jcc nodes have two outgoing edges, + * 'exit' nodes - none, all other nodes have exactly one + * outgoing edge.
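Concrete edge targets, matching the switch that follows:

/*
 * For example:
 *   jcc at pc 10, off == 5  -> edges to pc 16 (i + off + 1) and pc 11 (i + 1)
 *   ja  at pc 10, off == 5  -> single edge to pc 16
 *   lddw at pc 3            -> single edge to pc 5 (the insn uses two slots)
 */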
+ */ + switch (ins->code) { + case (BPF_JMP | EBPF_EXIT): + break; + case (BPF_JMP | BPF_JEQ | BPF_K): + case (BPF_JMP | EBPF_JNE | BPF_K): + case (BPF_JMP | BPF_JGT | BPF_K): + case (BPF_JMP | EBPF_JLT | BPF_K): + case (BPF_JMP | BPF_JGE | BPF_K): + case (BPF_JMP | EBPF_JLE | BPF_K): + case (BPF_JMP | EBPF_JSGT | BPF_K): + case (BPF_JMP | EBPF_JSLT | BPF_K): + case (BPF_JMP | EBPF_JSGE | BPF_K): + case (BPF_JMP | EBPF_JSLE | BPF_K): + case (BPF_JMP | BPF_JSET | BPF_K): + case (BPF_JMP | BPF_JEQ | BPF_X): + case (BPF_JMP | EBPF_JNE | BPF_X): + case (BPF_JMP | BPF_JGT | BPF_X): + case (BPF_JMP | EBPF_JLT | BPF_X): + case (BPF_JMP | BPF_JGE | BPF_X): + case (BPF_JMP | EBPF_JLE | BPF_X): + case (BPF_JMP | EBPF_JSGT | BPF_X): + case (BPF_JMP | EBPF_JSLT | BPF_X): + case (BPF_JMP | EBPF_JSGE | BPF_X): + case (BPF_JMP | EBPF_JSLE | BPF_X): + case (BPF_JMP | BPF_JSET | BPF_X): + rc |= add_edge(bvf, node, i + ins->off + 1); + rc |= add_edge(bvf, node, i + 1); + bvf->nb_jcc_nodes++; + break; + case (BPF_JMP | BPF_JA): + rc |= add_edge(bvf, node, i + ins->off + 1); + break; + /* load 64 bit immediate value */ + case (BPF_LD | BPF_IMM | EBPF_DW): + rc |= add_edge(bvf, node, i + 2); + i++; + break; + default: + rc |= add_edge(bvf, node, i + 1); + break; + } + + bvf->nb_nodes++; + bvf->node_colour[WHITE]++; + } + + if (rc != 0) + return rc; + + dfs(bvf); + + RTE_BPF_LOG(DEBUG, "%s(%p) stats:\n" + "nb_nodes=%u;\n" + "nb_jcc_nodes=%u;\n" + "node_color={[WHITE]=%u, [GREY]=%u,, [BLACK]=%u};\n" + "edge_type={[UNKNOWN]=%u, [TREE]=%u, [BACK]=%u, [CROSS]=%u};\n", + __func__, bvf, + bvf->nb_nodes, + bvf->nb_jcc_nodes, + bvf->node_colour[WHITE], bvf->node_colour[GREY], + bvf->node_colour[BLACK], + bvf->edge_type[UNKNOWN_EDGE], bvf->edge_type[TREE_EDGE], + bvf->edge_type[BACK_EDGE], bvf->edge_type[CROSS_EDGE]); + + if (bvf->node_colour[BLACK] != bvf->nb_nodes) { + RTE_BPF_LOG(ERR, "%s(%p) unreachable instructions;\n", + __func__, bvf); + log_unreachable(bvf); + return -EINVAL; + } + + if (bvf->node_colour[GREY] != 0 || bvf->node_colour[WHITE] != 0 || + bvf->edge_type[UNKNOWN_EDGE] != 0) { + RTE_BPF_LOG(ERR, "%s(%p) DFS internal error;\n", + __func__, bvf); + return -EINVAL; + } + + if (bvf->edge_type[BACK_EDGE] != 0) { + RTE_BPF_LOG(ERR, "%s(%p) loops detected;\n", + __func__, bvf); + log_loop(bvf); + return -EINVAL; + } + + return 0; +} + +/* + * helper functions get/free eval states. + */ +static struct bpf_eval_state * +pull_eval_state(struct bpf_verifier *bvf) +{ + uint32_t n; + + n = bvf->evst_pool.cur; + if (n == bvf->evst_pool.num) + return NULL; + + bvf->evst_pool.cur = n + 1; + return bvf->evst_pool.ent + n; +} + +static void +push_eval_state(struct bpf_verifier *bvf) +{ + bvf->evst_pool.cur--; +} + +static void +evst_pool_fini(struct bpf_verifier *bvf) +{ + bvf->evst = NULL; + free(bvf->evst_pool.ent); + memset(&bvf->evst_pool, 0, sizeof(bvf->evst_pool)); +} + +static int +evst_pool_init(struct bpf_verifier *bvf) +{ + uint32_t n; + + n = bvf->nb_jcc_nodes + 1; + + bvf->evst_pool.ent = calloc(n, sizeof(bvf->evst_pool.ent[0])); + if (bvf->evst_pool.ent == NULL) + return -ENOMEM; + + bvf->evst_pool.num = n; + bvf->evst_pool.cur = 0; + + bvf->evst = pull_eval_state(bvf); + return 0; +} + +/* + * Save current eval state. 
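The pool sizing in evst_pool_init() above follows from the traversal strategy: each conditional node parks at most one saved state while its second edge is pending, and one more state is always in flight.

/*
 * Example: a program with 3 conditional jumps gets an eval-state pool of
 * 3 + 1 == 4 entries; save_eval_state() pulls one entry per jcc node and
 * restore_eval_state() hands it back once both outgoing edges were walked.
 */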
+ */ +static int +save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + struct bpf_eval_state *st; + + /* get new eval_state for this node */ + st = pull_eval_state(bvf); + if (st == NULL) { + RTE_BPF_LOG(ERR, + "%s: internal error (out of space) at pc: %u\n", + __func__, get_node_idx(bvf, node)); + return -ENOMEM; + } + + /* make a copy of current state */ + memcpy(st, bvf->evst, sizeof(*st)); + + /* swap current state with new one */ + node->evst = bvf->evst; + bvf->evst = st; + + RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n", + __func__, bvf, get_node_idx(bvf, node), node->evst, bvf->evst); + + return 0; +} + +/* + * Restore previous eval state and mark current eval state as free. + */ +static void +restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n", + __func__, bvf, get_node_idx(bvf, node), bvf->evst, node->evst); + + bvf->evst = node->evst; + node->evst = NULL; + push_eval_state(bvf); +} + +static void +log_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins, + uint32_t pc, int32_t loglvl) +{ + const struct bpf_eval_state *st; + const struct bpf_reg_val *rv; + + rte_log(loglvl, rte_bpf_logtype, "%s(pc=%u):\n", __func__, pc); + + st = bvf->evst; + rv = st->rv + ins->dst_reg; + + rte_log(loglvl, rte_bpf_logtype, + "r%u={\n" + "\tv={type=%u, size=%zu},\n" + "\tmask=0x%" PRIx64 ",\n" + "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n" + "\ts={min=%" PRId64 ", max=%" PRId64 "},\n" + "};\n", + ins->dst_reg, + rv->v.type, rv->v.size, + rv->mask, + rv->u.min, rv->u.max, + rv->s.min, rv->s.max); +} + +/* + * Do second pass through CFG and try to evaluate instructions + * via each possible path. + * Right now evaluation functionality is quite limited. + * Still need to add extra checks for: + * - use/return uninitialized registers. + * - use uninitialized data from the stack. + * - memory boundaries violation. + */ +static int +evaluate(struct bpf_verifier *bvf) +{ + int32_t rc; + uint32_t idx, op; + const char *err; + const struct ebpf_insn *ins; + struct inst_node *next, *node; + + /* initial state of frame pointer */ + static const struct bpf_reg_val rvfp = { + .v = { + .type = RTE_BPF_ARG_PTR_STACK, + .size = MAX_BPF_STACK_SIZE, + }, + .mask = UINT64_MAX, + .u = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE}, + .s = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE}, + }; + + bvf->evst->rv[EBPF_REG_1].v = bvf->prm->prog_arg; + bvf->evst->rv[EBPF_REG_1].mask = UINT64_MAX; + if (bvf->prm->prog_arg.type == RTE_BPF_ARG_RAW) + eval_max_bound(bvf->evst->rv + EBPF_REG_1, UINT64_MAX); + + bvf->evst->rv[EBPF_REG_10] = rvfp; + + ins = bvf->prm->ins; + node = bvf->in; + next = node; + rc = 0; + + while (node != NULL && rc == 0) { + + /* + * current node evaluation, make sure we evaluate + * each node only once. 
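The per-edge states produced by the jcc evaluators surface here: the state saved at the node keeps the fall-through constraints, while the in-flight state carries the taken-branch constraints. For instance, assuming an instruction "jgt r2, 7, +N":

/*
 * Example for "jgt r2, 7, +N" (eval_jgt_jle with a constant source):
 *   taken path     - r2's unsigned lower bound is raised to 8;
 *   fall-through   - r2's unsigned upper bound is clipped to 7.
 * The fall-through bounds sit in the state saved at the node and are
 * restored just before the second outgoing edge is followed.
 */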
+ */ + if (next != NULL) { + + bvf->evin = node; + idx = get_node_idx(bvf, node); + op = ins[idx].code; + + /* for jcc node make a copy of evaluatoion state */ + if (node->nb_edge > 1) + rc |= save_eval_state(bvf, node); + + if (ins_chk[op].eval != NULL && rc == 0) { + err = ins_chk[op].eval(bvf, ins + idx); + if (err != NULL) { + RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n", + __func__, err, idx); + rc = -EINVAL; + } + } + + log_eval_state(bvf, ins + idx, idx, RTE_LOG_DEBUG); + bvf->evin = NULL; + } + + /* proceed through CFG */ + next = get_next_node(bvf, node); + if (next != NULL) { + + /* proceed with next child */ + if (node->cur_edge == node->nb_edge && + node->evst != NULL) + restore_eval_state(bvf, node); + + next->prev_node = get_node_idx(bvf, node); + node = next; + } else { + /* + * finished with current node and all it's kids, + * proceed with parent + */ + node->cur_edge = 0; + node = get_prev_node(bvf, node); + + /* finished */ + if (node == bvf->in) + node = NULL; + } + } + + return rc; +} + +int +bpf_validate(struct rte_bpf *bpf) +{ + int32_t rc; + struct bpf_verifier bvf; + + /* check input argument type, don't allow mbuf ptr on 32-bit */ + if (bpf->prm.prog_arg.type != RTE_BPF_ARG_RAW && + bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR && + (sizeof(uint64_t) != sizeof(uintptr_t) || + bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR_MBUF)) { + RTE_BPF_LOG(ERR, "%s: unsupported argument type\n", __func__); + return -ENOTSUP; + } + + memset(&bvf, 0, sizeof(bvf)); + bvf.prm = &bpf->prm; + bvf.in = calloc(bpf->prm.nb_ins, sizeof(bvf.in[0])); + if (bvf.in == NULL) + return -ENOMEM; + + rc = validate(&bvf); + + if (rc == 0) { + rc = evst_pool_init(&bvf); + if (rc == 0) + rc = evaluate(&bvf); + evst_pool_fini(&bvf); + } + + free(bvf.in); + + /* copy collected info */ + if (rc == 0) + bpf->stack_sz = bvf.stack_sz; + + return rc; +} diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build new file mode 100644 index 00000000..bc0cd78f --- /dev/null +++ b/lib/librte_bpf/meson.build @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +allow_experimental_apis = true +sources = files('bpf.c', + 'bpf_exec.c', + 'bpf_load.c', + 'bpf_pkt.c', + 'bpf_validate.c') + +if arch_subdir == 'x86' and cc.sizeof('void *') == 8 + sources += files('bpf_jit_x86.c') +endif + +install_headers = files('bpf_def.h', + 'rte_bpf.h', + 'rte_bpf_ethdev.h') + +deps += ['mbuf', 'net', 'ethdev'] + +dep = cc.find_library('elf', required: false) +if dep.found() == true and cc.has_header('libelf.h', dependencies: dep) + sources += files('bpf_load_elf.c') + ext_deps += dep +endif diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h new file mode 100644 index 00000000..ad62ef2c --- /dev/null +++ b/lib/librte_bpf/rte_bpf.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_BPF_H_ +#define _RTE_BPF_H_ + +/** + * @file rte_bpf.h + * @b EXPERIMENTAL: this API may change without prior notice + * + * RTE BPF support. + * librte_bpf provides a framework to load and execute eBPF bytecode + * inside user-space dpdk based applications. + * It supports basic set of features from eBPF spec + * (https://www.kernel.org/doc/Documentation/networking/filter.txt). + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Possible types for function/BPF program arguments. 
+ */ +enum rte_bpf_arg_type { + RTE_BPF_ARG_UNDEF, /**< undefined */ + RTE_BPF_ARG_RAW, /**< scalar value */ + RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */ + RTE_BPF_ARG_PTR_MBUF, /**< pointer to rte_mbuf */ + RTE_BPF_ARG_PTR_STACK, +}; + +/** + * function argument information + */ +struct rte_bpf_arg { + enum rte_bpf_arg_type type; + /** + * for ptr type - max size of data buffer it points to + * for raw type - the size (in bytes) of the value + */ + size_t size; + size_t buf_size; + /**< for mbuf ptr type, max size of rte_mbuf data buffer */ +}; + +/** + * determine is argument a pointer + */ +#define RTE_BPF_ARG_PTR_TYPE(x) ((x) & RTE_BPF_ARG_PTR) + +/** + * Possible types for external symbols. + */ +enum rte_bpf_xtype { + RTE_BPF_XTYPE_FUNC, /**< function */ + RTE_BPF_XTYPE_VAR, /**< variable */ + RTE_BPF_XTYPE_NUM +}; + +/** + * Definition for external symbols available in the BPF program. + */ +struct rte_bpf_xsym { + const char *name; /**< name */ + enum rte_bpf_xtype type; /**< type */ + union { + struct { + uint64_t (*val)(uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t); + uint32_t nb_args; + struct rte_bpf_arg args[EBPF_FUNC_MAX_ARGS]; + /**< Function arguments descriptions. */ + struct rte_bpf_arg ret; /**< function return value. */ + } func; + struct { + void *val; /**< actual memory location */ + struct rte_bpf_arg desc; /**< type, size, etc. */ + } var; /**< external variable */ + }; +}; + +/** + * Input parameters for loading eBPF code. + */ +struct rte_bpf_prm { + const struct ebpf_insn *ins; /**< array of eBPF instructions */ + uint32_t nb_ins; /**< number of instructions in ins */ + const struct rte_bpf_xsym *xsym; + /**< array of external symbols that eBPF code is allowed to reference */ + uint32_t nb_xsym; /**< number of elements in xsym */ + struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */ +}; + +/** + * Information about compiled into native ISA eBPF code. + */ +struct rte_bpf_jit { + uint64_t (*func)(void *); /**< JIT-ed native code */ + size_t sz; /**< size of JIT-ed code */ +}; + +struct rte_bpf; + +/** + * De-allocate all memory used by this eBPF execution context. + * + * @param bpf + * BPF handle to destroy. + */ +void __rte_experimental +rte_bpf_destroy(struct rte_bpf *bpf); + +/** + * Create a new eBPF execution context and load given BPF code into it. + * + * @param prm + * Parameters used to create and initialise the BPF exeution context. + * @return + * BPF handle that is used in future BPF operations, + * or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + * - ENOMEM - can't reserve enough memory + */ +struct rte_bpf * __rte_experimental +rte_bpf_load(const struct rte_bpf_prm *prm); + +/** + * Create a new eBPF execution context and load BPF code from given ELF + * file into it. + * + * @param prm + * Parameters used to create and initialise the BPF exeution context. + * @param fname + * Pathname for a ELF file. + * @param sname + * Name of the executable section within the file to load. + * @return + * BPF handle that is used in future BPF operations, + * or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + * - ENOMEM - can't reserve enough memory + */ +struct rte_bpf * __rte_experimental +rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname, + const char *sname); +/** + * Execute given BPF bytecode. 
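A minimal load-and-run sketch tying rte_bpf_load(), rte_bpf_exec() and rte_bpf_destroy() together; the two-instruction program simply returns 1, and the instruction field names and opcode macros are assumed to follow bpf_def.h.

#include <rte_common.h>
#include <rte_bpf.h>

/* Illustrative sketch: load a trivial "return 1" program and execute it. */
static const struct ebpf_insn trivial_prog[] = {
	{ .code = (EBPF_ALU64 | EBPF_MOV | BPF_K), .dst_reg = EBPF_REG_0, .imm = 1, },
	{ .code = (BPF_JMP | EBPF_EXIT), },
};

static uint64_t
run_trivial_prog(void)
{
	const struct rte_bpf_prm prm = {
		.ins = trivial_prog,
		.nb_ins = RTE_DIM(trivial_prog),
		.prog_arg = { .type = RTE_BPF_ARG_RAW, .size = sizeof(uint64_t), },
	};
	struct rte_bpf *bpf;
	uint64_t rc;

	bpf = rte_bpf_load(&prm);
	if (bpf == NULL)
		return 0;	/* rte_errno holds the failure reason */

	rc = rte_bpf_exec(bpf, NULL);	/* expected to return 1 */
	rte_bpf_destroy(bpf);
	return rc;
}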
+ * + * @param bpf + * handle for the BPF code to execute. + * @param ctx + * pointer to input context. + * @return + * BPF execution return value. + */ +uint64_t __rte_experimental +rte_bpf_exec(const struct rte_bpf *bpf, void *ctx); + +/** + * Execute given BPF bytecode over a set of input contexts. + * + * @param bpf + * handle for the BPF code to execute. + * @param ctx + * array of pointers to the input contexts. + * @param rc + * array of return values (one per input). + * @param num + * number of elements in ctx[] (and rc[]). + * @return + * number of successfully processed inputs. + */ +uint32_t __rte_experimental +rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[], + uint32_t num); + +/** + * Provide information about natively compield code for given BPF handle. + * + * @param bpf + * handle for the BPF code. + * @param jit + * pointer to the rte_bpf_jit structure to be filled with related data. + * @return + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully. + */ +int __rte_experimental +rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BPF_H_ */ diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h new file mode 100644 index 00000000..31731e7a --- /dev/null +++ b/lib/librte_bpf/rte_bpf_ethdev.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_BPF_ETHDEV_H_ +#define _RTE_BPF_ETHDEV_H_ + +/** + * @file rte_bpf_ethdev.h + * @b EXPERIMENTAL: this API may change without prior notice + * + * API to install BPF filter as RX/TX callbacks for eth devices. + * Note that right now: + * - it is not MT safe, i.e. it is not allowed to do load/unload for the + * same port/queue from different threads in parallel. + * - though it allows to do load/unload at runtime + * (while RX/TX is ongoing on given port/queue). + * - allows only one BPF program per port/queue, + * i.e. new load will replace previously loaded for that port/queue BPF program. + * Filter behaviour - if BPF program returns zero value for a given packet, + * then it will be dropped inside callback and no further processing + * on RX - it will be dropped inside callback and no further processing + * for that packet will happen. + * on TX - packet will remain unsent, and it is responsibility of the user + * to handle such situation (drop, try to send again, etc.). + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + RTE_BPF_ETH_F_NONE = 0, + RTE_BPF_ETH_F_JIT = 0x1, /*< use compiled into native ISA code */ +}; + +/** + * Unload previously loaded BPF program (if any) from given RX port/queue + * and remove appropriate RX port/queue callback. + * + * @param port + * The identifier of the ethernet port + * @param queue + * The identifier of the RX queue on the given port + */ +void __rte_experimental +rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue); + +/** + * Unload previously loaded BPF program (if any) from given TX port/queue + * and remove appropriate TX port/queue callback. + * + * @param port + * The identifier of the ethernet port + * @param queue + * The identifier of the TX queue on the given port + */ +void __rte_experimental +rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue); + +/** + * Load BPF program from the ELF file and install callback to execute it + * on given RX port/queue. 
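+ *
+ * A minimal install/remove sketch (the file name, section name and
+ * port/queue numbers below are illustrative):
+ * @code
+ * if (rte_bpf_eth_rx_elf_load(port_id, 0, &prm, "filter.o", ".text",
+ *		RTE_BPF_ETH_F_JIT) == 0) {
+ *	/* RX traffic on port_id/queue 0 is now filtered */
+ *	rte_bpf_eth_rx_unload(port_id, 0);
+ * }
+ * @endcode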
+ * + * @param port + * The identifier of the ethernet port + * @param queue + * The identifier of the RX queue on the given port + * @param fname + * Pathname for a ELF file. + * @param sname + * Name of the executable section within the file to load. + * @param prm + * Parameters used to create and initialise the BPF exeution context. + * @param flags + * Flags that define expected expected behavior of the loaded filter + * (i.e. jited/non-jited version to use). + * @return + * Zero on successful completion or negative error code otherwise. + */ +int __rte_experimental +rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm, const char *fname, const char *sname, + uint32_t flags); + +/** + * Load BPF program from the ELF file and install callback to execute it + * on given TX port/queue. + * + * @param port + * The identifier of the ethernet port + * @param queue + * The identifier of the TX queue on the given port + * @param fname + * Pathname for a ELF file. + * @param sname + * Name of the executable section within the file to load. + * @param prm + * Parameters used to create and initialise the BPF exeution context. + * @param flags + * Flags that define expected expected behavior of the loaded filter + * (i.e. jited/non-jited version to use). + * @return + * Zero on successful completion or negative error code otherwise. + */ +int __rte_experimental +rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm, const char *fname, const char *sname, + uint32_t flags); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BPF_ETHDEV_H_ */ diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map new file mode 100644 index 00000000..a203e088 --- /dev/null +++ b/lib/librte_bpf/rte_bpf_version.map @@ -0,0 +1,16 @@ +EXPERIMENTAL { + global: + + rte_bpf_destroy; + rte_bpf_elf_load; + rte_bpf_eth_rx_elf_load; + rte_bpf_eth_rx_unload; + rte_bpf_eth_tx_elf_load; + rte_bpf_eth_tx_unload; + rte_bpf_exec; + rte_bpf_exec_burst; + rte_bpf_get_jit; + rte_bpf_load; + + local: *; +}; diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c index e88e4e11..9666e90c 100644 --- a/lib/librte_cmdline/cmdline_parse.c +++ b/lib/librte_cmdline/cmdline_parse.c @@ -208,9 +208,6 @@ cmdline_parse(struct cmdline *cl, const char * buf) int err = CMDLINE_PARSE_NOMATCH; int tok; cmdline_parse_ctx_t *ctx; -#ifdef RTE_LIBRTE_CMDLINE_DEBUG - char debug_buf[BUFSIZ]; -#endif char *result_buf = result.buf; if (!cl || !buf) @@ -250,10 +247,8 @@ cmdline_parse(struct cmdline *cl, const char * buf) return linelen; } -#ifdef RTE_LIBRTE_CMDLINE_DEBUG - snprintf(debug_buf, (linelen>64 ? 64 : linelen), "%s", buf); - debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf); -#endif + debug_printf("Parse line : len=%d, <%.*s>\n", + linelen, linelen > 64 ? 64 : linelen, buf); /* parse it !! 
*/ inst = ctx[inst_num]; @@ -436,7 +431,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, if ((unsigned)(comp_len + 1) > size) return 0; - snprintf(dst, size, "%s", comp_buf); + strlcpy(dst, comp_buf, size); dst[comp_len] = 0; return 2; } @@ -513,7 +508,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state, continue; } (*state)++; - l=snprintf(dst, size, "%s", tmpbuf); + l=strlcpy(dst, tmpbuf, size); if (l>=0 && token_hdr.ops->get_help) { token_hdr.ops->get_help(token_p, tmpbuf, sizeof(tmpbuf)); diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c index 8d281192..24e04755 100644 --- a/lib/librte_cmdline/cmdline_parse_etheraddr.c +++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c @@ -102,7 +102,7 @@ cmdline_parse_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, (token_len != ETHER_ADDRSTRLENSHORT - 1)) return -1; - snprintf(ether_str, token_len+1, "%s", buf); + strlcpy(ether_str, buf, token_len + 1); tmp = my_ether_aton(ether_str); if (tmp == NULL) diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.c b/lib/librte_cmdline/cmdline_parse_ipaddr.c index ae6ea100..6647f569 100644 --- a/lib/librte_cmdline/cmdline_parse_ipaddr.c +++ b/lib/librte_cmdline/cmdline_parse_ipaddr.c @@ -4,26 +4,6 @@ * All rights reserved. */ -/* - * For inet_ntop() functions: - * - * Copyright (c) 1996 by Internet Software Consortium. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS - * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE - * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - - #include #include #include @@ -31,6 +11,7 @@ #include #include #include +#include #include #ifndef __linux__ #ifndef __FreeBSD__ @@ -52,205 +33,9 @@ struct cmdline_token_ops cmdline_token_ipaddr_ops = { .get_help = cmdline_get_help_ipaddr, }; -#define INADDRSZ 4 -#define IN6ADDRSZ 16 #define PREFIXMAX 128 #define V4PREFIXMAX 32 -/* - * WARNING: Don't even consider trying to compile this on a system where - * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. - */ - -static int inet_pton4(const char *src, unsigned char *dst); -static int inet_pton6(const char *src, unsigned char *dst); - -/* int - * inet_pton(af, src, dst) - * convert from presentation format (which usually means ASCII printable) - * to network format (which is usually some kind of binary format). - * return: - * 1 if the address was valid for the specified address family - * 0 if the address wasn't valid (`dst' is untouched in this case) - * -1 if some other error occurred (`dst' is untouched in this case, too) - * author: - * Paul Vixie, 1996. 
- */ -static int -my_inet_pton(int af, const char *src, void *dst) -{ - switch (af) { - case AF_INET: - return inet_pton4(src, dst); - case AF_INET6: - return inet_pton6(src, dst); - default: - errno = EAFNOSUPPORT; - return -1; - } - /* NOTREACHED */ -} - -/* int - * inet_pton4(src, dst) - * like inet_aton() but without all the hexadecimal and shorthand. - * return: - * 1 if `src' is a valid dotted quad, else 0. - * notice: - * does not touch `dst' unless it's returning 1. - * author: - * Paul Vixie, 1996. - */ -static int -inet_pton4(const char *src, unsigned char *dst) -{ - static const char digits[] = "0123456789"; - int saw_digit, octets, ch; - unsigned char tmp[INADDRSZ], *tp; - - saw_digit = 0; - octets = 0; - *(tp = tmp) = 0; - while ((ch = *src++) != '\0') { - const char *pch; - - if ((pch = strchr(digits, ch)) != NULL) { - unsigned int new = *tp * 10 + (pch - digits); - - if (new > 255) - return 0; - if (! saw_digit) { - if (++octets > 4) - return 0; - saw_digit = 1; - } - *tp = (unsigned char)new; - } else if (ch == '.' && saw_digit) { - if (octets == 4) - return 0; - *++tp = 0; - saw_digit = 0; - } else - return 0; - } - if (octets < 4) - return 0; - - memcpy(dst, tmp, INADDRSZ); - return 1; -} - -/* int - * inet_pton6(src, dst) - * convert presentation level address to network order binary form. - * return: - * 1 if `src' is a valid [RFC1884 2.2] address, else 0. - * notice: - * (1) does not touch `dst' unless it's returning 1. - * (2) :: in a full address is silently ignored. - * credit: - * inspired by Mark Andrews. - * author: - * Paul Vixie, 1996. - */ -static int -inet_pton6(const char *src, unsigned char *dst) -{ - static const char xdigits_l[] = "0123456789abcdef", - xdigits_u[] = "0123456789ABCDEF"; - unsigned char tmp[IN6ADDRSZ], *tp = 0, *endp = 0, *colonp = 0; - const char *xdigits = 0, *curtok = 0; - int ch = 0, saw_xdigit = 0, count_xdigit = 0; - unsigned int val = 0; - unsigned dbloct_count = 0; - - memset((tp = tmp), '\0', IN6ADDRSZ); - endp = tp + IN6ADDRSZ; - colonp = NULL; - /* Leading :: requires some special handling. */ - if (*src == ':') - if (*++src != ':') - return 0; - curtok = src; - saw_xdigit = count_xdigit = 0; - val = 0; - - while ((ch = *src++) != '\0') { - const char *pch; - - if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) - pch = strchr((xdigits = xdigits_u), ch); - if (pch != NULL) { - if (count_xdigit >= 4) - return 0; - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) - return 0; - saw_xdigit = 1; - count_xdigit++; - continue; - } - if (ch == ':') { - curtok = src; - if (!saw_xdigit) { - if (colonp) - return 0; - colonp = tp; - continue; - } else if (*src == '\0') { - return 0; - } - if (tp + sizeof(int16_t) > endp) - return 0; - *tp++ = (unsigned char) ((val >> 8) & 0xff); - *tp++ = (unsigned char) (val & 0xff); - saw_xdigit = 0; - count_xdigit = 0; - val = 0; - dbloct_count++; - continue; - } - if (ch == '.' && ((tp + INADDRSZ) <= endp) && - inet_pton4(curtok, tp) > 0) { - tp += INADDRSZ; - saw_xdigit = 0; - dbloct_count += 2; - break; /* '\0' was seen by inet_pton4(). */ - } - return 0; - } - if (saw_xdigit) { - if (tp + sizeof(int16_t) > endp) - return 0; - *tp++ = (unsigned char) ((val >> 8) & 0xff); - *tp++ = (unsigned char) (val & 0xff); - dbloct_count++; - } - if (colonp != NULL) { - /* if we already have 8 double octets, having a colon means error */ - if (dbloct_count == 8) - return 0; - - /* - * Since some memmove()'s erroneously fail to handle - * overlapping regions, we'll do the shift by hand. 
- */ - const int n = tp - colonp; - int i; - - for (i = 1; i <= n; i++) { - endp[- i] = colonp[n - i]; - colonp[n - i] = 0; - } - tp = endp; - } - if (tp != endp) - return 0; - memcpy(dst, tmp, IN6ADDRSZ); - return 1; -} - int cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, unsigned ressize) @@ -277,7 +62,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, if (token_len >= INET6_ADDRSTRLEN+4) return -1; - snprintf(ip_str, token_len+1, "%s", buf); + strlcpy(ip_str, buf, token_len + 1); /* convert the network prefix */ if (tk2->ipaddr_data.flags & CMDLINE_IPADDR_NETWORK) { @@ -299,7 +84,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, /* convert the IP addr */ if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V4) && - my_inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 && + inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 && prefixlen <= V4PREFIXMAX) { ipaddr.family = AF_INET; if (res) @@ -307,7 +92,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, return token_len; } if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V6) && - my_inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) { + inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) { ipaddr.family = AF_INET6; if (res) memcpy(res, &ipaddr, sizeof(ipaddr)); diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c index 5952f343..ad43b522 100644 --- a/lib/librte_cmdline/cmdline_parse_portlist.c +++ b/lib/librte_cmdline/cmdline_parse_portlist.c @@ -94,7 +94,7 @@ cmdline_parse_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, if (token_len >= PORTLIST_TOKEN_SIZE) return -1; - snprintf(portlist_str, token_len+1, "%s", buf); + strlcpy(portlist_str, buf, token_len + 1); if (pl) { pl->map = 0; diff --git a/lib/librte_cmdline/cmdline_parse_string.c b/lib/librte_cmdline/cmdline_parse_string.c index abde0412..9cf41d0f 100644 --- a/lib/librte_cmdline/cmdline_parse_string.c +++ b/lib/librte_cmdline/cmdline_parse_string.c @@ -125,10 +125,10 @@ cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, if (res) { if ((sd->str != NULL) && (strcmp(sd->str, TOKEN_STRING_MULTI) == 0)) /* we are sure that token_len is < STR_MULTI_TOKEN_SIZE-1 */ - snprintf(res, STR_MULTI_TOKEN_SIZE, "%s", buf); + strlcpy(res, buf, STR_MULTI_TOKEN_SIZE); else /* we are sure that token_len is < STR_TOKEN_SIZE-1 */ - snprintf(res, STR_TOKEN_SIZE, "%s", buf); + strlcpy(res, buf, STR_TOKEN_SIZE); *((char *)res + token_len) = 0; } diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile index 0c57533c..61089fe7 100644 --- a/lib/librte_compat/Makefile +++ b/lib/librte_compat/Makefile @@ -1,33 +1,6 @@ -# BSD LICENSE -# -# Copyright(c) 2013 Neil Horman -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. 
-# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2013 Neil Horman +# All rights reserved. include $(RTE_SDK)/mk/rte.vars.mk diff --git a/lib/librte_compressdev/Makefile b/lib/librte_compressdev/Makefile new file mode 100644 index 00000000..7ef89e61 --- /dev/null +++ b/lib/librte_compressdev/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017-2018 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_compressdev.a + +# library version +LIBABIVER := 1 + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API +LDLIBS += -lrte_eal -lrte_mempool -lrte_kvargs + +# library source files +SRCS-y += rte_compressdev.c rte_compressdev_pmd.c rte_comp.c + +# export include files +SYMLINK-y-include += rte_comp.h +SYMLINK-y-include += rte_compressdev.h +# export include files (for PMDs) +SYMLINK-y-include += rte_compressdev_pmd.h +SYMLINK-y-include += rte_compressdev_internal.h + +# versioning export map +EXPORT_MAP := rte_compressdev_version.map + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_compressdev/meson.build b/lib/librte_compressdev/meson.build new file mode 100644 index 00000000..5416571c --- /dev/null +++ b/lib/librte_compressdev/meson.build @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +allow_experimental_apis = true +sources = files('rte_compressdev.c', + 'rte_compressdev_pmd.c', + 'rte_comp.c') +headers = files('rte_compressdev.h', + 'rte_compressdev_pmd.h', + 'rte_compressdev_internal.h', + 'rte_comp.h') +deps += ['kvargs', 'mbuf'] diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c new file mode 100644 index 00000000..98ad0cfd --- /dev/null +++ b/lib/librte_compressdev/rte_comp.c @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include "rte_comp.h" +#include "rte_compressdev.h" +#include "rte_compressdev_internal.h" + +const char * __rte_experimental +rte_comp_get_feature_name(uint64_t flag) +{ + switch (flag) { + case RTE_COMP_FF_STATEFUL_COMPRESSION: + return "STATEFUL_COMPRESSION"; + case RTE_COMP_FF_STATEFUL_DECOMPRESSION: + return "STATEFUL_DECOMPRESSION"; + case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT: + return "OOP_SGL_IN_SGL_OUT"; + case RTE_COMP_FF_OOP_SGL_IN_LB_OUT: + return "OOP_SGL_IN_LB_OUT"; + case RTE_COMP_FF_OOP_LB_IN_SGL_OUT: + return "OOP_LB_IN_SGL_OUT"; + case RTE_COMP_FF_MULTI_PKT_CHECKSUM: + return "MULTI_PKT_CHECKSUM"; + case 
RTE_COMP_FF_ADLER32_CHECKSUM: + return "ADLER32_CHECKSUM"; + case RTE_COMP_FF_CRC32_CHECKSUM: + return "CRC32_CHECKSUM"; + case RTE_COMP_FF_CRC32_ADLER32_CHECKSUM: + return "CRC32_ADLER32_CHECKSUM"; + case RTE_COMP_FF_NONCOMPRESSED_BLOCKS: + return "NONCOMPRESSED_BLOCKS"; + case RTE_COMP_FF_SHA1_HASH: + return "SHA1_HASH"; + case RTE_COMP_FF_SHA2_SHA256_HASH: + return "SHA2_SHA256_HASH"; + case RTE_COMP_FF_SHAREABLE_PRIV_XFORM: + return "SHAREABLE_PRIV_XFORM"; + case RTE_COMP_FF_HUFFMAN_FIXED: + return "HUFFMAN_FIXED"; + case RTE_COMP_FF_HUFFMAN_DYNAMIC: + return "HUFFMAN_DYNAMIC"; + default: + return NULL; + } +} + +/** + * Reset the fields of an operation to their default values. + * + * @note The private data associated with the operation is not zeroed. + * + * @param op + * The operation to be reset + */ +static inline void +rte_comp_op_reset(struct rte_comp_op *op) +{ + struct rte_mempool *tmp_mp = op->mempool; + rte_iova_t tmp_iova_addr = op->iova_addr; + + memset(op, 0, sizeof(struct rte_comp_op)); + op->status = RTE_COMP_OP_STATUS_NOT_PROCESSED; + op->iova_addr = tmp_iova_addr; + op->mempool = tmp_mp; +} + +/** + * Private data structure belonging to an operation pool. + */ +struct rte_comp_op_pool_private { + uint16_t user_size; + /**< Size of private user data with each operation. */ +}; + +/** + * Bulk allocate raw element from mempool and return as comp operations + * + * @param mempool + * Compress operation mempool + * @param ops + * Array to place allocated operations + * @param nb_ops + * Number of operations to allocate + * @return + * - 0: Success + * - -ENOENT: Not enough entries in the mempool; no ops are retrieved. + */ +static inline int +rte_comp_op_raw_bulk_alloc(struct rte_mempool *mempool, + struct rte_comp_op **ops, uint16_t nb_ops) +{ + if (rte_mempool_get_bulk(mempool, (void **)ops, nb_ops) == 0) + return nb_ops; + + return 0; +} + +/** Initialise rte_comp_op mempool element */ +static void +rte_comp_op_init(struct rte_mempool *mempool, + __rte_unused void *opaque_arg, + void *_op_data, + __rte_unused unsigned int i) +{ + struct rte_comp_op *op = _op_data; + + memset(_op_data, 0, mempool->elt_size); + + op->status = RTE_COMP_OP_STATUS_NOT_PROCESSED; + op->iova_addr = rte_mem_virt2iova(_op_data); + op->mempool = mempool; +} + +struct rte_mempool * __rte_experimental +rte_comp_op_pool_create(const char *name, + unsigned int nb_elts, unsigned int cache_size, + uint16_t user_size, int socket_id) +{ + struct rte_comp_op_pool_private *priv; + + unsigned int elt_size = sizeof(struct rte_comp_op) + user_size; + + /* lookup mempool in case already allocated */ + struct rte_mempool *mp = rte_mempool_lookup(name); + + if (mp != NULL) { + priv = (struct rte_comp_op_pool_private *) + rte_mempool_get_priv(mp); + + if (mp->elt_size != elt_size || + mp->cache_size < cache_size || + mp->size < nb_elts || + priv->user_size < user_size) { + mp = NULL; + COMPRESSDEV_LOG(ERR, + "Mempool %s already exists but with incompatible parameters", + name); + return NULL; + } + return mp; + } + + mp = rte_mempool_create( + name, + nb_elts, + elt_size, + cache_size, + sizeof(struct rte_comp_op_pool_private), + NULL, + NULL, + rte_comp_op_init, + NULL, + socket_id, + 0); + + if (mp == NULL) { + COMPRESSDEV_LOG(ERR, "Failed to create mempool %s", name); + return NULL; + } + + priv = (struct rte_comp_op_pool_private *) + rte_mempool_get_priv(mp); + + priv->user_size = user_size; + + return mp; +} + +struct rte_comp_op * __rte_experimental +rte_comp_op_alloc(struct rte_mempool *mempool) +{ + 
struct rte_comp_op *op = NULL; + int retval; + + retval = rte_comp_op_raw_bulk_alloc(mempool, &op, 1); + if (unlikely(retval < 0)) + return NULL; + + rte_comp_op_reset(op); + + return op; +} + +int __rte_experimental +rte_comp_op_bulk_alloc(struct rte_mempool *mempool, + struct rte_comp_op **ops, uint16_t nb_ops) +{ + int ret; + uint16_t i; + + ret = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops); + if (unlikely(ret < nb_ops)) + return ret; + + for (i = 0; i < nb_ops; i++) + rte_comp_op_reset(ops[i]); + + return nb_ops; +} + +/** + * free operation structure + * If operation has been allocate from a rte_mempool, then the operation will + * be returned to the mempool. + * + * @param op + * Compress operation + */ +void __rte_experimental +rte_comp_op_free(struct rte_comp_op *op) +{ + if (op != NULL && op->mempool != NULL) + rte_mempool_put(op->mempool, op); +} diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h new file mode 100644 index 00000000..ee9056ea --- /dev/null +++ b/lib/librte_compressdev/rte_comp.h @@ -0,0 +1,485 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _RTE_COMP_H_ +#define _RTE_COMP_H_ + +/** + * @file rte_comp.h + * + * RTE definitions for Data Compression Service + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * compression service feature flags + * + * @note New features flags should be added to the end of the list + * + * Keep these flags synchronised with rte_comp_get_feature_name() + */ +#define RTE_COMP_FF_STATEFUL_COMPRESSION (1ULL << 0) +/**< Stateful compression is supported */ +#define RTE_COMP_FF_STATEFUL_DECOMPRESSION (1ULL << 1) +/**< Stateful decompression is supported */ +#define RTE_COMP_FF_OOP_SGL_IN_SGL_OUT (1ULL << 2) +/**< Out-of-place Scatter-gather (SGL) buffers, + * with multiple segments, are supported in input and output + */ +#define RTE_COMP_FF_OOP_SGL_IN_LB_OUT (1ULL << 3) +/**< Out-of-place Scatter-gather (SGL) buffers are supported + * in input, combined with linear buffers (LB), with a + * single segment, in output + */ +#define RTE_COMP_FF_OOP_LB_IN_SGL_OUT (1ULL << 4) +/**< Out-of-place Scatter-gather (SGL) buffers are supported + * in output, combined with linear buffers (LB) in input + */ +#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 5) +/**< Adler-32 Checksum is supported */ +#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 6) +/**< CRC32 Checksum is supported */ +#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 7) +/**< Adler-32/CRC32 Checksum is supported */ +#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 8) +/**< Generation of checksum across multiple stateless packets is supported */ +#define RTE_COMP_FF_SHA1_HASH (1ULL << 9) +/**< SHA1 Hash is supported */ +#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 10) +/**< SHA256 Hash of SHA2 family is supported */ +#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 11) +/**< Creation of non-compressed blocks using RTE_COMP_LEVEL_NONE is supported */ +#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 12) +/**< Private xforms created by the PMD can be shared + * across multiple stateless operations. If not set, then app needs + * to create as many priv_xforms as it expects to have stateless + * operations in-flight. 
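+ *
+ * For example, if this flag is not set and up to nb_inflight stateless
+ * operations may be outstanding, one handle per operation is needed
+ * (a sketch; nb_inflight and xform are illustrative):
+ * @code
+ * void *priv[nb_inflight];
+ * unsigned int i;
+ *
+ * for (i = 0; i != nb_inflight; i++)
+ *	if (rte_compressdev_private_xform_create(dev_id, &xform,
+ *			&priv[i]) < 0)
+ *		break;
+ * @endcode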
+ */ +#define RTE_COMP_FF_HUFFMAN_FIXED (1ULL << 13) +/**< Fixed huffman encoding is supported */ +#define RTE_COMP_FF_HUFFMAN_DYNAMIC (1ULL << 14) +/**< Dynamic huffman encoding is supported */ + +/** Status of comp operation */ +enum rte_comp_op_status { + RTE_COMP_OP_STATUS_SUCCESS = 0, + /**< Operation completed successfully */ + RTE_COMP_OP_STATUS_NOT_PROCESSED, + /**< Operation has not yet been processed by the device */ + RTE_COMP_OP_STATUS_INVALID_ARGS, + /**< Operation failed due to invalid arguments in request */ + RTE_COMP_OP_STATUS_ERROR, + /**< Error handling operation */ + RTE_COMP_OP_STATUS_INVALID_STATE, + /**< Operation is invoked in invalid state */ + RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED, + /**< Output buffer ran out of space before operation completed. + * Error case. Application must resubmit all data with a larger + * output buffer. + */ + RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE, + /**< Output buffer ran out of space before operation completed, but this + * is not an error case. Output data up to op.produced can be used and + * next op in the stream should continue on from op.consumed+1. + */ +}; + +/** Compression Algorithms */ +enum rte_comp_algorithm { + RTE_COMP_ALGO_UNSPECIFIED = 0, + /** No Compression algorithm */ + RTE_COMP_ALGO_NULL, + /**< No compression. + * Pass-through, data is copied unchanged from source buffer to + * destination buffer. + */ + RTE_COMP_ALGO_DEFLATE, + /**< DEFLATE compression algorithm + * https://tools.ietf.org/html/rfc1951 + */ + RTE_COMP_ALGO_LZS, + /**< LZS compression algorithm + * https://tools.ietf.org/html/rfc2395 + */ + RTE_COMP_ALGO_LIST_END +}; + +/** Compression Hash Algorithms */ +enum rte_comp_hash_algorithm { + RTE_COMP_HASH_ALGO_NONE = 0, + /**< No hash */ + RTE_COMP_HASH_ALGO_SHA1, + /**< SHA1 hash algorithm */ + RTE_COMP_HASH_ALGO_SHA2_256, + /**< SHA256 hash algorithm of SHA2 family */ + RTE_COMP_HASH_ALGO_LIST_END +}; + +/**< Compression Level. + * The number is interpreted by each PMD differently. However, lower numbers + * give fastest compression, at the expense of compression ratio while + * higher numbers may give better compression ratios but are likely slower. + */ +#define RTE_COMP_LEVEL_PMD_DEFAULT (-1) +/** Use PMD Default */ +#define RTE_COMP_LEVEL_NONE (0) +/** Output uncompressed blocks if supported by the specified algorithm */ +#define RTE_COMP_LEVEL_MIN (1) +/** Use minimum compression level supported by the PMD */ +#define RTE_COMP_LEVEL_MAX (9) +/** Use maximum compression level supported by the PMD */ + +/** Compression checksum types */ +enum rte_comp_checksum_type { + RTE_COMP_CHECKSUM_NONE, + /**< No checksum generated */ + RTE_COMP_CHECKSUM_CRC32, + /**< Generates a CRC32 checksum, as used by gzip */ + RTE_COMP_CHECKSUM_ADLER32, + /**< Generates an Adler-32 checksum, as used by zlib */ + RTE_COMP_CHECKSUM_CRC32_ADLER32, + /**< Generates both Adler-32 and CRC32 checksums, concatenated. + * CRC32 is in the lower 32bits, Adler-32 in the upper 32 bits. + */ +}; + + +/** Compression Huffman Type - used by DEFLATE algorithm */ +enum rte_comp_huffman { + RTE_COMP_HUFFMAN_DEFAULT, + /**< PMD may choose which Huffman codes to use */ + RTE_COMP_HUFFMAN_FIXED, + /**< Use Fixed Huffman codes */ + RTE_COMP_HUFFMAN_DYNAMIC, + /**< Use Dynamic Huffman codes */ +}; + +/** Compression flush flags */ +enum rte_comp_flush_flag { + RTE_COMP_FLUSH_NONE, + /**< Data is not flushed. Output may remain in the compressor and be + * processed during a following op. 
It may not be possible to decompress + * output until a later op with some other flush flag has been sent. + */ + RTE_COMP_FLUSH_SYNC, + /**< All data should be flushed to output buffer. Output data can be + * decompressed. However state and history is not cleared, so future + * operations may use history from this operation. + */ + RTE_COMP_FLUSH_FULL, + /**< All data should be flushed to output buffer. Output data can be + * decompressed. State and history data is cleared, so future + * ops will be independent of ops processed before this. + */ + RTE_COMP_FLUSH_FINAL + /**< Same as RTE_COMP_FLUSH_FULL but if op.algo is RTE_COMP_ALGO_DEFLATE + * then bfinal bit is set in the last block. + */ +}; + +/** Compression transform types */ +enum rte_comp_xform_type { + RTE_COMP_COMPRESS, + /**< Compression service - compress */ + RTE_COMP_DECOMPRESS, + /**< Compression service - decompress */ +}; + +/** Compression operation type */ +enum rte_comp_op_type { + RTE_COMP_OP_STATELESS, + /**< All data to be processed is submitted in the op, no state or + * history from previous ops is used and none will be stored for future + * ops. Flush flag must be set to either FLUSH_FULL or FLUSH_FINAL. + */ + RTE_COMP_OP_STATEFUL + /**< There may be more data to be processed after this op, it's part of + * a stream of data. State and history from previous ops can be used + * and resulting state and history can be stored for future ops, + * depending on flush flag. + */ +}; + + +/** Parameters specific to the deflate algorithm */ +struct rte_comp_deflate_params { + enum rte_comp_huffman huffman; + /**< Compression huffman encoding type */ +}; + +/** Setup Data for compression */ +struct rte_comp_compress_xform { + enum rte_comp_algorithm algo; + /**< Algorithm to use for compress operation */ + union { + struct rte_comp_deflate_params deflate; + /**< Parameters specific to the deflate algorithm */ + }; /**< Algorithm specific parameters */ + int level; + /**< Compression level */ + uint8_t window_size; + /**< Base two log value of sliding window to be used. If window size + * can't be supported by the PMD then it may fall back to a smaller + * size. This is likely to result in a worse compression ratio. + */ + enum rte_comp_checksum_type chksum; + /**< Type of checksum to generate on the uncompressed data */ + enum rte_comp_hash_algorithm hash_algo; + /**< Hash algorithm to be used with compress operation. Hash is always + * done on plaintext. + */ +}; + +/** + * Setup Data for decompression. + */ +struct rte_comp_decompress_xform { + enum rte_comp_algorithm algo; + /**< Algorithm to use for decompression */ + enum rte_comp_checksum_type chksum; + /**< Type of checksum to generate on the decompressed data */ + uint8_t window_size; + /**< Base two log value of sliding window which was used to generate + * compressed data. If window size can't be supported by the PMD then + * setup of stream or private_xform should fail. + */ + enum rte_comp_hash_algorithm hash_algo; + /**< Hash algorithm to be used with decompress operation. Hash is always + * done on plaintext. + */ +}; + +/** + * Compression transform structure. + * + * This is used to specify the compression transforms required. + * Each transform structure can hold a single transform, the type field is + * used to specify which transform is contained within the union. 
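+ *
+ * For example, a DEFLATE compress transform using dynamic Huffman codes
+ * and the PMD default level could be set up as below (a sketch):
+ * @code
+ * struct rte_comp_xform xform = {
+ *	.type = RTE_COMP_COMPRESS,
+ *	.compress = {
+ *		.algo = RTE_COMP_ALGO_DEFLATE,
+ *		.deflate.huffman = RTE_COMP_HUFFMAN_DYNAMIC,
+ *		.level = RTE_COMP_LEVEL_PMD_DEFAULT,
+ *		.chksum = RTE_COMP_CHECKSUM_NONE,
+ *	},
+ * };
+ * @endcode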
+ */ +struct rte_comp_xform { + enum rte_comp_xform_type type; + /**< xform type */ + union { + struct rte_comp_compress_xform compress; + /**< xform for compress operation */ + struct rte_comp_decompress_xform decompress; + /**< decompress xform */ + }; +}; + +/** + * Compression Operation. + * + * This structure contains data relating to performing a compression + * operation on the referenced mbuf data buffers. + * + * Comp operations are enqueued and dequeued in comp PMDs using the + * rte_compressdev_enqueue_burst() / rte_compressdev_dequeue_burst() APIs + */ +struct rte_comp_op { + enum rte_comp_op_type op_type; + union { + void *private_xform; + /**< Stateless private PMD data derived from an rte_comp_xform. + * A handle returned by rte_compressdev_private_xform_create() + * must be attached to operations of op_type RTE_COMP_STATELESS. + */ + void *stream; + /**< Private PMD data derived initially from an rte_comp_xform, + * which holds state and history data and evolves as operations + * are processed. rte_compressdev_stream_create() must be called + * on a device for all STATEFUL data streams and the resulting + * stream attached to the one or more operations associated + * with the data stream. + * All operations in a stream must be sent to the same device. + */ + }; + + struct rte_mempool *mempool; + /**< Pool from which operation is allocated */ + rte_iova_t iova_addr; + /**< IOVA address of this operation */ + struct rte_mbuf *m_src; + /**< source mbuf + * The total size of the input buffer(s) can be retrieved using + * rte_pktmbuf_data_len(m_src). The max data size which can fit in a + * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1. + * If the input data is bigger than this it can be passed to the PMD in + * a chain of mbufs if the PMD's capabilities indicate it supports this. + */ + struct rte_mbuf *m_dst; + /**< destination mbuf + * The total size of the output buffer(s) can be retrieved using + * rte_pktmbuf_data_len(m_dst). The max data size which can fit in a + * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1. + * If the output data is expected to be bigger than this a chain of + * mbufs can be passed to the PMD if the PMD's capabilities indicate + * it supports this. + */ + + struct { + uint32_t offset; + /**< Starting point for compression or decompression, + * specified as number of bytes from start of packet in + * source buffer. + * This offset starts from the first segment + * of the buffer, in case the m_src is a chain of mbufs. + * Starting point for checksum generation in compress direction. + */ + uint32_t length; + /**< The length, in bytes, of the data in source buffer + * to be compressed or decompressed. + * Also the length of the data over which the checksum + * should be generated in compress direction + */ + } src; + struct { + uint32_t offset; + /**< Starting point for writing output data, specified as + * number of bytes from start of packet in dest + * buffer. + * This offset starts from the first segment + * of the buffer, in case the m_dst is a chain of mbufs. + * Starting point for checksum generation in + * decompress direction. + */ + } dst; + struct { + uint8_t *digest; + /**< Output buffer to store hash output, if enabled in xform. + * Buffer would contain valid value only after an op with + * flush flag = RTE_COMP_FLUSH_FULL/FLUSH_FINAL is processed + * successfully. + * + * Length of buffer should be contiguous and large enough to + * accommodate digest produced by specific hash algo. 
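+ * For example, SHA1 produces a 20 byte digest and SHA2-256 a
+ * 32 byte digest.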
+ */ + rte_iova_t iova_addr; + /**< IO address of the buffer */ + } hash; + enum rte_comp_flush_flag flush_flag; + /**< Defines flush characteristics for the output data. + * Only applicable in compress direction + */ + uint64_t input_chksum; + /**< An input checksum can be provided to generate a + * cumulative checksum across sequential blocks in a STATELESS stream. + * Checksum type is as specified in xform chksum_type + */ + uint64_t output_chksum; + /**< If a checksum is generated it will be written in here. + * Checksum type is as specified in xform chksum_type. + */ + uint32_t consumed; + /**< The number of bytes from the source buffer + * which were compressed/decompressed. + */ + uint32_t produced; + /**< The number of bytes written to the destination buffer + * which were compressed/decompressed. + */ + uint64_t debug_status; + /**< + * Status of the operation is returned in the status param. + * This field allows the PMD to pass back extra + * pmd-specific debug information. Value is not defined on the API. + */ + uint8_t status; + /**< + * Operation status - use values from enum rte_comp_status. + * This is reset to + * RTE_COMP_OP_STATUS_NOT_PROCESSED on allocation from mempool and + * will be set to RTE_COMP_OP_STATUS_SUCCESS after operation + * is successfully processed by a PMD + */ +} __rte_cache_aligned; + +/** + * Creates an operation pool + * + * @param name + * Compress pool name + * @param nb_elts + * Number of elements in pool + * @param cache_size + * Number of elements to cache on lcore, see + * *rte_mempool_create* for further details about cache size + * @param user_size + * Size of private data to allocate for user with each operation + * @param socket_id + * Socket to identifier allocate memory on + * @return + * - On success pointer to mempool + * - On failure NULL + */ +struct rte_mempool * __rte_experimental +rte_comp_op_pool_create(const char *name, + unsigned int nb_elts, unsigned int cache_size, + uint16_t user_size, int socket_id); + +/** + * Allocate an operation from a mempool with default parameters set + * + * @param mempool + * Compress operation mempool + * + * @return + * - On success returns a valid rte_comp_op structure + * - On failure returns NULL + */ +struct rte_comp_op * __rte_experimental +rte_comp_op_alloc(struct rte_mempool *mempool); + +/** + * Bulk allocate operations from a mempool with default parameters set + * + * @param mempool + * Compress operation mempool + * @param ops + * Array to place allocated operations + * @param nb_ops + * Number of operations to allocate + * @return + * - 0: Success + * - -ENOENT: Not enough entries in the mempool; no ops are retrieved. + */ +int __rte_experimental +rte_comp_op_bulk_alloc(struct rte_mempool *mempool, + struct rte_comp_op **ops, uint16_t nb_ops); + +/** + * Free operation structure + * If operation has been allocate from a rte_mempool, then the operation will + * be returned to the mempool. + * + * @param op + * Compress operation + */ +void __rte_experimental +rte_comp_op_free(struct rte_comp_op *op); + +/** + * Get the name of a compress service feature flag + * + * @param flag + * The mask describing the flag + * + * @return + * The name of this flag, or NULL if it's not a valid feature flag. 
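+ *
+ * For example, the names of all features present in a feature flags
+ * bitmask could be printed as below (a sketch; flags is illustrative):
+ * @code
+ * unsigned int i;
+ *
+ * for (i = 0; i < 64; i++) {
+ *	const char *name = rte_comp_get_feature_name(flags & (1ULL << i));
+ *
+ *	if (name != NULL)
+ *		printf("%s\n", name);
+ * }
+ * @endcode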
+ */ +const char * __rte_experimental +rte_comp_get_feature_name(uint64_t flag); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_COMP_H_ */ diff --git a/lib/librte_compressdev/rte_compressdev.c b/lib/librte_compressdev/rte_compressdev.c new file mode 100644 index 00000000..9091dd6e --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev.c @@ -0,0 +1,772 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "rte_compressdev.h" +#include "rte_compressdev_internal.h" +#include "rte_compressdev_pmd.h" + +#define RTE_COMPRESSDEV_DETACHED (0) +#define RTE_COMPRESSDEV_ATTACHED (1) + +struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS]; + +struct rte_compressdev *rte_compressdevs = &rte_comp_devices[0]; + +static struct rte_compressdev_global compressdev_globals = { + .devs = &rte_comp_devices[0], + .data = { NULL }, + .nb_devs = 0, + .max_devs = RTE_COMPRESS_MAX_DEVS +}; + +struct rte_compressdev_global *rte_compressdev_globals = &compressdev_globals; + +const struct rte_compressdev_capabilities * __rte_experimental +rte_compressdev_capability_get(uint8_t dev_id, + enum rte_comp_algorithm algo) +{ + const struct rte_compressdev_capabilities *capability; + struct rte_compressdev_info dev_info; + int i = 0; + + if (dev_id >= compressdev_globals.nb_devs) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id); + return NULL; + } + rte_compressdev_info_get(dev_id, &dev_info); + + while ((capability = &dev_info.capabilities[i++])->algo != + RTE_COMP_ALGO_UNSPECIFIED){ + if (capability->algo == algo) + return capability; + } + + return NULL; +} + +const char * __rte_experimental +rte_compressdev_get_feature_name(uint64_t flag) +{ + switch (flag) { + case RTE_COMPDEV_FF_HW_ACCELERATED: + return "HW_ACCELERATED"; + case RTE_COMPDEV_FF_CPU_SSE: + return "CPU_SSE"; + case RTE_COMPDEV_FF_CPU_AVX: + return "CPU_AVX"; + case RTE_COMPDEV_FF_CPU_AVX2: + return "CPU_AVX2"; + case RTE_COMPDEV_FF_CPU_AVX512: + return "CPU_AVX512"; + case RTE_COMPDEV_FF_CPU_NEON: + return "CPU_NEON"; + default: + return NULL; + } +} + +static struct rte_compressdev * +rte_compressdev_get_dev(uint8_t dev_id) +{ + return &rte_compressdev_globals->devs[dev_id]; +} + +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_get_named_dev(const char *name) +{ + struct rte_compressdev *dev; + unsigned int i; + + if (name == NULL) + return NULL; + + for (i = 0; i < rte_compressdev_globals->max_devs; i++) { + dev = &rte_compressdev_globals->devs[i]; + + if ((dev->attached == RTE_COMPRESSDEV_ATTACHED) && + (strcmp(dev->data->name, name) == 0)) + return dev; + } + + return NULL; +} + +static unsigned int +rte_compressdev_is_valid_dev(uint8_t dev_id) +{ + struct rte_compressdev *dev = NULL; + + if (dev_id >= rte_compressdev_globals->nb_devs) + return 0; + + dev = rte_compressdev_get_dev(dev_id); + if (dev->attached != RTE_COMPRESSDEV_ATTACHED) + return 0; + else + return 1; +} + + +int __rte_experimental +rte_compressdev_get_dev_id(const char *name) +{ + unsigned int i; + + if (name == NULL) + return -1; + + for (i = 0; i < rte_compressdev_globals->nb_devs; i++) + if ((strcmp(rte_compressdev_globals->devs[i].data->name, name) + == 0) && + (rte_compressdev_globals->devs[i].attached == + RTE_COMPRESSDEV_ATTACHED)) + return i; + + return -1; +} + +uint8_t __rte_experimental +rte_compressdev_count(void) +{ + return rte_compressdev_globals->nb_devs; +} + +uint8_t __rte_experimental 
+rte_compressdev_devices_get(const char *driver_name, uint8_t *devices, + uint8_t nb_devices) +{ + uint8_t i, count = 0; + struct rte_compressdev *devs = rte_compressdev_globals->devs; + uint8_t max_devs = rte_compressdev_globals->max_devs; + + for (i = 0; i < max_devs && count < nb_devices; i++) { + + if (devs[i].attached == RTE_COMPRESSDEV_ATTACHED) { + int cmp; + + cmp = strncmp(devs[i].device->driver->name, + driver_name, + strlen(driver_name)); + + if (cmp == 0) + devices[count++] = devs[i].data->dev_id; + } + } + + return count; +} + +int __rte_experimental +rte_compressdev_socket_id(uint8_t dev_id) +{ + struct rte_compressdev *dev; + + if (!rte_compressdev_is_valid_dev(dev_id)) + return -1; + + dev = rte_compressdev_get_dev(dev_id); + + return dev->data->socket_id; +} + +static inline int +rte_compressdev_data_alloc(uint8_t dev_id, struct rte_compressdev_data **data, + int socket_id) +{ + char mz_name[RTE_COMPRESSDEV_NAME_MAX_LEN]; + const struct rte_memzone *mz; + int n; + + /* generate memzone name */ + n = snprintf(mz_name, sizeof(mz_name), + "rte_compressdev_data_%u", dev_id); + if (n >= (int)sizeof(mz_name)) + return -EINVAL; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + mz = rte_memzone_reserve(mz_name, + sizeof(struct rte_compressdev_data), + socket_id, 0); + } else + mz = rte_memzone_lookup(mz_name); + + if (mz == NULL) + return -ENOMEM; + + *data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + memset(*data, 0, sizeof(struct rte_compressdev_data)); + + return 0; +} + +static uint8_t +rte_compressdev_find_free_device_index(void) +{ + uint8_t dev_id; + + for (dev_id = 0; dev_id < RTE_COMPRESS_MAX_DEVS; dev_id++) { + if (rte_comp_devices[dev_id].attached == + RTE_COMPRESSDEV_DETACHED) + return dev_id; + } + return RTE_COMPRESS_MAX_DEVS; +} + +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_allocate(const char *name, int socket_id) +{ + struct rte_compressdev *compressdev; + uint8_t dev_id; + + if (rte_compressdev_pmd_get_named_dev(name) != NULL) { + COMPRESSDEV_LOG(ERR, + "comp device with name %s already allocated!", name); + return NULL; + } + + dev_id = rte_compressdev_find_free_device_index(); + if (dev_id == RTE_COMPRESS_MAX_DEVS) { + COMPRESSDEV_LOG(ERR, "Reached maximum number of comp devices"); + return NULL; + } + compressdev = rte_compressdev_get_dev(dev_id); + + if (compressdev->data == NULL) { + struct rte_compressdev_data *compressdev_data = + compressdev_globals.data[dev_id]; + + int retval = rte_compressdev_data_alloc(dev_id, + &compressdev_data, socket_id); + + if (retval < 0 || compressdev_data == NULL) + return NULL; + + compressdev->data = compressdev_data; + + snprintf(compressdev->data->name, RTE_COMPRESSDEV_NAME_MAX_LEN, + "%s", name); + + compressdev->data->dev_id = dev_id; + compressdev->data->socket_id = socket_id; + compressdev->data->dev_started = 0; + + compressdev->attached = RTE_COMPRESSDEV_ATTACHED; + + compressdev_globals.nb_devs++; + } + + return compressdev; +} + +int __rte_experimental +rte_compressdev_pmd_release_device(struct rte_compressdev *compressdev) +{ + int ret; + + if (compressdev == NULL) + return -EINVAL; + + /* Close device only if device operations have been set */ + if (compressdev->dev_ops) { + ret = rte_compressdev_close(compressdev->data->dev_id); + if (ret < 0) + return ret; + } + + compressdev->attached = RTE_COMPRESSDEV_DETACHED; + compressdev_globals.nb_devs--; + return 0; +} + +uint16_t __rte_experimental +rte_compressdev_queue_pair_count(uint8_t dev_id) +{ + struct 
rte_compressdev *dev; + + dev = &rte_comp_devices[dev_id]; + return dev->data->nb_queue_pairs; +} + +static int +rte_compressdev_queue_pairs_config(struct rte_compressdev *dev, + uint16_t nb_qpairs, int socket_id) +{ + struct rte_compressdev_info dev_info; + void **qp; + unsigned int i; + + if ((dev == NULL) || (nb_qpairs < 1)) { + COMPRESSDEV_LOG(ERR, "invalid param: dev %p, nb_queues %u", + dev, nb_qpairs); + return -EINVAL; + } + + COMPRESSDEV_LOG(DEBUG, "Setup %d queues pairs on device %u", + nb_qpairs, dev->data->dev_id); + + memset(&dev_info, 0, sizeof(struct rte_compressdev_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + (*dev->dev_ops->dev_infos_get)(dev, &dev_info); + + if ((dev_info.max_nb_queue_pairs != 0) && + (nb_qpairs > dev_info.max_nb_queue_pairs)) { + COMPRESSDEV_LOG(ERR, "Invalid num queue_pairs (%u) for dev %u", + nb_qpairs, dev->data->dev_id); + return -EINVAL; + } + + if (dev->data->queue_pairs == NULL) { /* first time configuration */ + dev->data->queue_pairs = rte_zmalloc_socket( + "compressdev->queue_pairs", + sizeof(dev->data->queue_pairs[0]) * nb_qpairs, + RTE_CACHE_LINE_SIZE, socket_id); + + if (dev->data->queue_pairs == NULL) { + dev->data->nb_queue_pairs = 0; + COMPRESSDEV_LOG(ERR, + "failed to get memory for qp meta data, nb_queues %u", + nb_qpairs); + return -(ENOMEM); + } + } else { /* re-configure */ + int ret; + uint16_t old_nb_queues = dev->data->nb_queue_pairs; + + qp = dev->data->queue_pairs; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release, + -ENOTSUP); + + for (i = nb_qpairs; i < old_nb_queues; i++) { + ret = (*dev->dev_ops->queue_pair_release)(dev, i); + if (ret < 0) + return ret; + } + + qp = rte_realloc(qp, sizeof(qp[0]) * nb_qpairs, + RTE_CACHE_LINE_SIZE); + if (qp == NULL) { + COMPRESSDEV_LOG(ERR, + "failed to realloc qp meta data, nb_queues %u", + nb_qpairs); + return -(ENOMEM); + } + + if (nb_qpairs > old_nb_queues) { + uint16_t new_qs = nb_qpairs - old_nb_queues; + + memset(qp + old_nb_queues, 0, + sizeof(qp[0]) * new_qs); + } + + dev->data->queue_pairs = qp; + + } + dev->data->nb_queue_pairs = nb_qpairs; + return 0; +} + +static int +rte_compressdev_queue_pairs_release(struct rte_compressdev *dev) +{ + uint16_t num_qps, i; + int ret; + + if (dev == NULL) { + COMPRESSDEV_LOG(ERR, "invalid param: dev %p", dev); + return -EINVAL; + } + + num_qps = dev->data->nb_queue_pairs; + + if (num_qps == 0) + return 0; + + COMPRESSDEV_LOG(DEBUG, "Free %d queues pairs on device %u", + dev->data->nb_queue_pairs, dev->data->dev_id); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release, + -ENOTSUP); + + for (i = 0; i < num_qps; i++) { + ret = (*dev->dev_ops->queue_pair_release)(dev, i); + if (ret < 0) + return ret; + } + + if (dev->data->queue_pairs != NULL) + rte_free(dev->data->queue_pairs); + dev->data->queue_pairs = NULL; + dev->data->nb_queue_pairs = 0; + + return 0; +} + +int __rte_experimental +rte_compressdev_configure(uint8_t dev_id, struct rte_compressdev_config *config) +{ + struct rte_compressdev *dev; + int diag; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_comp_devices[dev_id]; + + if (dev->data->dev_started) { + COMPRESSDEV_LOG(ERR, + "device %d must be stopped to allow configuration", dev_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + /* Setup new number of queue pairs and reconfigure device. 
*/ + diag = rte_compressdev_queue_pairs_config(dev, config->nb_queue_pairs, + config->socket_id); + if (diag != 0) { + COMPRESSDEV_LOG(ERR, + "dev%d rte_comp_dev_queue_pairs_config = %d", + dev_id, diag); + return diag; + } + + return (*dev->dev_ops->dev_configure)(dev, config); +} + +int __rte_experimental +rte_compressdev_start(uint8_t dev_id) +{ + struct rte_compressdev *dev; + int diag; + + COMPRESSDEV_LOG(DEBUG, "Start dev_id=%" PRIu8, dev_id); + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_comp_devices[dev_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + COMPRESSDEV_LOG(ERR, + "Device with dev_id=%" PRIu8 " already started", dev_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return diag; + + return 0; +} + +void __rte_experimental +rte_compressdev_stop(uint8_t dev_id) +{ + struct rte_compressdev *dev; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return; + } + + dev = &rte_comp_devices[dev_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + COMPRESSDEV_LOG(ERR, + "Device with dev_id=%" PRIu8 " already stopped", dev_id); + return; + } + + (*dev->dev_ops->dev_stop)(dev); + dev->data->dev_started = 0; +} + +int __rte_experimental +rte_compressdev_close(uint8_t dev_id) +{ + struct rte_compressdev *dev; + int retval; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return -1; + } + + dev = &rte_comp_devices[dev_id]; + + /* Device must be stopped before it can be closed */ + if (dev->data->dev_started == 1) { + COMPRESSDEV_LOG(ERR, "Device %u must be stopped before closing", + dev_id); + return -EBUSY; + } + + /* Free queue pairs memory */ + retval = rte_compressdev_queue_pairs_release(dev); + + if (retval < 0) + return retval; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP); + retval = (*dev->dev_ops->dev_close)(dev); + + if (retval < 0) + return retval; + + return 0; +} + +int __rte_experimental +rte_compressdev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, + uint32_t max_inflight_ops, int socket_id) +{ + struct rte_compressdev *dev; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_comp_devices[dev_id]; + if (queue_pair_id >= dev->data->nb_queue_pairs) { + COMPRESSDEV_LOG(ERR, "Invalid queue_pair_id=%d", queue_pair_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + COMPRESSDEV_LOG(ERR, + "device %d must be stopped to allow configuration", dev_id); + return -EBUSY; + } + + if (max_inflight_ops == 0) { + COMPRESSDEV_LOG(ERR, + "Invalid maximum number of inflight operations"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP); + + return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id, + max_inflight_ops, socket_id); +} + +uint16_t __rte_experimental +rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_comp_op **ops, uint16_t nb_ops) +{ + struct rte_compressdev *dev = &rte_compressdevs[dev_id]; + + nb_ops = (*dev->dequeue_burst) + (dev->data->queue_pairs[qp_id], ops, nb_ops); + + return nb_ops; +} + +uint16_t __rte_experimental +rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, + 
struct rte_comp_op **ops, uint16_t nb_ops) +{ + struct rte_compressdev *dev = &rte_compressdevs[dev_id]; + + return (*dev->enqueue_burst)( + dev->data->queue_pairs[qp_id], ops, nb_ops); +} + +int __rte_experimental +rte_compressdev_stats_get(uint8_t dev_id, struct rte_compressdev_stats *stats) +{ + struct rte_compressdev *dev; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id); + return -ENODEV; + } + + if (stats == NULL) { + COMPRESSDEV_LOG(ERR, "Invalid stats ptr"); + return -EINVAL; + } + + dev = &rte_comp_devices[dev_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + (*dev->dev_ops->stats_get)(dev, stats); + return 0; +} + +void __rte_experimental +rte_compressdev_stats_reset(uint8_t dev_id) +{ + struct rte_compressdev *dev; + + if (!rte_compressdev_is_valid_dev(dev_id)) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id); + return; + } + + dev = &rte_comp_devices[dev_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset); + (*dev->dev_ops->stats_reset)(dev); +} + + +void __rte_experimental +rte_compressdev_info_get(uint8_t dev_id, struct rte_compressdev_info *dev_info) +{ + struct rte_compressdev *dev; + + if (dev_id >= compressdev_globals.nb_devs) { + COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id); + return; + } + + dev = &rte_comp_devices[dev_id]; + + memset(dev_info, 0, sizeof(struct rte_compressdev_info)); + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + + dev_info->driver_name = dev->device->driver->name; +} + +int __rte_experimental +rte_compressdev_private_xform_create(uint8_t dev_id, + const struct rte_comp_xform *xform, + void **priv_xform) +{ + struct rte_compressdev *dev; + int ret; + + dev = rte_compressdev_get_dev(dev_id); + + if (xform == NULL || priv_xform == NULL || dev == NULL) + return -EINVAL; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->private_xform_create, -ENOTSUP); + ret = (*dev->dev_ops->private_xform_create)(dev, xform, priv_xform); + if (ret < 0) { + COMPRESSDEV_LOG(ERR, + "dev_id %d failed to create private_xform: err=%d", + dev_id, ret); + return ret; + }; + + return 0; +} + +int __rte_experimental +rte_compressdev_private_xform_free(uint8_t dev_id, void *priv_xform) +{ + struct rte_compressdev *dev; + int ret; + + dev = rte_compressdev_get_dev(dev_id); + + if (dev == NULL || priv_xform == NULL) + return -EINVAL; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->private_xform_free, -ENOTSUP); + ret = dev->dev_ops->private_xform_free(dev, priv_xform); + if (ret < 0) { + COMPRESSDEV_LOG(ERR, + "dev_id %d failed to free private xform: err=%d", + dev_id, ret); + return ret; + }; + + return 0; +} + +int __rte_experimental +rte_compressdev_stream_create(uint8_t dev_id, + const struct rte_comp_xform *xform, + void **stream) +{ + struct rte_compressdev *dev; + int ret; + + dev = rte_compressdev_get_dev(dev_id); + + if (xform == NULL || dev == NULL || stream == NULL) + return -EINVAL; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stream_create, -ENOTSUP); + ret = (*dev->dev_ops->stream_create)(dev, xform, stream); + if (ret < 0) { + COMPRESSDEV_LOG(ERR, + "dev_id %d failed to create stream: err=%d", + dev_id, ret); + return ret; + }; + + return 0; +} + + +int __rte_experimental +rte_compressdev_stream_free(uint8_t dev_id, void *stream) +{ + struct rte_compressdev *dev; + int ret; + + dev = rte_compressdev_get_dev(dev_id); + + if (dev == NULL || stream == NULL) + return -EINVAL; + + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stream_free, -ENOTSUP); + ret = dev->dev_ops->stream_free(dev, stream); + if (ret < 0) { + COMPRESSDEV_LOG(ERR, + "dev_id %d failed to free stream: err=%d", + dev_id, ret); + return ret; + }; + + return 0; +} + +const char * __rte_experimental +rte_compressdev_name_get(uint8_t dev_id) +{ + struct rte_compressdev *dev = rte_compressdev_get_dev(dev_id); + + if (dev == NULL) + return NULL; + + return dev->data->name; +} + +RTE_INIT(rte_compressdev_log) +{ + compressdev_logtype = rte_log_register("lib.compressdev"); + if (compressdev_logtype >= 0) + rte_log_set_level(compressdev_logtype, RTE_LOG_NOTICE); +} diff --git a/lib/librte_compressdev/rte_compressdev.h b/lib/librte_compressdev/rte_compressdev.h new file mode 100644 index 00000000..5b4fca4d --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev.h @@ -0,0 +1,540 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _RTE_COMPRESSDEV_H_ +#define _RTE_COMPRESSDEV_H_ + +/** + * @file rte_compressdev.h + * + * RTE Compression Device APIs + * + * Defines comp device APIs for the provisioning of compression operations. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "rte_comp.h" + +/** + * Parameter log base 2 range description. + * Final value will be 2^value. + */ +struct rte_param_log2_range { + uint8_t min; /**< Minimum log2 value */ + uint8_t max; /**< Maximum log2 value */ + uint8_t increment; + /**< If a range of sizes are supported, + * this parameter is used to indicate + * increments in base 2 log byte value + * that are supported between the minimum and maximum + */ +}; + +/** Structure used to capture a capability of a comp device */ +struct rte_compressdev_capabilities { + enum rte_comp_algorithm algo; + /* Compression algorithm */ + uint64_t comp_feature_flags; + /**< Bitmask of flags for compression service features */ + struct rte_param_log2_range window_size; + /**< Window size range in base two log byte values */ +}; + +/** Macro used at end of comp PMD list */ +#define RTE_COMP_END_OF_CAPABILITIES_LIST() \ + { RTE_COMP_ALGO_UNSPECIFIED } + +const struct rte_compressdev_capabilities * __rte_experimental +rte_compressdev_capability_get(uint8_t dev_id, + enum rte_comp_algorithm algo); + +/** + * compression device supported feature flags + * + * @note New features flags should be added to the end of the list + * + * Keep these flags synchronised with rte_compressdev_get_feature_name() + */ +#define RTE_COMPDEV_FF_HW_ACCELERATED (1ULL << 0) +/**< Operations are off-loaded to an external hardware accelerator */ +#define RTE_COMPDEV_FF_CPU_SSE (1ULL << 1) +/**< Utilises CPU SIMD SSE instructions */ +#define RTE_COMPDEV_FF_CPU_AVX (1ULL << 2) +/**< Utilises CPU SIMD AVX instructions */ +#define RTE_COMPDEV_FF_CPU_AVX2 (1ULL << 3) +/**< Utilises CPU SIMD AVX2 instructions */ +#define RTE_COMPDEV_FF_CPU_AVX512 (1ULL << 4) +/**< Utilises CPU SIMD AVX512 instructions */ +#define RTE_COMPDEV_FF_CPU_NEON (1ULL << 5) +/**< Utilises CPU NEON instructions */ + +/** + * Get the name of a compress device feature flag. + * + * @param flag + * The mask describing the flag + * + * @return + * The name of this flag, or NULL if it's not a valid feature flag. + */ +const char * __rte_experimental +rte_compressdev_get_feature_name(uint64_t flag); + +/** comp device information */ +struct rte_compressdev_info { + const char *driver_name; /**< Driver name. 
*/ + uint64_t feature_flags; /**< Feature flags */ + const struct rte_compressdev_capabilities *capabilities; + /**< Array of devices supported capabilities */ + uint16_t max_nb_queue_pairs; + /**< Maximum number of queues pairs supported by device. + * (If 0, there is no limit in maximum number of queue pairs) + */ +}; + +/** comp device statistics */ +struct rte_compressdev_stats { + uint64_t enqueued_count; + /**< Count of all operations enqueued */ + uint64_t dequeued_count; + /**< Count of all operations dequeued */ + + uint64_t enqueue_err_count; + /**< Total error count on operations enqueued */ + uint64_t dequeue_err_count; + /**< Total error count on operations dequeued */ +}; + + +/** + * Get the device identifier for the named compress device. + * + * @param name + * Device name to select the device structure + * @return + * - Returns compress device identifier on success. + * - Return -1 on failure to find named compress device. + */ +int __rte_experimental +rte_compressdev_get_dev_id(const char *name); + +/** + * Get the compress device name given a device identifier. + * + * @param dev_id + * Compress device identifier + * @return + * - Returns compress device name. + * - Returns NULL if compress device is not present. + */ +const char * __rte_experimental +rte_compressdev_name_get(uint8_t dev_id); + +/** + * Get the total number of compress devices that have been successfully + * initialised. + * + * @return + * - The total number of usable compress devices. + */ +uint8_t __rte_experimental +rte_compressdev_count(void); + +/** + * Get number and identifiers of attached comp devices that + * use the same compress driver. + * + * @param driver_name + * Driver name + * @param devices + * Output devices identifiers + * @param nb_devices + * Maximal number of devices + * + * @return + * Returns number of attached compress devices. + */ +uint8_t __rte_experimental +rte_compressdev_devices_get(const char *driver_name, uint8_t *devices, + uint8_t nb_devices); + +/* + * Return the NUMA socket to which a device is connected. + * + * @param dev_id + * Compress device identifier + * @return + * The NUMA socket id to which the device is connected or + * a default of zero if the socket could not be determined. + * -1 if returned is the dev_id value is out of range. + */ +int __rte_experimental +rte_compressdev_socket_id(uint8_t dev_id); + +/** Compress device configuration structure */ +struct rte_compressdev_config { + int socket_id; + /**< Socket on which to allocate resources */ + uint16_t nb_queue_pairs; + /**< Total number of queue pairs to configure on a device */ + uint16_t max_nb_priv_xforms; + /**< Max number of private_xforms which will be created on the device */ + uint16_t max_nb_streams; + /**< Max number of streams which will be created on the device */ +}; + +/** + * Configure a device. + * + * This function must be invoked first before any other function in the + * API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param dev_id + * Compress device identifier + * @param config + * The compress device configuration + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +int __rte_experimental +rte_compressdev_configure(uint8_t dev_id, + struct rte_compressdev_config *config); + +/** + * Start a device. + * + * The device start step is called after configuring the device and setting up + * its queue pairs. 
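A minimal bring-up sketch for the configure / queue-pair setup / start sequence this API describes; the single queue pair, the in-flight depth of 512 and the xform/stream limits are illustrative application choices, not values mandated by the library:

    #include <rte_compressdev.h>

    /* Bring up compress device dev_id with one queue pair (sketch only). */
    static int
    setup_compressdev(uint8_t dev_id, int socket_id)
    {
            struct rte_compressdev_config cfg = {
                    .socket_id = socket_id,
                    .nb_queue_pairs = 1,
                    .max_nb_priv_xforms = 16, /* assumed application limit */
                    .max_nb_streams = 0,      /* stateless-only example */
            };

            if (rte_compressdev_configure(dev_id, &cfg) < 0)
                    return -1;

            /* 512 in-flight ops per queue pair is an arbitrary example depth */
            if (rte_compressdev_queue_pair_setup(dev_id, 0, 512, socket_id) < 0)
                    return -1;

            return rte_compressdev_start(dev_id);
    }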
+ * On success, data-path functions exported by the API (enqueue/dequeue, etc) + * can be invoked. + * + * @param dev_id + * Compress device identifier + * @return + * - 0: Success, device started. + * - <0: Error code of the driver device start function. + */ +int __rte_experimental +rte_compressdev_start(uint8_t dev_id); + +/** + * Stop a device. The device can be restarted with a call to + * rte_compressdev_start() + * + * @param dev_id + * Compress device identifier + */ +void __rte_experimental +rte_compressdev_stop(uint8_t dev_id); + +/** + * Close an device. + * The memory allocated in the device gets freed. + * After calling this function, in order to use + * the device again, it is required to + * configure the device again. + * + * @param dev_id + * Compress device identifier + * + * @return + * - 0 on successfully closing device + * - <0 on failure to close device + */ +int __rte_experimental +rte_compressdev_close(uint8_t dev_id); + +/** + * Allocate and set up a receive queue pair for a device. + * This should only be called when the device is stopped. + * + * + * @param dev_id + * Compress device identifier + * @param queue_pair_id + * The index of the queue pairs to set up. The + * value must be in the range [0, nb_queue_pair - 1] + * previously supplied to rte_compressdev_configure() + * @param max_inflight_ops + * Max number of ops which the qp will have to + * accommodate simultaneously + * @param socket_id + * The *socket_id* argument is the socket identifier + * in case of NUMA. The value can be *SOCKET_ID_ANY* + * if there is no NUMA constraint for the DMA memory + * allocated for the receive queue pair + * @return + * - 0: Success, queue pair correctly set up. + * - <0: Queue pair configuration failed + */ +int __rte_experimental +rte_compressdev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, + uint32_t max_inflight_ops, int socket_id); + +/** + * Get the number of queue pairs on a specific comp device + * + * @param dev_id + * Compress device identifier + * @return + * - The number of configured queue pairs. + */ +uint16_t __rte_experimental +rte_compressdev_queue_pair_count(uint8_t dev_id); + + +/** + * Retrieve the general I/O statistics of a device. + * + * @param dev_id + * The identifier of the device + * @param stats + * A pointer to a structure of type + * *rte_compressdev_stats* to be filled with the + * values of device counters + * @return + * - Zero if successful. + * - Non-zero otherwise. + */ +int __rte_experimental +rte_compressdev_stats_get(uint8_t dev_id, struct rte_compressdev_stats *stats); + +/** + * Reset the general I/O statistics of a device. + * + * @param dev_id + * The identifier of the device. + */ +void __rte_experimental +rte_compressdev_stats_reset(uint8_t dev_id); + +/** + * Retrieve the contextual information of a device. + * + * @param dev_id + * Compress device identifier + * @param dev_info + * A pointer to a structure of type *rte_compressdev_info* + * to be filled with the contextual information of the device + * + * @note The capabilities field of dev_info is set to point to the first + * element of an array of struct rte_compressdev_capabilities. + * The element after the last valid element has it's op field set to + * RTE_COMP_ALGO_LIST_END. + */ +void __rte_experimental +rte_compressdev_info_get(uint8_t dev_id, struct rte_compressdev_info *dev_info); + +/** + * + * Dequeue a burst of processed compression operations from a queue on the comp + * device. 
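Before configuring a device, an application will typically probe it with rte_compressdev_info_get() and the per-algorithm capability lookup declared earlier in this header. A short, non-normative sketch follows; RTE_COMP_ALGO_DEFLATE comes from rte_comp.h, and checking for hardware offload is just one possible policy:

    #include <rte_comp.h>
    #include <rte_compressdev.h>

    /* Example policy check: does dev_id support DEFLATE and offload it to
     * hardware? Both checks are illustrative, not required by the API.
     */
    static int
    dev_supports_hw_deflate(uint8_t dev_id)
    {
            struct rte_compressdev_info info;
            const struct rte_compressdev_capabilities *cap;

            rte_compressdev_info_get(dev_id, &info);

            cap = rte_compressdev_capability_get(dev_id, RTE_COMP_ALGO_DEFLATE);
            if (cap == NULL)
                    return 0; /* algorithm not supported at all */

            return (info.feature_flags & RTE_COMPDEV_FF_HW_ACCELERATED) != 0;
    }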
The dequeued operation are stored in *rte_comp_op* structures + * whose pointers are supplied in the *ops* array. + * + * The rte_compressdev_dequeue_burst() function returns the number of ops + * actually dequeued, which is the number of *rte_comp_op* data structures + * effectively supplied into the *ops* array. + * + * A return value equal to *nb_ops* indicates that the queue contained + * at least *nb_ops* operations, and this is likely to signify that other + * processed operations remain in the devices output queue. Applications + * implementing a "retrieve as many processed operations as possible" policy + * can check this specific case and keep invoking the + * rte_compressdev_dequeue_burst() function until a value less than + * *nb_ops* is returned. + * + * The rte_compressdev_dequeue_burst() function does not provide any error + * notification to avoid the corresponding overhead. + * + * @note: operation ordering is not maintained within the queue pair. + * + * @note: In case op status = OUT_OF_SPACE_TERMINATED, op.consumed=0 and the + * op must be resubmitted with the same input data and a larger output buffer. + * op.produced is usually 0, but in decompression cases a PMD may return > 0 + * and the application may find it useful to inspect that data. + * This status is only returned on STATELESS ops. + * + * @note: In case op status = OUT_OF_SPACE_RECOVERABLE, op.produced can be used + * and next op in stream should continue on from op.consumed+1 with a fresh + * output buffer. + * Consumed=0, produced=0 is an unusual but allowed case. There may be useful + * state/history stored in the PMD, even though no output was produced yet. + * + * + * @param dev_id + * Compress device identifier + * @param qp_id + * The index of the queue pair from which to retrieve + * processed operations. The value must be in the range + * [0, nb_queue_pair - 1] previously supplied to + * rte_compressdev_configure() + * @param ops + * The address of an array of pointers to + * *rte_comp_op* structures that must be + * large enough to store *nb_ops* pointers in it + * @param nb_ops + * The maximum number of operations to dequeue + * @return + * - The number of operations actually dequeued, which is the number + * of pointers to *rte_comp_op* structures effectively supplied to the + * *ops* array. + */ +uint16_t __rte_experimental +rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_comp_op **ops, uint16_t nb_ops); + +/** + * Enqueue a burst of operations for processing on a compression device. + * + * The rte_compressdev_enqueue_burst() function is invoked to place + * comp operations on the queue *qp_id* of the device designated by + * its *dev_id*. + * + * The *nb_ops* parameter is the number of operations to process which are + * supplied in the *ops* array of *rte_comp_op* structures. + * + * The rte_compressdev_enqueue_burst() function returns the number of + * operations it actually enqueued for processing. A return value equal to + * *nb_ops* means that all packets have been enqueued. + * + * @note All compression operations are Out-of-place (OOP) operations, + * as the size of the output data is different to the size of the input data. + * + * @note The flush flag only applies to operations which return SUCCESS. + * In OUT_OF_SPACE cases whether STATEFUL or STATELESS, data in dest buffer + * is as if flush flag was FLUSH_NONE. + * @note flush flag only applies in compression direction. It has no meaning + * for decompression. 
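A sketch of the enqueue/poll-dequeue pattern the notes above describe; the ops[] array is assumed to have been populated by the application, the burst size is an arbitrary example, and per-op status handling (OUT_OF_SPACE cases and so on) is deliberately omitted:

    #include <rte_comp.h>
    #include <rte_compressdev.h>

    #define SKETCH_BURST 32 /* example burst size, not an API requirement */

    /* Submit a prepared set of ops and poll until they have all come back. */
    static void
    run_comp_burst(uint8_t dev_id, uint16_t qp_id, struct rte_comp_op **ops,
                    uint16_t nb)
    {
            struct rte_comp_op *done[SKETCH_BURST];
            uint16_t enq = 0, deq = 0;

            while (enq < nb)
                    enq += rte_compressdev_enqueue_burst(dev_id, qp_id,
                                    &ops[enq], nb - enq);

            while (deq < nb)
                    deq += rte_compressdev_dequeue_burst(dev_id, qp_id,
                                    done, SKETCH_BURST);
    }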
+ * @note: operation ordering is not maintained within the queue pair. + * + * @param dev_id + * Compress device identifier + * @param qp_id + * The index of the queue pair on which operations + * are to be enqueued for processing. The value + * must be in the range [0, nb_queue_pairs - 1] + * previously supplied to *rte_compressdev_configure* + * @param ops + * The address of an array of *nb_ops* pointers + * to *rte_comp_op* structures which contain + * the operations to be processed + * @param nb_ops + * The number of operations to process + * @return + * The number of operations actually enqueued on the device. The return + * value can be less than the value of the *nb_ops* parameter when the + * comp devices queue is full or if invalid parameters are specified in + * a *rte_comp_op*. + */ +uint16_t __rte_experimental +rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_comp_op **ops, uint16_t nb_ops); + +/** + * This should alloc a stream from the device's mempool and initialise it. + * The application should call this API when setting up for the stateful + * processing of a set of data on a device. The API can be called multiple + * times to set up a stream for each data set. The handle returned is only for + * use with ops of op_type STATEFUL and must be passed to the PMD + * with every op in the data stream + * + * @param dev_id + * Compress device identifier + * @param xform + * xform data + * @param stream + * Pointer to where PMD's private stream handle should be stored + * + * @return + * - 0 if successful and valid stream handle + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support STATEFUL operations. + * - Returns -ENOTSUP if comp device does not support the comp transform. + * - Returns -ENOMEM if the private stream could not be allocated. + * + */ +int __rte_experimental +rte_compressdev_stream_create(uint8_t dev_id, + const struct rte_comp_xform *xform, + void **stream); + +/** + * This should clear the stream and return it to the device's mempool. + * + * @param dev_id + * Compress device identifier + * + * @param stream + * PMD's private stream data + * + * @return + * - 0 if successful + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support STATEFUL operations. + * - Returns -EBUSY if can't free stream as there are inflight operations + */ +int __rte_experimental +rte_compressdev_stream_free(uint8_t dev_id, void *stream); + +/** + * This should alloc a private_xform from the device's mempool and initialise + * it. The application should call this API when setting up for stateless + * processing on a device. If it returns non-shareable, then the appl cannot + * share this handle with multiple in-flight ops and should call this API again + * to get a separate handle for every in-flight op. + * The handle returned is only valid for use with ops of op_type STATELESS. + * + * @param dev_id + * Compress device identifier + * @param xform + * xform data + * @param private_xform + * Pointer to where PMD's private_xform handle should be stored + * + * @return + * - if successful returns 0 + * and valid private_xform handle + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support the comp transform. + * - Returns -ENOMEM if the private_xform could not be allocated. 
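A stateless setup sketch for rte_compressdev_private_xform_create(); the xform layout follows struct rte_comp_xform from rte_comp.h in this release, and the DEFLATE level and window size are arbitrary example values:

    #include <rte_comp.h>
    #include <rte_compressdev.h>

    /* Create a stateless DEFLATE private_xform (sketch; error detail omitted). */
    static void *
    create_deflate_priv_xform(uint8_t dev_id)
    {
            struct rte_comp_xform xform = {
                    .type = RTE_COMP_COMPRESS,
                    .compress = {
                            .algo = RTE_COMP_ALGO_DEFLATE,
                            .deflate.huffman = RTE_COMP_HUFFMAN_DEFAULT,
                            .level = RTE_COMP_LEVEL_PMD_DEFAULT,
                            .window_size = 15, /* example log2 window size */
                    },
            };
            void *priv_xform = NULL;

            if (rte_compressdev_private_xform_create(dev_id, &xform,
                            &priv_xform) < 0)
                    return NULL;

            return priv_xform;
    }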
+ */ +int __rte_experimental +rte_compressdev_private_xform_create(uint8_t dev_id, + const struct rte_comp_xform *xform, + void **private_xform); + +/** + * This should clear the private_xform and return it to the device's mempool. + * It is the application's responsibility to ensure that private_xform data + * is not cleared while there are still in-flight operations using it. + * + * @param dev_id + * Compress device identifier + * + * @param private_xform + * PMD's private_xform data + * + * @return + * - 0 if successful + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + */ +int __rte_experimental +rte_compressdev_private_xform_free(uint8_t dev_id, void *private_xform); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_COMPRESSDEV_H_ */ diff --git a/lib/librte_compressdev/rte_compressdev_internal.h b/lib/librte_compressdev/rte_compressdev_internal.h new file mode 100644 index 00000000..22ceac66 --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev_internal.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _RTE_COMPRESSDEV_INTERNAL_H_ +#define _RTE_COMPRESSDEV_INTERNAL_H_ + +/* rte_compressdev_internal.h + * This file holds Compressdev private data structures. + */ +#include + +#include "rte_comp.h" + +#define RTE_COMPRESSDEV_NAME_MAX_LEN (64) +/**< Max length of name of comp PMD */ + +/* Logging Macros */ +extern int compressdev_logtype; +#define COMPRESSDEV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, compressdev_logtype, "%s(): "fmt "\n", \ + __func__, ##args) + +/** + * Dequeue processed packets from queue pair of a device. + * + * @param qp + * The queue pair from which to retrieve + * processed operations. + * @param ops + * The address of an array of pointers to + * *rte_comp_op* structures that must be + * large enough to store *nb_ops* pointers in it + * @param nb_ops + * The maximum number of operations to dequeue + * @return + * - The number of operations actually dequeued, which is the number + * of pointers to *rte_comp_op* structures effectively supplied to the + * *ops* array. + */ +typedef uint16_t (*compressdev_dequeue_pkt_burst_t)(void *qp, + struct rte_comp_op **ops, uint16_t nb_ops); + +/** + * Enqueue a burst of operations for processing. + * + * @param qp + * The queue pair on which operations + * are to be enqueued for processing + * @param ops + * The address of an array of *nb_ops* pointers + * to *rte_comp_op* structures which contain + * the operations to be processed + * @param nb_ops + * The number of operations to process + * @return + * The number of operations actually enqueued on the device. The return + * value can be less than the value of the *nb_ops* parameter when the + * comp devices queue is full or if invalid parameters are specified in + * a *rte_comp_op*. + */ + +typedef uint16_t (*compressdev_enqueue_pkt_burst_t)(void *qp, + struct rte_comp_op **ops, uint16_t nb_ops); + +/** The data structure associated with each comp device. 
*/ +struct rte_compressdev { + compressdev_dequeue_pkt_burst_t dequeue_burst; + /**< Pointer to PMD receive function */ + compressdev_enqueue_pkt_burst_t enqueue_burst; + /**< Pointer to PMD transmit function */ + + struct rte_compressdev_data *data; + /**< Pointer to device data */ + struct rte_compressdev_ops *dev_ops; + /**< Functions exported by PMD */ + uint64_t feature_flags; + /**< Supported features */ + struct rte_device *device; + /**< Backing device */ + + __extension__ + uint8_t attached : 1; + /**< Flag indicating the device is attached */ +} __rte_cache_aligned; + +/** + * + * The data part, with no function pointers, associated with each device. + * + * This structure is safe to place in shared memory to be common among + * different processes in a multi-process configuration. + */ +struct rte_compressdev_data { + uint8_t dev_id; + /**< Compress device identifier */ + uint8_t socket_id; + /**< Socket identifier where memory is allocated */ + char name[RTE_COMPRESSDEV_NAME_MAX_LEN]; + /**< Unique identifier name */ + + __extension__ + uint8_t dev_started : 1; + /**< Device state: STARTED(1)/STOPPED(0) */ + + void **queue_pairs; + /**< Array of pointers to queue pairs. */ + uint16_t nb_queue_pairs; + /**< Number of device queue pairs */ + + void *dev_private; + /**< PMD-specific private data */ +} __rte_cache_aligned; +#endif diff --git a/lib/librte_compressdev/rte_compressdev_pmd.c b/lib/librte_compressdev/rte_compressdev_pmd.c new file mode 100644 index 00000000..7de4f339 --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev_pmd.c @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include +#include +#include + +#include "rte_compressdev_internal.h" +#include "rte_compressdev_pmd.h" + +int compressdev_logtype; + +/** + * Parse name from argument + */ +static int +rte_compressdev_pmd_parse_name_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + struct rte_compressdev_pmd_init_params *params = extra_args; + int n; + + n = snprintf(params->name, RTE_COMPRESSDEV_NAME_MAX_LEN, "%s", value); + if (n >= RTE_COMPRESSDEV_NAME_MAX_LEN) + return -EINVAL; + + return 0; +} + +/** + * Parse unsigned integer from argument + */ +static int +rte_compressdev_pmd_parse_uint_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int i; + char *end; + + errno = 0; + i = strtol(value, &end, 10); + if (*end != 0 || errno != 0 || i < 0) + return -EINVAL; + + *((uint32_t *)extra_args) = i; + return 0; +} + +int __rte_experimental +rte_compressdev_pmd_parse_input_args( + struct rte_compressdev_pmd_init_params *params, + const char *args) +{ + struct rte_kvargs *kvlist = NULL; + int ret = 0; + + if (params == NULL) + return -EINVAL; + + if (args) { + kvlist = rte_kvargs_parse(args, compressdev_pmd_valid_params); + if (kvlist == NULL) + return -EINVAL; + + ret = rte_kvargs_process(kvlist, + RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG, + &rte_compressdev_pmd_parse_uint_arg, + ¶ms->socket_id); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, + RTE_COMPRESSDEV_PMD_NAME_ARG, + &rte_compressdev_pmd_parse_name_arg, + params); + if (ret < 0) + goto free_kvlist; + } + +free_kvlist: + rte_kvargs_free(kvlist); + return ret; +} + +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_create(const char *name, + struct rte_device *device, + size_t private_data_size, + struct rte_compressdev_pmd_init_params *params) +{ + struct rte_compressdev *compressdev; + + if 
(params->name[0] != '\0') { + COMPRESSDEV_LOG(INFO, "[%s] User specified device name = %s\n", + device->driver->name, params->name); + name = params->name; + } + + COMPRESSDEV_LOG(INFO, "[%s] - Creating compressdev %s\n", + device->driver->name, name); + + COMPRESSDEV_LOG(INFO, + "[%s] - Init parameters - name: %s, socket id: %d", + device->driver->name, name, + params->socket_id); + + /* allocate device structure */ + compressdev = rte_compressdev_pmd_allocate(name, params->socket_id); + if (compressdev == NULL) { + COMPRESSDEV_LOG(ERR, "[%s] Failed to allocate comp device %s", + device->driver->name, name); + return NULL; + } + + /* allocate private device structure */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + compressdev->data->dev_private = + rte_zmalloc_socket("compressdev device private", + private_data_size, + RTE_CACHE_LINE_SIZE, + params->socket_id); + + if (compressdev->data->dev_private == NULL) { + COMPRESSDEV_LOG(ERR, + "[%s] Cannot allocate memory for compressdev %s private data", + device->driver->name, name); + + rte_compressdev_pmd_release_device(compressdev); + return NULL; + } + } + + compressdev->device = device; + + return compressdev; +} + +int __rte_experimental +rte_compressdev_pmd_destroy(struct rte_compressdev *compressdev) +{ + int retval; + + COMPRESSDEV_LOG(INFO, "[%s] Closing comp device %s", + compressdev->device->driver->name, + compressdev->device->name); + + /* free comp device */ + retval = rte_compressdev_pmd_release_device(compressdev); + if (retval) + return retval; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(compressdev->data->dev_private); + + compressdev->device = NULL; + compressdev->data = NULL; + + return 0; +} diff --git a/lib/librte_compressdev/rte_compressdev_pmd.h b/lib/librte_compressdev/rte_compressdev_pmd.h new file mode 100644 index 00000000..38e9ea02 --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev_pmd.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _RTE_COMPRESSDEV_PMD_H_ +#define _RTE_COMPRESSDEV_PMD_H_ + +/** @file + * RTE comp PMD APIs + * + * @note + * These APIs are for comp PMDs only and user applications should not call + * them directly. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include +#include + +#include "rte_compressdev.h" +#include "rte_compressdev_internal.h" + +#define RTE_COMPRESSDEV_PMD_NAME_ARG ("name") +#define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG ("socket_id") + +static const char * const compressdev_pmd_valid_params[] = { + RTE_COMPRESSDEV_PMD_NAME_ARG, + RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG +}; + +/** + * @internal + * Initialisation parameters for comp devices + */ +struct rte_compressdev_pmd_init_params { + char name[RTE_COMPRESSDEV_NAME_MAX_LEN]; + int socket_id; +}; + +/** Global structure used for maintaining state of allocated comp devices */ +struct rte_compressdev_global { + struct rte_compressdev *devs; /**< Device information array */ + struct rte_compressdev_data *data[RTE_COMPRESS_MAX_DEVS]; + /**< Device private data */ + uint8_t nb_devs; /**< Number of devices found */ + uint8_t max_devs; /**< Max number of devices */ +}; + +/** Pointer to global array of comp devices */ +extern struct rte_compressdev *rte_compressdevs; +/** Pointer to global comp devices data structure */ +extern struct rte_compressdev_global *rte_compressdev_globals; + +/** + * Get the rte_compressdev structure device pointer for the named device. 
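As a small illustration of the lookup helper documented here, a hypothetical wrapper that returns the device id for a named compressdev; UINT8_MAX is used as an arbitrary "not found" marker, not a value defined by the API:

    #include <stdint.h>
    #include <rte_compressdev_pmd.h>

    /* Return the device id for a named compressdev, or UINT8_MAX if unknown. */
    static uint8_t
    compressdev_id_by_name(const char *name)
    {
            struct rte_compressdev *dev =
                    rte_compressdev_pmd_get_named_dev(name);

            return (dev != NULL) ? dev->data->dev_id : UINT8_MAX;
    }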
+ * + * @param name + * Compress device name + * @return + * - The rte_compressdev structure pointer for the given device identifier. + */ +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_get_named_dev(const char *name); + +/** + * Definitions of all functions exported by a driver through the + * the generic structure of type *comp_dev_ops* supplied in the + * *rte_compressdev* structure associated with a device. + */ + +/** + * Function used to configure device. + * + * @param dev + * Compress device + * @param config + * Compress device configurations + * @return + * Returns 0 on success + */ +typedef int (*compressdev_configure_t)(struct rte_compressdev *dev, + struct rte_compressdev_config *config); + +/** + * Function used to start a configured device. + * + * @param dev + * Compress device + * @return + * Returns 0 on success + */ +typedef int (*compressdev_start_t)(struct rte_compressdev *dev); + +/** + * Function used to stop a configured device. + * + * @param dev + * Compress device + */ +typedef void (*compressdev_stop_t)(struct rte_compressdev *dev); + +/** + * Function used to close a configured device. + * + * @param dev + * Compress device + * @return + * - 0 on success. + * - EAGAIN if can't close as device is busy + */ +typedef int (*compressdev_close_t)(struct rte_compressdev *dev); + + +/** + * Function used to get statistics of a device. + * + * @param dev + * Compress device + * @param stats + * Compress device stats to populate + */ +typedef void (*compressdev_stats_get_t)(struct rte_compressdev *dev, + struct rte_compressdev_stats *stats); + + +/** + * Function used to reset statistics of a device. + * + * @param dev + * Compress device + */ +typedef void (*compressdev_stats_reset_t)(struct rte_compressdev *dev); + + +/** + * Function used to get specific information of a device. + * + * @param dev + * Compress device + */ +typedef void (*compressdev_info_get_t)(struct rte_compressdev *dev, + struct rte_compressdev_info *dev_info); + +/** + * Setup a queue pair for a device. + * + * @param dev + * Compress device + * @param qp_id + * Queue pair identifier + * @param max_inflight_ops + * Max inflight ops which qp must accommodate + * @param socket_id + * Socket identifier + * @return + * Returns 0 on success. + */ +typedef int (*compressdev_queue_pair_setup_t)(struct rte_compressdev *dev, + uint16_t qp_id, uint32_t max_inflight_ops, int socket_id); + +/** + * Release memory resources allocated by given queue pair. + * + * @param dev + * Compress device + * @param qp_id + * Queue pair identifier + * @return + * - 0 on success. + * - EAGAIN if can't close as device is busy + */ +typedef int (*compressdev_queue_pair_release_t)(struct rte_compressdev *dev, + uint16_t qp_id); + +/** + * Get number of available queue pairs of a device. + * + * @param dev + * Compress device + * @return + * Returns number of queue pairs on success. + */ +typedef uint32_t (*compressdev_queue_pair_count_t)(struct rte_compressdev *dev); + +/** + * Create driver private stream data. + * + * @param dev + * Compressdev device + * @param xform + * xform data + * @param stream + * ptr where handle of pmd's private stream data should be stored + * @return + * - Returns 0 if private stream structure has been created successfully. + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support STATEFUL operations. + * - Returns -ENOTSUP if comp device does not support the comp transform. 
+ * - Returns -ENOMEM if the private stream could not be allocated. + */ +typedef int (*compressdev_stream_create_t)(struct rte_compressdev *dev, + const struct rte_comp_xform *xform, void **stream); + +/** + * Free driver private stream data. + * + * @param dev + * Compressdev device + * @param stream + * handle of pmd's private stream data + * @return + * - 0 if successful + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support STATEFUL operations. + * - Returns -EBUSY if can't free stream as there are inflight operations + */ +typedef int (*compressdev_stream_free_t)(struct rte_compressdev *dev, + void *stream); + +/** + * Create driver private_xform data. + * + * @param dev + * Compressdev device + * @param xform + * xform data + * @param private_xform + * ptr where handle of pmd's private_xform data should be stored + * @return + * - if successful returns 0 + * and valid private_xform handle + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if comp device does not support the comp transform. + * - Returns -ENOMEM if the private_xform could not be allocated. + */ +typedef int (*compressdev_private_xform_create_t)(struct rte_compressdev *dev, + const struct rte_comp_xform *xform, void **private_xform); + +/** + * Free driver private_xform data. + * + * @param dev + * Compressdev device + * @param private_xform + * handle of pmd's private_xform data + * @return + * - 0 if successful + * - <0 in error cases + * - Returns -EINVAL if input parameters are invalid. + * - Returns -EBUSY if can't free private_xform due to inflight operations + */ +typedef int (*compressdev_private_xform_free_t)(struct rte_compressdev *dev, + void *private_xform); + +/** comp device operations function pointer table */ +struct rte_compressdev_ops { + compressdev_configure_t dev_configure; /**< Configure device. */ + compressdev_start_t dev_start; /**< Start device. */ + compressdev_stop_t dev_stop; /**< Stop device. */ + compressdev_close_t dev_close; /**< Close device. */ + + compressdev_info_get_t dev_infos_get; /**< Get device info. */ + + compressdev_stats_get_t stats_get; + /**< Get device statistics. */ + compressdev_stats_reset_t stats_reset; + /**< Reset device statistics. */ + + compressdev_queue_pair_setup_t queue_pair_setup; + /**< Set up a device queue pair. */ + compressdev_queue_pair_release_t queue_pair_release; + /**< Release a queue pair. */ + + compressdev_stream_create_t stream_create; + /**< Create a comp stream and initialise its private data. */ + compressdev_stream_free_t stream_free; + /**< Free a comp stream's private data. */ + + compressdev_private_xform_create_t private_xform_create; + /**< Create a comp private_xform and initialise its private data. */ + compressdev_private_xform_free_t private_xform_free; + /**< Free a comp private_xform's data. */ +}; + +/** + * @internal + * + * Function for internal use by dummy drivers primarily, e.g. ring-based + * driver. + * Allocates a new compressdev slot for an comp device and returns the pointer + * to that slot for the driver to use. 
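A sketch of how a driver might populate the ops table defined above; the stub callbacks and the "sketch" prefix are hypothetical. Callbacks left NULL make the matching rte_compressdev_*() API bail out (returning -ENOTSUP where the call returns a value), via the RTE_FUNC_PTR_OR_ERR_RET checks in rte_compressdev.c:

    #include <rte_common.h>
    #include <rte_compressdev_pmd.h>

    /* Hypothetical stub callbacks for a "sketch" PMD. */
    static int
    sketch_dev_configure(struct rte_compressdev *dev __rte_unused,
                    struct rte_compressdev_config *config __rte_unused)
    {
            return 0;
    }

    static int
    sketch_dev_start(struct rte_compressdev *dev __rte_unused)
    {
            return 0;
    }

    static void
    sketch_dev_stop(struct rte_compressdev *dev __rte_unused)
    {
    }

    /* Partial ops table: unimplemented callbacks are simply left NULL. */
    static struct rte_compressdev_ops sketch_pmd_ops = {
            .dev_configure = sketch_dev_configure,
            .dev_start     = sketch_dev_start,
            .dev_stop      = sketch_dev_stop,
    };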
+ * + * @param name + * Unique identifier name for each device + * @param socket_id + * Socket to allocate resources on + * @return + * - Slot in the rte_dev_devices array for a new device; + */ +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_allocate(const char *name, int socket_id); + +/** + * @internal + * + * Function for internal use by dummy drivers primarily, e.g. ring-based + * driver. + * Release the specified compressdev device. + * + * @param dev + * Compress device + * @return + * - 0 on success, negative on error + */ +int __rte_experimental +rte_compressdev_pmd_release_device(struct rte_compressdev *dev); + + +/** + * @internal + * + * PMD assist function to parse initialisation arguments for comp driver + * when creating a new comp PMD device instance. + * + * PMD driver should set default values for that PMD before calling function, + * these default values will be over-written with successfully parsed values + * from args string. + * + * @param params + * Parsed PMD initialisation parameters + * @param args + * Input argument string to parse + * @return + * - 0 on success + * - errno on failure + */ +int __rte_experimental +rte_compressdev_pmd_parse_input_args( + struct rte_compressdev_pmd_init_params *params, + const char *args); + +/** + * @internal + * + * PMD assist function to provide boiler plate code for comp driver to create + * and allocate resources for a new comp PMD device instance. + * + * @param name + * Compress device name + * @param device + * Base device instance + * @param params + * PMD initialisation parameters + * @return + * - comp device instance on success + * - NULL on creation failure + */ +struct rte_compressdev * __rte_experimental +rte_compressdev_pmd_create(const char *name, + struct rte_device *device, + size_t private_data_size, + struct rte_compressdev_pmd_init_params *params); + +/** + * @internal + * + * PMD assist function to provide boiler plate code for comp driver to + * destroy and free resources associated with a comp PMD device instance. 
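A hedged sketch of a probe helper built on the two assist functions described here; the private-data struct, the (empty) ops table and the naming policy are illustrative assumptions, not part of the API:

    #include <errno.h>

    #include <rte_dev.h>
    #include <rte_lcore.h>
    #include <rte_compressdev_pmd.h>

    /* Hypothetical per-device private data; a real driver would also supply
     * a populated ops table instead of the empty one used here.
     */
    struct sketch_private { int placeholder; };
    static struct rte_compressdev_ops sketch_pmd_ops;

    static int
    sketch_pmd_probe(struct rte_device *device, const char *args)
    {
            struct rte_compressdev_pmd_init_params init = {
                    .name = "",
                    .socket_id = rte_socket_id(),
            };
            struct rte_compressdev *dev;

            if (rte_compressdev_pmd_parse_input_args(&init, args) < 0)
                    return -EINVAL;

            dev = rte_compressdev_pmd_create(device->name, device,
                            sizeof(struct sketch_private), &init);
            if (dev == NULL)
                    return -ENODEV;

            dev->dev_ops = &sketch_pmd_ops;
            dev->feature_flags = 0;

            return 0;
    }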
+ * + * @param dev + * Compress device + * @return + * - 0 on success + * - errno on failure + */ +int __rte_experimental +rte_compressdev_pmd_destroy(struct rte_compressdev *dev); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_COMPRESSDEV_PMD_H_ */ diff --git a/lib/librte_compressdev/rte_compressdev_version.map b/lib/librte_compressdev/rte_compressdev_version.map new file mode 100644 index 00000000..6f900b67 --- /dev/null +++ b/lib/librte_compressdev/rte_compressdev_version.map @@ -0,0 +1,39 @@ +EXPERIMENTAL { + global: + + rte_compressdev_capability_get; + rte_compressdev_close; + rte_compressdev_configure; + rte_compressdev_count; + rte_compressdev_dequeue_burst; + rte_compressdev_devices_get; + rte_compressdev_enqueue_burst; + rte_compressdev_get_dev_id; + rte_compressdev_get_feature_name; + rte_compressdev_info_get; + rte_compressdev_name_get; + rte_compressdev_pmd_allocate; + rte_compressdev_pmd_create; + rte_compressdev_pmd_destroy; + rte_compressdev_pmd_get_named_dev; + rte_compressdev_pmd_parse_input_args; + rte_compressdev_pmd_release_device; + rte_compressdev_private_xform_create; + rte_compressdev_private_xform_free; + rte_compressdev_queue_pair_count; + rte_compressdev_queue_pair_setup; + rte_compressdev_socket_id; + rte_compressdev_start; + rte_compressdev_stats_get; + rte_compressdev_stats_reset; + rte_compressdev_stop; + rte_compressdev_stream_create; + rte_compressdev_stream_free; + rte_comp_get_feature_name; + rte_comp_op_alloc; + rte_comp_op_bulk_alloc; + rte_comp_op_free; + rte_comp_op_pool_create; + + local: *; +}; diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index bba8dee9..c1148887 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -23,6 +23,7 @@ SYMLINK-y-include += rte_crypto.h SYMLINK-y-include += rte_crypto_sym.h SYMLINK-y-include += rte_cryptodev.h SYMLINK-y-include += rte_cryptodev_pmd.h +SYMLINK-y-include += rte_crypto_asym.h # versioning export map EXPORT_MAP := rte_cryptodev_version.map diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build index 234da323..295f509e 100644 --- a/lib/librte_cryptodev/meson.build +++ b/lib/librte_cryptodev/meson.build @@ -1,10 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 3 +version = 4 sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c') headers = files('rte_cryptodev.h', 'rte_cryptodev_pmd.h', 'rte_crypto.h', - 'rte_crypto_sym.h') + 'rte_crypto_sym.h', + 'rte_crypto_asym.h') deps += ['kvargs', 'mbuf'] diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h index 95cf8615..fd5ef3a8 100644 --- a/lib/librte_cryptodev/rte_crypto.h +++ b/lib/librte_cryptodev/rte_crypto.h @@ -23,6 +23,7 @@ extern "C" { #include #include "rte_crypto_sym.h" +#include "rte_crypto_asym.h" /** Crypto operation types */ enum rte_crypto_op_type { @@ -30,6 +31,8 @@ enum rte_crypto_op_type { /**< Undefined operation type */ RTE_CRYPTO_OP_TYPE_SYMMETRIC, /**< Symmetric operation */ + RTE_CRYPTO_OP_TYPE_ASYMMETRIC + /**< Asymmetric operation */ }; /** Status of crypto operation */ @@ -73,20 +76,37 @@ enum rte_crypto_op_sess_type { * rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() . 
*/ struct rte_crypto_op { - uint8_t type; - /**< operation type */ - uint8_t status; - /**< - * operation status - this is reset to - * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and - * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation - * is successfully processed by a crypto PMD - */ - uint8_t sess_type; - /**< operation session type */ - - uint8_t reserved[5]; - /**< Reserved bytes to fill 64 bits for future additions */ + __extension__ + union { + uint64_t raw; + __extension__ + struct { + uint8_t type; + /**< operation type */ + uint8_t status; + /**< + * operation status - this is reset to + * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation + * from mempool and will be set to + * RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation + * is successfully processed by a crypto PMD + */ + uint8_t sess_type; + /**< operation session type */ + uint8_t reserved[3]; + /**< Reserved bytes to fill 64 bits for + * future additions + */ + uint16_t private_data_offset; + /**< Offset to indicate start of private data (if any). + * The offset is counted from the start of the + * rte_crypto_op including IV. + * The private data may be used by the application + * to store information which should remain untouched + * in the library/driver + */ + }; + }; struct rte_mempool *mempool; /**< crypto operation mempool which operation is allocated from */ @@ -97,6 +117,10 @@ struct rte_crypto_op { union { struct rte_crypto_sym_op sym[0]; /**< Symmetric operation parameters */ + + struct rte_crypto_asym_op asym[0]; + /**< Asymmetric operation parameters */ + }; /**< operation specific parameters */ }; @@ -117,6 +141,9 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type) case RTE_CRYPTO_OP_TYPE_SYMMETRIC: __rte_crypto_sym_op_reset(op->sym); break; + case RTE_CRYPTO_OP_TYPE_ASYMMETRIC: + memset(op->asym, 0, sizeof(struct rte_crypto_asym_op)); + break; case RTE_CRYPTO_OP_TYPE_UNDEFINED: default: break; @@ -283,9 +310,14 @@ __rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size) if (likely(op->mempool != NULL)) { priv_size = __rte_crypto_op_get_priv_data_size(op->mempool); - if (likely(priv_size >= size)) - return (void *)((uint8_t *)(op + 1) + + if (likely(priv_size >= size)) { + if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) + return (void *)((uint8_t *)(op + 1) + sizeof(struct rte_crypto_sym_op)); + if (op->type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) + return (void *)((uint8_t *)(op + 1) + + sizeof(struct rte_crypto_asym_op)); + } } return NULL; @@ -388,6 +420,24 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op, return __rte_crypto_sym_op_attach_sym_session(op->sym, sess); } +/** + * Attach a asymmetric session to a crypto operation + * + * @param op crypto operation, must be of type asymmetric + * @param sess cryptodev session + */ +static inline int +rte_crypto_op_attach_asym_session(struct rte_crypto_op *op, + struct rte_cryptodev_asym_session *sess) +{ + if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_ASYMMETRIC)) + return -1; + + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + op->asym->session = sess; + return 0; +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_cryptodev/rte_crypto_asym.h b/lib/librte_cryptodev/rte_crypto_asym.h new file mode 100644 index 00000000..5e185b2d --- /dev/null +++ b/lib/librte_cryptodev/rte_crypto_asym.h @@ -0,0 +1,496 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Cavium Networks + */ + +#ifndef _RTE_CRYPTO_ASYM_H_ +#define _RTE_CRYPTO_ASYM_H_ + +/** + * @file 
rte_crypto_asym.h + * + * RTE Definitions for Asymmetric Cryptography + * + * Defines asymmetric algorithms and modes, as well as supported + * asymmetric crypto operations. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include +#include + +typedef struct rte_crypto_param_t { + uint8_t *data; + /**< pointer to buffer holding data */ + rte_iova_t iova; + /**< IO address of data buffer */ + size_t length; + /**< length of data in bytes */ +} rte_crypto_param; + +/** asym xform type name strings */ +extern const char * +rte_crypto_asym_xform_strings[]; + +/** asym operations type name strings */ +extern const char * +rte_crypto_asym_op_strings[]; + +/** + * Asymmetric crypto transformation types. + * Each xform type maps to one asymmetric algorithm + * performing specific operation + * + */ +enum rte_crypto_asym_xform_type { + RTE_CRYPTO_ASYM_XFORM_UNSPECIFIED = 0, + /**< Invalid xform. */ + RTE_CRYPTO_ASYM_XFORM_NONE, + /**< Xform type None. + * May be supported by PMD to support + * passthrough op for debugging purpose. + * if xform_type none , op_type is disregarded. + */ + RTE_CRYPTO_ASYM_XFORM_RSA, + /**< RSA. Performs Encrypt, Decrypt, Sign and Verify. + * Refer to rte_crypto_asym_op_type + */ + RTE_CRYPTO_ASYM_XFORM_DH, + /**< Diffie-Hellman. + * Performs Key Generate and Shared Secret Compute. + * Refer to rte_crypto_asym_op_type + */ + RTE_CRYPTO_ASYM_XFORM_DSA, + /**< Digital Signature Algorithm + * Performs Signature Generation and Verification. + * Refer to rte_crypto_asym_op_type + */ + RTE_CRYPTO_ASYM_XFORM_MODINV, + /**< Modular Inverse + * Perform Modulus inverse b^(-1) mod n + */ + RTE_CRYPTO_ASYM_XFORM_MODEX, + /**< Modular Exponentiation + * Perform Modular Exponentiation b^e mod n + */ + RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END + /**< End of list */ +}; + +/** + * Asymmetric crypto operation type variants + */ +enum rte_crypto_asym_op_type { + RTE_CRYPTO_ASYM_OP_ENCRYPT, + /**< Asymmetric Encrypt operation */ + RTE_CRYPTO_ASYM_OP_DECRYPT, + /**< Asymmetric Decrypt operation */ + RTE_CRYPTO_ASYM_OP_SIGN, + /**< Signature Generation operation */ + RTE_CRYPTO_ASYM_OP_VERIFY, + /**< Signature Verification operation */ + RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE, + /**< DH Private Key generation operation */ + RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE, + /**< DH Public Key generation operation */ + RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE, + /**< DH Shared Secret compute operation */ + RTE_CRYPTO_ASYM_OP_LIST_END +}; + +/** + * Padding types for RSA signature. + */ +enum rte_crypto_rsa_padding_type { + RTE_CRYPTO_RSA_PADDING_NONE = 0, + /**< RSA no padding scheme */ + RTE_CRYPTO_RSA_PKCS1_V1_5_BT0, + /**< RSA PKCS#1 V1.5 Block Type 0 padding scheme + * as descibed in rfc2313 + */ + RTE_CRYPTO_RSA_PKCS1_V1_5_BT1, + /**< RSA PKCS#1 V1.5 Block Type 01 padding scheme + * as descibed in rfc2313 + */ + RTE_CRYPTO_RSA_PKCS1_V1_5_BT2, + /**< RSA PKCS#1 V1.5 Block Type 02 padding scheme + * as descibed in rfc2313 + */ + RTE_CRYPTO_RSA_PADDING_OAEP, + /**< RSA PKCS#1 OAEP padding scheme */ + RTE_CRYPTO_RSA_PADDING_PSS, + /**< RSA PKCS#1 PSS padding scheme */ + RTE_CRYPTO_RSA_PADDING_TYPE_LIST_END +}; + +/** + * RSA private key type enumeration + * + * enumerates private key format required to perform RSA crypto + * transform. 
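A small helper showing how a flat buffer might be wrapped in the rte_crypto_param type introduced above; it assumes the buffer was allocated with rte_malloc() so its IO address can be resolved with rte_malloc_virt2iova():

    #include <stddef.h>

    #include <rte_malloc.h>
    #include <rte_crypto_asym.h>

    /* Wrap an rte_malloc()'d buffer in an rte_crypto_param (sketch). */
    static rte_crypto_param
    make_crypto_param(void *buf, size_t len)
    {
            rte_crypto_param p = {
                    .data = buf,
                    .iova = rte_malloc_virt2iova(buf),
                    .length = len,
            };

            return p;
    }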
+ * + */ +enum rte_crypto_rsa_priv_key_type { + RTE_RSA_KEY_TYPE_EXP, + /**< RSA private key is an exponent */ + RTE_RSA_KET_TYPE_QT, + /**< RSA private key is in quintuple format + * See rte_crypto_rsa_priv_key_qt + */ +}; + +/** + * Structure describing RSA private key in quintuple format. + * See PKCS V1.5 RSA Cryptography Standard. + */ +struct rte_crypto_rsa_priv_key_qt { + rte_crypto_param p; + /**< p - Private key component P + * Private key component of RSA parameter required for CRT method + * of private key operations in Octet-string network byte order + * format. + */ + + rte_crypto_param q; + /**< q - Private key component Q + * Private key component of RSA parameter required for CRT method + * of private key operations in Octet-string network byte order + * format. + */ + + rte_crypto_param dP; + /**< dP - Private CRT component + * Private CRT component of RSA parameter required for CRT method + * RSA private key operations in Octet-string network byte order + * format. + * dP = d mod ( p - 1 ) + */ + + rte_crypto_param dQ; + /**< dQ - Private CRT component + * Private CRT component of RSA parameter required for CRT method + * RSA private key operations in Octet-string network byte order + * format. + * dQ = d mod ( q - 1 ) + */ + + rte_crypto_param qInv; + /**< qInv - Private CRT component + * Private CRT component of RSA parameter required for CRT method + * RSA private key operations in Octet-string network byte order + * format. + * qInv = inv q mod p + */ +}; + +/** + * Asymmetric RSA transform data + * + * Structure describing RSA xform params + * + */ +struct rte_crypto_rsa_xform { + rte_crypto_param n; + /**< n - Prime modulus + * Prime modulus data of RSA operation in Octet-string network + * byte order format. + */ + + rte_crypto_param e; + /**< e - Public key exponent + * Public key exponent used for RSA public key operations in Octet- + * string network byte order format. + */ + + enum rte_crypto_rsa_priv_key_type key_type; + + __extension__ + union { + rte_crypto_param d; + /**< d - Private key exponent + * Private key exponent used for RSA + * private key operations in + * Octet-string network byte order format. + */ + + struct rte_crypto_rsa_priv_key_qt qt; + /**< qt - Private key in quintuple format */ + }; +}; + +/** + * Asymmetric Modular exponentiation transform data + * + * Structure describing modular exponentation xform param + * + */ +struct rte_crypto_modex_xform { + rte_crypto_param modulus; + /**< modulus + * Prime modulus of the modexp transform operation in octet-string + * network byte order format. + */ + + rte_crypto_param exponent; + /**< exponent + * Private exponent of the modexp transform operation in + * octet-string network byte order format. + */ +}; + +/** + * Asymmetric modular inverse transform operation + * + * Structure describing modulus inverse xform params + * + */ +struct rte_crypto_modinv_xform { + rte_crypto_param modulus; + /**< + * Pointer to the prime modulus data for modular + * inverse operation in octet-string network byte + * order format. + */ +}; + +/** + * Asymmetric DH transform data + * + * Structure describing deffie-hellman xform params + * + */ +struct rte_crypto_dh_xform { + enum rte_crypto_asym_op_type type; + /**< Setup xform for key generate or shared secret compute */ + + rte_crypto_param p; + /**< p : Prime modulus data + * DH prime modulous data in octet-string network byte order format. 
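As an illustration of the RSA transform layout defined in this header, a sketch that fills struct rte_crypto_rsa_xform from pre-encoded key material; n, e and d are assumed to already be octet strings in network byte order, as the field descriptions require, and the private key is supplied in plain exponent form:

    #include <rte_crypto_asym.h>

    /* Build an RSA xform from already-encoded key parameters (sketch). */
    static struct rte_crypto_rsa_xform
    make_rsa_xform(rte_crypto_param n, rte_crypto_param e, rte_crypto_param d)
    {
            struct rte_crypto_rsa_xform xf = {
                    .n = n,
                    .e = e,
                    .key_type = RTE_RSA_KEY_TYPE_EXP,
                    .d = d,
            };

            return xf;
    }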
+ * + */ + + rte_crypto_param g; + /**< g : Generator + * DH group generator data in octet-string network byte order + * format. + * + */ +}; + +/** + * Asymmetric Digital Signature transform operation + * + * Structure describing DSA xform params + * + */ +struct rte_crypto_dsa_xform { + rte_crypto_param p; + /**< p - Prime modulus + * Prime modulus data for DSA operation in Octet-string network byte + * order format. + */ + rte_crypto_param q; + /**< q : Order of the subgroup. + * Order of the subgroup data in Octet-string network byte order + * format. + * (p-1) % q = 0 + */ + rte_crypto_param g; + /**< g: Generator of the subgroup + * Generator data in Octet-string network byte order format. + */ + rte_crypto_param x; + /**< x: Private key of the signer in octet-string network + * byte order format. + * Used when app has pre-defined private key. + * Valid only when xform chain is DSA ONLY. + * if xform chain is DH private key generate + DSA, then DSA sign + * compute will use internally generated key. + */ +}; + +/** + * Operations params for modular operations: + * exponentiation and invert + * + */ +struct rte_crypto_mod_op_param { + rte_crypto_param base; + /**< + * Pointer to base of modular exponentiation/inversion data in + * Octet-string network byte order format. + */ +}; + +/** + * Asymmetric crypto transform data + * + * Structure describing asym xforms. + */ +struct rte_crypto_asym_xform { + struct rte_crypto_asym_xform *next; + /**< Pointer to next xform to set up xform chain.*/ + enum rte_crypto_asym_xform_type xform_type; + /**< Asymmetric crypto transform */ + + __extension__ + union { + struct rte_crypto_rsa_xform rsa; + /**< RSA xform parameters */ + + struct rte_crypto_modex_xform modex; + /**< Modular Exponentiation xform parameters */ + + struct rte_crypto_modinv_xform modinv; + /**< Modulus Inverse xform parameters */ + + struct rte_crypto_dh_xform dh; + /**< DH xform parameters */ + + struct rte_crypto_dsa_xform dsa; + /**< DSA xform parameters */ + }; +}; + +struct rte_cryptodev_asym_session; + +/** + * RSA operation params + * + */ +struct rte_crypto_rsa_op_param { + enum rte_crypto_asym_op_type op_type; + /**< Type of RSA operation for transform */; + + rte_crypto_param message; + /**< + * Pointer to data + * - to be encrypted for RSA public encrypt. + * - to be decrypted for RSA private decrypt. + * - to be signed for RSA sign generation. + * - to be authenticated for RSA sign verification. + */ + + rte_crypto_param sign; + /**< + * Pointer to RSA signature data. If operation is RSA + * sign @ref RTE_CRYPTO_ASYM_OP_SIGN, buffer will be + * over-written with generated signature. + * + * Length of the signature data will be equal to the + * RSA prime modulus length. + */ + + enum rte_crypto_rsa_padding_type pad; + /**< RSA padding scheme to be used for transform */ + + enum rte_crypto_auth_algorithm md; + /**< Hash algorithm to be used for data hash if padding + * scheme is either OAEP or PSS. Valid hash algorithms + * are: + * MD5, SHA1, SHA224, SHA256, SHA384, SHA512 + */ + + enum rte_crypto_auth_algorithm mgf1md; + /**< + * Hash algorithm to be used for mask generation if + * padding scheme is either OAEP or PSS. If padding + * scheme is unspecified data hash algorithm is used + * for mask generation. Valid hash algorithms are: + * MD5, SHA1, SHA224, SHA256, SHA384, SHA512 + */ +}; + +/** + * Diffie-Hellman Operations params. 
+ * @note: + */ +struct rte_crypto_dh_op_param { + rte_crypto_param pub_key; + /**< + * Output generated public key when xform type is + * DH PUB_KEY_GENERATION. + * Input peer public key when xform type is DH + * SHARED_SECRET_COMPUTATION + * pub_key is in octet-string network byte order format. + * + */ + + rte_crypto_param priv_key; + /**< + * Output generated private key if xform type is + * DH PRIVATE_KEY_GENERATION + * Input when xform type is DH SHARED_SECRET_COMPUTATION. + * priv_key is in octet-string network byte order format. + * + */ + + rte_crypto_param shared_secret; + /**< + * Output with calculated shared secret + * when dh xform set up with op type = SHARED_SECRET_COMPUTATION. + * shared_secret is an octet-string network byte order format. + * + */ +}; + +/** + * DSA Operations params + * + */ +struct rte_crypto_dsa_op_param { + enum rte_crypto_asym_op_type op_type; + /**< Signature Generation or Verification */ + rte_crypto_param message; + /**< input message to be signed or verified */ + rte_crypto_param r; + /**< dsa sign component 'r' value + * + * output if op_type = sign generate, + * input if op_type = sign verify + */ + rte_crypto_param s; + /**< dsa sign component 's' value + * + * output if op_type = sign generate, + * input if op_type = sign verify + */ + rte_crypto_param y; + /**< y : Public key of the signer. + * Public key data of the signer in Octet-string network byte order + * format. + * y = g^x mod p + */ +}; + +/** + * Asymmetric Cryptographic Operation. + * + * Structure describing asymmetric crypto operation params. + * + */ +struct rte_crypto_asym_op { + struct rte_cryptodev_asym_session *session; + /**< Handle for the initialised session context */ + + __extension__ + union { + struct rte_crypto_rsa_op_param rsa; + struct rte_crypto_mod_op_param modex; + struct rte_crypto_mod_op_param modinv; + struct rte_crypto_dh_op_param dh; + struct rte_crypto_dsa_op_param dsa; + }; +} __rte_cache_aligned; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRYPTO_ASYM_H_ */ diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index 60797e9c..eb5afc5e 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -245,6 +245,23 @@ enum rte_crypto_auth_algorithm { RTE_CRYPTO_AUTH_ZUC_EIA3, /**< ZUC algorithm in EIA3 mode */ + RTE_CRYPTO_AUTH_SHA3_224, + /**< 224 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_224_HMAC, + /**< HMAC using 224 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_256, + /**< 256 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_256_HMAC, + /**< HMAC using 256 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_384, + /**< 384 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_384_HMAC, + /**< HMAC using 384 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_512, + /**< 512 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_SHA3_512_HMAC, + /**< HMAC using 512 bit SHA3 algorithm. */ + RTE_CRYPTO_AUTH_LIST_END }; diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index 8745b6b0..63ae23f0 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -166,6 +166,31 @@ rte_crypto_aead_operation_strings[] = { [RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt" }; +/** + * Asymmetric crypto transform operation strings identifiers. 
+ */ +const char *rte_crypto_asym_xform_strings[] = { + [RTE_CRYPTO_ASYM_XFORM_NONE] = "none", + [RTE_CRYPTO_ASYM_XFORM_RSA] = "rsa", + [RTE_CRYPTO_ASYM_XFORM_MODEX] = "modexp", + [RTE_CRYPTO_ASYM_XFORM_MODINV] = "modinv", + [RTE_CRYPTO_ASYM_XFORM_DH] = "dh", + [RTE_CRYPTO_ASYM_XFORM_DSA] = "dsa", +}; + +/** + * Asymmetric crypto operation strings identifiers. + */ +const char *rte_crypto_asym_op_strings[] = { + [RTE_CRYPTO_ASYM_OP_ENCRYPT] = "encrypt", + [RTE_CRYPTO_ASYM_OP_DECRYPT] = "decrypt", + [RTE_CRYPTO_ASYM_OP_SIGN] = "sign", + [RTE_CRYPTO_ASYM_OP_VERIFY] = "verify", + [RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE] = "priv_key_generate", + [RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE] = "pub_key_generate", + [RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE] = "sharedsecret_compute", +}; + int rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum, const char *algo_string) @@ -217,6 +242,24 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, return -1; } +int __rte_experimental +rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum, + const char *xform_string) +{ + unsigned int i; + + for (i = 1; i < RTE_DIM(rte_crypto_asym_xform_strings); i++) { + if (strcmp(xform_string, + rte_crypto_asym_xform_strings[i]) == 0) { + *xform_enum = (enum rte_crypto_asym_xform_type) i; + return 0; + } + } + + /* Invalid string */ + return -1; +} + /** * The crypto auth operation strings identifiers. * It could be used in application command line. @@ -262,19 +305,62 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id, } -#define param_range_check(x, y) \ - (((x < y.min) || (x > y.max)) || \ - (y.increment != 0 && (x % y.increment) != 0)) +static int +param_range_check(uint16_t size, const struct rte_crypto_param_range *range) +{ + unsigned int next_size; + + /* Check lower/upper bounds */ + if (size < range->min) + return -1; + + if (size > range->max) + return -1; + + /* If range is actually only one value, size is correct */ + if (range->increment == 0) + return 0; + + /* Check if value is one of the supported sizes */ + for (next_size = range->min; next_size <= range->max; + next_size += range->increment) + if (size == next_size) + return 0; + + return -1; +} + +const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental +rte_cryptodev_asym_capability_get(uint8_t dev_id, + const struct rte_cryptodev_asym_capability_idx *idx) +{ + const struct rte_cryptodev_capabilities *capability; + struct rte_cryptodev_info dev_info; + unsigned int i = 0; + + memset(&dev_info, 0, sizeof(struct rte_cryptodev_info)); + rte_cryptodev_info_get(dev_id, &dev_info); + + while ((capability = &dev_info.capabilities[i++])->op != + RTE_CRYPTO_OP_TYPE_UNDEFINED) { + if (capability->op != RTE_CRYPTO_OP_TYPE_ASYMMETRIC) + continue; + + if (capability->asym.xform_capa.xform_type == idx->type) + return &capability->asym.xform_capa; + } + return NULL; +}; int rte_cryptodev_sym_capability_check_cipher( const struct rte_cryptodev_symmetric_capability *capability, uint16_t key_size, uint16_t iv_size) { - if (param_range_check(key_size, capability->cipher.key_size)) + if (param_range_check(key_size, &capability->cipher.key_size) != 0) return -1; - if (param_range_check(iv_size, capability->cipher.iv_size)) + if (param_range_check(iv_size, &capability->cipher.iv_size) != 0) return -1; return 0; @@ -285,13 +371,13 @@ rte_cryptodev_sym_capability_check_auth( const struct rte_cryptodev_symmetric_capability *capability, uint16_t key_size, uint16_t digest_size, 
uint16_t iv_size) { - if (param_range_check(key_size, capability->auth.key_size)) + if (param_range_check(key_size, &capability->auth.key_size) != 0) return -1; - if (param_range_check(digest_size, capability->auth.digest_size)) + if (param_range_check(digest_size, &capability->auth.digest_size) != 0) return -1; - if (param_range_check(iv_size, capability->auth.iv_size)) + if (param_range_check(iv_size, &capability->auth.iv_size) != 0) return -1; return 0; @@ -303,20 +389,56 @@ rte_cryptodev_sym_capability_check_aead( uint16_t key_size, uint16_t digest_size, uint16_t aad_size, uint16_t iv_size) { - if (param_range_check(key_size, capability->aead.key_size)) + if (param_range_check(key_size, &capability->aead.key_size) != 0) return -1; - if (param_range_check(digest_size, capability->aead.digest_size)) + if (param_range_check(digest_size, &capability->aead.digest_size) != 0) return -1; - if (param_range_check(aad_size, capability->aead.aad_size)) + if (param_range_check(aad_size, &capability->aead.aad_size) != 0) return -1; - if (param_range_check(iv_size, capability->aead.iv_size)) + if (param_range_check(iv_size, &capability->aead.iv_size) != 0) return -1; return 0; } +int __rte_experimental +rte_cryptodev_asym_xform_capability_check_optype( + const struct rte_cryptodev_asymmetric_xform_capability *capability, + enum rte_crypto_asym_op_type op_type) +{ + if (capability->op_types & (1 << op_type)) + return 1; + + return 0; +} + +int __rte_experimental +rte_cryptodev_asym_xform_capability_check_modlen( + const struct rte_cryptodev_asymmetric_xform_capability *capability, + uint16_t modlen) +{ + /* no need to check for limits, if min or max = 0 */ + if (capability->modlen.min != 0) { + if (modlen < capability->modlen.min) + return -1; + } + + if (capability->modlen.max != 0) { + if (modlen > capability->modlen.max) + return -1; + } + + /* in any case, check if given modlen is module increment */ + if (capability->modlen.increment != 0) { + if (modlen % (capability->modlen.increment)) + return -1; + } + + return 0; +} + const char * rte_cryptodev_get_feature_name(uint64_t flag) @@ -340,12 +462,22 @@ rte_cryptodev_get_feature_name(uint64_t flag) return "CPU_AESNI"; case RTE_CRYPTODEV_FF_HW_ACCELERATED: return "HW_ACCELERATED"; - case RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER: - return "MBUF_SCATTER_GATHER"; + case RTE_CRYPTODEV_FF_IN_PLACE_SGL: + return "IN_PLACE_SGL"; + case RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT: + return "OOP_SGL_IN_SGL_OUT"; + case RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT: + return "OOP_SGL_IN_LB_OUT"; + case RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT: + return "OOP_LB_IN_SGL_OUT"; + case RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT: + return "OOP_LB_IN_LB_OUT"; case RTE_CRYPTODEV_FF_CPU_NEON: return "CPU_NEON"; case RTE_CRYPTODEV_FF_CPU_ARM_CE: return "CPU_ARM_CE"; + case RTE_CRYPTODEV_FF_SECURITY: + return "SECURITY_PROTOCOL"; default: return NULL; } @@ -679,50 +811,6 @@ rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs, return 0; } -int -rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id) -{ - struct rte_cryptodev *dev; - - if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); - return -EINVAL; - } - - dev = &rte_crypto_devices[dev_id]; - if (queue_pair_id >= dev->data->nb_queue_pairs) { - CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP); - - return dev->dev_ops->queue_pair_start(dev, queue_pair_id); - -} - -int 
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id) -{ - struct rte_cryptodev *dev; - - if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); - return -EINVAL; - } - - dev = &rte_crypto_devices[dev_id]; - if (queue_pair_id >= dev->data->nb_queue_pairs) { - CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP); - - return dev->dev_ops->queue_pair_stop(dev, queue_pair_id); - -} - int rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) { @@ -943,6 +1031,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info) (*dev->dev_ops->dev_infos_get)(dev, dev_info); dev_info->driver_name = dev->device->driver->name; + dev_info->device = dev->device; } @@ -1075,8 +1164,46 @@ rte_cryptodev_sym_session_init(uint8_t dev_id, index = dev->driver_id; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_configure, -ENOTSUP); + if (sess->sess_private_data[index] == NULL) { - ret = dev->dev_ops->session_configure(dev, xforms, sess, mp); + ret = dev->dev_ops->sym_session_configure(dev, xforms, + sess, mp); + if (ret < 0) { + CDEV_LOG_ERR( + "dev_id %d failed to configure session details", + dev_id); + return ret; + } + } + + return 0; +} + +int __rte_experimental +rte_cryptodev_asym_session_init(uint8_t dev_id, + struct rte_cryptodev_asym_session *sess, + struct rte_crypto_asym_xform *xforms, + struct rte_mempool *mp) +{ + struct rte_cryptodev *dev; + uint8_t index; + int ret; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (sess == NULL || xforms == NULL || dev == NULL) + return -EINVAL; + + index = dev->driver_id; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_configure, + -ENOTSUP); + + if (sess->sess_private_data[index] == NULL) { + ret = dev->dev_ops->asym_session_configure(dev, + xforms, + sess, mp); if (ret < 0) { CDEV_LOG_ERR( "dev_id %d failed to configure session details", @@ -1099,69 +1226,54 @@ rte_cryptodev_sym_session_create(struct rte_mempool *mp) return NULL; } - /* Clear device session pointer */ - memset(sess, 0, (sizeof(void *) * nb_drivers)); + /* Clear device session pointer. + * Include the flag indicating presence of user data + */ + memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t)); return sess; } -int -rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id, - struct rte_cryptodev_sym_session *sess) +struct rte_cryptodev_asym_session * __rte_experimental +rte_cryptodev_asym_session_create(struct rte_mempool *mp) { - struct rte_cryptodev *dev; + struct rte_cryptodev_asym_session *sess; - if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); - return -EINVAL; + /* Allocate a session structure from the session pool */ + if (rte_mempool_get(mp, (void **)&sess)) { + CDEV_LOG_ERR("couldn't get object from session mempool"); + return NULL; } - dev = &rte_crypto_devices[dev_id]; - - /* The API is optional, not returning error if driver do not suuport */ - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0); - - void *sess_priv = get_session_private_data(sess, dev->driver_id); - - if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) { - CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session", - dev_id, qp_id); - return -EPERM; - } + /* Clear device session pointer. 
+ * Include the flag indicating presence of private data + */ + memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t)); - return 0; + return sess; } int -rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id, +rte_cryptodev_sym_session_clear(uint8_t dev_id, struct rte_cryptodev_sym_session *sess) { struct rte_cryptodev *dev; - if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { - CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); - return -EINVAL; - } - - dev = &rte_crypto_devices[dev_id]; + dev = rte_cryptodev_pmd_get_dev(dev_id); - /* The API is optional, not returning error if driver do not suuport */ - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0); + if (dev == NULL || sess == NULL) + return -EINVAL; - void *sess_priv = get_session_private_data(sess, dev->driver_id); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_clear, -ENOTSUP); - if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) { - CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session", - dev_id, qp_id); - return -EPERM; - } + dev->dev_ops->sym_session_clear(dev, sess); return 0; } -int -rte_cryptodev_sym_session_clear(uint8_t dev_id, - struct rte_cryptodev_sym_session *sess) +int __rte_experimental +rte_cryptodev_asym_session_clear(uint8_t dev_id, + struct rte_cryptodev_asym_session *sess) { struct rte_cryptodev *dev; @@ -1170,7 +1282,9 @@ rte_cryptodev_sym_session_clear(uint8_t dev_id, if (dev == NULL || sess == NULL) return -EINVAL; - dev->dev_ops->session_clear(dev, sess); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_clear, -ENOTSUP); + + dev->dev_ops->asym_session_clear(dev, sess); return 0; } @@ -1187,7 +1301,7 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess) /* Check that all device private data has been freed */ for (i = 0; i < nb_drivers; i++) { - sess_priv = get_session_private_data(sess, i); + sess_priv = get_sym_session_private_data(sess, i); if (sess_priv != NULL) return -EBUSY; } @@ -1199,18 +1313,55 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess) return 0; } +int __rte_experimental +rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess) +{ + uint8_t i; + void *sess_priv; + struct rte_mempool *sess_mp; + + if (sess == NULL) + return -EINVAL; + + /* Check that all device private data has been freed */ + for (i = 0; i < nb_drivers; i++) { + sess_priv = get_asym_session_private_data(sess, i); + if (sess_priv != NULL) + return -EBUSY; + } + + /* Return session to mempool */ + sess_mp = rte_mempool_from_obj(sess); + rte_mempool_put(sess_mp, sess); + + return 0; +} + + unsigned int -rte_cryptodev_get_header_session_size(void) +rte_cryptodev_sym_get_header_session_size(void) { /* * Header contains pointers to the private data - * of all registered drivers + * of all registered drivers, and a flag which + * indicates presence of user data */ - return (sizeof(void *) * nb_drivers); + return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t)); +} + +unsigned int __rte_experimental +rte_cryptodev_asym_get_header_session_size(void) +{ + /* + * Header contains pointers to the private data + * of all registered drivers, and a flag which + * indicates presence of private data + */ + return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t)); } unsigned int -rte_cryptodev_get_private_session_size(uint8_t dev_id) +rte_cryptodev_sym_get_private_session_size(uint8_t dev_id) { struct rte_cryptodev *dev; unsigned int header_size = sizeof(void *) * nb_drivers; @@ -1221,10 +1372,10 @@ 
rte_cryptodev_get_private_session_size(uint8_t dev_id) dev = rte_cryptodev_pmd_get_dev(dev_id); - if (*dev->dev_ops->session_get_size == NULL) + if (*dev->dev_ops->sym_session_get_size == NULL) return 0; - priv_sess_size = (*dev->dev_ops->session_get_size)(dev); + priv_sess_size = (*dev->dev_ops->sym_session_get_size)(dev); /* * If size is less than session header size, @@ -1238,6 +1389,61 @@ rte_cryptodev_get_private_session_size(uint8_t dev_id) } +unsigned int __rte_experimental +rte_cryptodev_asym_get_private_session_size(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + unsigned int header_size = sizeof(void *) * nb_drivers; + unsigned int priv_sess_size; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) + return 0; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (*dev->dev_ops->asym_session_get_size == NULL) + return 0; + + priv_sess_size = (*dev->dev_ops->asym_session_get_size)(dev); + if (priv_sess_size < header_size) + return header_size; + + return priv_sess_size; + +} + +int __rte_experimental +rte_cryptodev_sym_session_set_user_data( + struct rte_cryptodev_sym_session *sess, + void *data, + uint16_t size) +{ + uint16_t off_set = sizeof(void *) * nb_drivers; + uint8_t *user_data_present = (uint8_t *)sess + off_set; + + if (sess == NULL) + return -EINVAL; + + *user_data_present = 1; + off_set += sizeof(uint8_t); + rte_memcpy((uint8_t *)sess + off_set, data, size); + return 0; +} + +void * __rte_experimental +rte_cryptodev_sym_session_get_user_data( + struct rte_cryptodev_sym_session *sess) +{ + uint16_t off_set = sizeof(void *) * nb_drivers; + uint8_t *user_data_present = (uint8_t *)sess + off_set; + + if (sess == NULL || !*user_data_present) + return NULL; + + off_set += sizeof(uint8_t); + return (uint8_t *)sess + off_set; +} + /** Initialise rte_crypto_op mempool element */ static void rte_crypto_op_init(struct rte_mempool *mempool, @@ -1265,9 +1471,17 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type, struct rte_crypto_op_pool_private *priv; unsigned elt_size = sizeof(struct rte_crypto_op) + - sizeof(struct rte_crypto_sym_op) + priv_size; + if (type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { + elt_size += sizeof(struct rte_crypto_sym_op); + } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) { + elt_size += sizeof(struct rte_crypto_asym_op); + } else { + CDEV_LOG_ERR("Invalid op_type\n"); + return NULL; + } + /* lookup mempool in case already allocated */ struct rte_mempool *mp = rte_mempool_lookup(name); diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index c8fa6893..4099823f 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -1,32 +1,5 @@ -/*- - * - * Copyright(c) 2015-2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015-2017 Intel Corporation. */ #ifndef _RTE_CRYPTODEV_H_ @@ -65,7 +38,6 @@ extern const char **rte_cyptodev_names; RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ RTE_FMT_TAIL(__VA_ARGS__,))) -#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG #define CDEV_LOG_DEBUG(...) \ RTE_LOG(DEBUG, CRYPTODEV, \ RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ @@ -76,13 +48,6 @@ extern const char **rte_cyptodev_names; RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,))) -#else -#define CDEV_LOG_DEBUG(...) (void)0 -#define CDEV_PMD_TRACE(...) (void)0 -#endif - - - /** * A macro that points to an offset from the start * of the crypto operation structure (rte_crypto_op) @@ -178,6 +143,35 @@ struct rte_cryptodev_symmetric_capability { }; }; +/** + * Asymmetric Xform Crypto Capability + * + */ +struct rte_cryptodev_asymmetric_xform_capability { + enum rte_crypto_asym_xform_type xform_type; + /**< Transform type: RSA/MODEXP/DH/DSA/MODINV */ + + uint32_t op_types; + /**< bitmask for supported rte_crypto_asym_op_type */ + + __extension__ + union { + struct rte_crypto_param_range modlen; + /**< Range of modulus length supported by modulus based xform. + * Value 0 mean implementation default + */ + }; +}; + +/** + * Asymmetric Crypto Capability + * + */ +struct rte_cryptodev_asymmetric_capability { + struct rte_cryptodev_asymmetric_xform_capability xform_capa; +}; + + /** Structure used to capture a capability of a crypto device */ struct rte_cryptodev_capabilities { enum rte_crypto_op_type op; @@ -187,6 +181,8 @@ struct rte_cryptodev_capabilities { union { struct rte_cryptodev_symmetric_capability sym; /**< Symmetric operation capability parameters */ + struct rte_cryptodev_asymmetric_capability asym; + /**< Asymmetric operation capability parameters */ }; }; @@ -201,7 +197,17 @@ struct rte_cryptodev_sym_capability_idx { }; /** - * Provide capabilities available for defined device and algorithm + * Structure used to describe asymmetric crypto xforms + * Each xform maps to one asym algorithm. + * + */ +struct rte_cryptodev_asym_capability_idx { + enum rte_crypto_asym_xform_type type; + /**< Asymmetric xform (algo) type */ +}; + +/** + * Provide capabilities available for defined device and algorithm * * @param dev_id The identifier of the device. * @param idx Description of crypto algorithms. @@ -214,6 +220,20 @@ const struct rte_cryptodev_symmetric_capability * rte_cryptodev_sym_capability_get(uint8_t dev_id, const struct rte_cryptodev_sym_capability_idx *idx); +/** + * Provide capabilities available for defined device and xform + * + * @param dev_id The identifier of the device. + * @param idx Description of asym crypto xform. 
+ * + * @return + * - Return description of the asymmetric crypto capability if exist. + * - Return NULL if the capability not exist. + */ +const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental +rte_cryptodev_asym_capability_get(uint8_t dev_id, + const struct rte_cryptodev_asym_capability_idx *idx); + /** * Check if key size and initial vector are supported * in crypto cipher capability @@ -269,6 +289,36 @@ rte_cryptodev_sym_capability_check_aead( uint16_t key_size, uint16_t digest_size, uint16_t aad_size, uint16_t iv_size); +/** + * Check if op type is supported + * + * @param capability Description of the asymmetric crypto capability. + * @param op_type op type + * + * @return + * - Return 1 if the op type is supported + * - Return 0 if unsupported + */ +int __rte_experimental +rte_cryptodev_asym_xform_capability_check_optype( + const struct rte_cryptodev_asymmetric_xform_capability *capability, + enum rte_crypto_asym_op_type op_type); + +/** + * Check if modulus length is in supported range + * + * @param capability Description of the asymmetric crypto capability. + * @param modlen modulus length. + * + * @return + * - Return 0 if the parameters are in range of the capability. + * - Return -1 if the parameters are out of range of the capability. + */ +int __rte_experimental +rte_cryptodev_asym_xform_capability_check_modlen( + const struct rte_cryptodev_asymmetric_xform_capability *capability, + uint16_t modlen); + /** * Provide the cipher algorithm enum, given an algorithm string * @@ -314,6 +364,22 @@ int rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, const char *algo_string); +/** + * Provide the Asymmetric xform enum, given an xform string + * + * @param xform_enum A pointer to the xform type + * enum to be filled + * @param xform_string xform string + * + * @return + * - Return -1 if string is not valid + * - Return 0 if the string is valid + */ +int __rte_experimental +rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum, + const char *xform_string); + + /** Macro used at end of crypto PMD list */ #define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \ { RTE_CRYPTO_OP_TYPE_UNDEFINED } @@ -327,31 +393,50 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum, * * Keep these flags synchronised with rte_cryptodev_get_feature_name() */ -#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0) +#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0) /**< Symmetric crypto operations are supported */ -#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1) +#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1) /**< Asymmetric crypto operations are supported */ -#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2) +#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2) /**< Chaining symmetric crypto operations are supported */ -#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3) +#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3) /**< Utilises CPU SIMD SSE instructions */ -#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4) +#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4) /**< Utilises CPU SIMD AVX instructions */ -#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5) +#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5) /**< Utilises CPU SIMD AVX2 instructions */ -#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6) +#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6) /**< Utilises CPU AES-NI instructions */ -#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7) -/**< Operations are off-loaded to an external hardware accelerator */ 
-#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8) +#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7) +/**< Operations are off-loaded to an + * external hardware accelerator + */ +#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8) /**< Utilises CPU SIMD AVX512 instructions */ -#define RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER (1ULL << 9) -/**< Scatter-gather mbufs are supported */ -#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 10) +#define RTE_CRYPTODEV_FF_IN_PLACE_SGL (1ULL << 9) +/**< In-place Scatter-gather (SGL) buffers, with multiple segments, + * are supported + */ +#define RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT (1ULL << 10) +/**< Out-of-place Scatter-gather (SGL) buffers are + * supported in input and output + */ +#define RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT (1ULL << 11) +/**< Out-of-place Scatter-gather (SGL) buffers are supported + * in input, combined with linear buffers (LB), with a + * single segment in output + */ +#define RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT (1ULL << 12) +/**< Out-of-place Scatter-gather (SGL) buffers are supported + * in output, combined with linear buffers (LB) in input + */ +#define RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT (1ULL << 13) +/**< Out-of-place linear buffers (LB) are supported in input and output */ +#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 14) /**< Utilises CPU NEON instructions */ -#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11) +#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 15) /**< Utilises ARM CPU Cryptographic Extensions */ -#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12) +#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 16) /**< Support Security Protocol Processing */ @@ -369,11 +454,12 @@ rte_cryptodev_get_feature_name(uint64_t flag); /** Crypto device information */ struct rte_cryptodev_info { - const char *driver_name; /**< Driver name. */ - uint8_t driver_id; /**< Driver identifier */ - struct rte_pci_device *pci_dev; /**< PCI information. */ + const char *driver_name; /**< Driver name. */ + uint8_t driver_id; /**< Driver identifier */ + struct rte_device *device; /**< Generic device information. */ - uint64_t feature_flags; /**< Feature flags */ + uint64_t feature_flags; + /**< Feature flags exposes HW/SW features for the given device */ const struct rte_cryptodev_capabilities *capabilities; /**< Array of devices supported capabilities */ @@ -381,12 +467,17 @@ struct rte_cryptodev_info { unsigned max_nb_queue_pairs; /**< Maximum number of queues pairs supported by device. */ + uint16_t min_mbuf_headroom_req; + /**< Minimum mbuf headroom required by device */ + + uint16_t min_mbuf_tailroom_req; + /**< Minimum mbuf tailroom required by device */ + struct { unsigned max_nb_sessions; - /**< Maximum number of sessions supported by device. */ - unsigned int max_nb_sessions_per_qp; - /**< Maximum number of sessions per queue pair. - * Default 0 for infinite sessions + /**< Maximum number of sessions supported by device. + * If 0, the device does not have any limitation in + * number of sessions that can be used. */ } sym; }; @@ -601,39 +692,6 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, const struct rte_cryptodev_qp_conf *qp_conf, int socket_id, struct rte_mempool *session_pool); -/** - * Start a specified queue pair of a device. It is used - * when deferred_start flag of the specified queue is true. - * - * @param dev_id The identifier of the device - * @param queue_pair_id The index of the queue pair to start. The value - * must be in the range [0, nb_queue_pair - 1] - * previously supplied to - * rte_crypto_dev_configure(). 
- * @return - * - 0: Success, the transmit queue is correctly set up. - * - -EINVAL: The dev_id or the queue_id out of range. - * - -ENOTSUP: The function not supported in PMD driver. - */ -extern int -rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id); - -/** - * Stop specified queue pair of a device - * - * @param dev_id The identifier of the device - * @param queue_pair_id The index of the queue pair to stop. The value - * must be in the range [0, nb_queue_pair - 1] - * previously supplied to - * rte_cryptodev_configure(). - * @return - * - 0: Success, the transmit queue is correctly set up. - * - -EINVAL: The dev_id or the queue_id out of range. - * - -ENOTSUP: The function not supported in PMD driver. - */ -extern int -rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id); - /** * Get the number of queue pairs on a specific crypto device * @@ -749,7 +807,7 @@ struct rte_cryptodev { struct rte_cryptodev_ops *dev_ops; /**< Functions exported by PMD */ uint64_t feature_flags; - /**< Supported features */ + /**< Feature flags exposes HW/SW features for the given device */ struct rte_device *device; /**< Backing device */ @@ -897,9 +955,14 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, */ struct rte_cryptodev_sym_session { __extension__ void *sess_private_data[0]; - /**< Private session material */ + /**< Private symmetric session material */ }; +/** Cryptodev asymmetric crypto session */ +struct rte_cryptodev_asym_session { + __extension__ void *sess_private_data[0]; + /**< Private asymmetric session material */ +}; /** * Create symmetric crypto session header (generic with no private data) @@ -913,6 +976,18 @@ struct rte_cryptodev_sym_session { struct rte_cryptodev_sym_session * rte_cryptodev_sym_session_create(struct rte_mempool *mempool); +/** + * Create asymmetric crypto session header (generic with no private data) + * + * @param mempool mempool to allocate asymmetric session + * objects from + * @return + * - On success return pointer to asym-session + * - On failure returns NULL + */ +struct rte_cryptodev_asym_session * __rte_experimental +rte_cryptodev_asym_session_create(struct rte_mempool *mempool); + /** * Frees symmetric crypto session header, after checking that all * the device private data has been freed, returning it @@ -928,6 +1003,21 @@ rte_cryptodev_sym_session_create(struct rte_mempool *mempool); int rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess); +/** + * Frees asymmetric crypto session header, after checking that all + * the device private data has been freed, returning it + * to its original mempool. + * + * @param sess Session header to be freed. + * + * @return + * - 0 if successful. + * - -EINVAL if session is NULL. + * - -EBUSY if not all device private data has been freed. + */ +int __rte_experimental +rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess); + /** * Fill out private data for the device id, based on its device type. * @@ -940,7 +1030,8 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess); * @return * - On success, zero. * - -EINVAL if input parameters are invalid. - * - -ENOTSUP if crypto device does not support the crypto transform. + * - -ENOTSUP if crypto device does not support the crypto transform or + * does not support symmetric operations. * - -ENOMEM if the private session could not be allocated. 
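 *
 * A typical symmetric-session lifecycle, shown only as a sketch (the mempool
 * names are illustrative; the header and private-data pools are assumed to be
 * sized with rte_cryptodev_sym_get_header_session_size() and
 * rte_cryptodev_sym_get_private_session_size() respectively):
 *   sess = rte_cryptodev_sym_session_create(header_mp);
 *   rte_cryptodev_sym_session_init(dev_id, sess, &xform, priv_mp);
 *   ... attach sess to the symmetric op of each rte_crypto_op, enqueue/dequeue ...
 *   rte_cryptodev_sym_session_clear(dev_id, sess);
 *   rte_cryptodev_sym_session_free(sess);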
*/ int @@ -949,9 +1040,32 @@ rte_cryptodev_sym_session_init(uint8_t dev_id, struct rte_crypto_sym_xform *xforms, struct rte_mempool *mempool); +/** + * Initialize asymmetric session on a device with specific asymmetric xform + * + * @param dev_id ID of device that we want the session to be used on + * @param sess Session to be set up on a device + * @param xforms Asymmetric crypto transform operations to apply on flow + * processed with this session + * @param mempool Mempool to be used for internal allocation. + * + * @return + * - On success, zero. + * - -EINVAL if input parameters are invalid. + * - -ENOTSUP if crypto device does not support the crypto transform. + * - -ENOMEM if the private session could not be allocated. + */ +int __rte_experimental +rte_cryptodev_asym_session_init(uint8_t dev_id, + struct rte_cryptodev_asym_session *sess, + struct rte_crypto_asym_xform *xforms, + struct rte_mempool *mempool); + /** * Frees private data for the device id, based on its device type, - * returning it to its mempool. + * returning it to its mempool. It is the application's responsibility + * to ensure that private session data is not cleared while there are + * still in-flight operations using it. * * @param dev_id ID of device that uses the session. * @param sess Session containing the reference to the private data * @@ -959,63 +1073,70 @@ rte_cryptodev_sym_session_init(uint8_t dev_id, * @return * - 0 if successful. * - -EINVAL if device is invalid or session is NULL. + * - -ENOTSUP if crypto device does not support symmetric operations. */ int rte_cryptodev_sym_session_clear(uint8_t dev_id, struct rte_cryptodev_sym_session *sess); +/** + * Frees resources held by asymmetric session during rte_cryptodev_session_init + * + * @param dev_id ID of device that uses the asymmetric session. + * @param sess Asymmetric session setup on device using + * rte_cryptodev_session_init + * @return + * - 0 if successful. + * - -EINVAL if device is invalid or session is NULL. + */ +int __rte_experimental +rte_cryptodev_asym_session_clear(uint8_t dev_id, + struct rte_cryptodev_asym_session *sess); + /** * Get the size of the header session, for all registered drivers. * * @return - * Size of the header session. + * Size of the symmetric header session. */ unsigned int -rte_cryptodev_get_header_session_size(void); +rte_cryptodev_sym_get_header_session_size(void); /** - * Get the size of the private session data for a device. - * - * @param dev_id The device identifier. + * Get the size of the asymmetric session header, for all registered drivers. * * @return - * - Size of the private data, if successful - * - 0 if device is invalid or does not have private session + * Size of the asymmetric header session. */ -unsigned int -rte_cryptodev_get_private_session_size(uint8_t dev_id); +unsigned int __rte_experimental +rte_cryptodev_asym_get_header_session_size(void); /** - * Attach queue pair with sym session. + * Get the size of the private symmetric session data + * for a device. * - * @param dev_id Device to which the session will be attached. - * @param qp_id Queue pair to which the session will be attached. - * @param session Session pointer previously allocated by - * *rte_cryptodev_sym_session_create*. + * @param dev_id The device identifier. * * @return - * - On success, zero. - * - On failure, a negative value.
+ * - Size of the private data, if successful + * - 0 if device is invalid or does not have private + * symmetric session */ -int -rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id, - struct rte_cryptodev_sym_session *session); +unsigned int +rte_cryptodev_sym_get_private_session_size(uint8_t dev_id); /** - * Detach queue pair with sym session. + * Get the size of the private data for asymmetric session + * on device * - * @param dev_id Device to which the session is attached. - * @param qp_id Queue pair to which the session is attached. - * @param session Session pointer previously allocated by - * *rte_cryptodev_sym_session_create*. + * @param dev_id The device identifier. * * @return - * - On success, zero. - * - On failure, a negative value. + * - Size of the asymmetric private data, if successful + * - 0 if device is invalid or does not have private session */ -int -rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id, - struct rte_cryptodev_sym_session *session); +unsigned int __rte_experimental +rte_cryptodev_asym_get_private_session_size(uint8_t dev_id); /** * Provide driver identifier. @@ -1037,6 +1158,38 @@ int rte_cryptodev_driver_id_get(const char *name); */ const char *rte_cryptodev_driver_name_get(uint8_t driver_id); +/** + * Store user data in a session. + * + * @param sess Session pointer allocated by + * *rte_cryptodev_sym_session_create*. + * @param data Pointer to the user data. + * @param size Size of the user data. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int __rte_experimental +rte_cryptodev_sym_session_set_user_data( + struct rte_cryptodev_sym_session *sess, + void *data, + uint16_t size); + +/** + * Get user data stored in a session. + * + * @param sess Session pointer allocated by + * *rte_cryptodev_sym_session_create*. + * + * @return + * - On success return pointer to user data. + * - On failure returns NULL. + */ +void * __rte_experimental +rte_cryptodev_sym_session_get_user_data( + struct rte_cryptodev_sym_session *sess); + #ifdef __cplusplus } #endif diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c index f2aac24b..2088ac3f 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.c +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c @@ -65,13 +65,6 @@ rte_cryptodev_pmd_parse_input_args( if (ret < 0) goto free_kvlist; - ret = rte_kvargs_process(kvlist, - RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG, - &rte_cryptodev_pmd_parse_uint_arg, - ¶ms->max_nb_sessions); - if (ret < 0) - goto free_kvlist; - ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_PMD_SOCKET_ID_ARG, &rte_cryptodev_pmd_parse_uint_arg, @@ -109,10 +102,9 @@ rte_cryptodev_pmd_create(const char *name, device->driver->name, name); CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s," - "socket id: %d, max queue pairs: %u, max sessions: %u", + "socket id: %d, max queue pairs: %u", device->driver->name, name, - params->socket_id, params->max_nb_queue_pairs, - params->max_nb_sessions); + params->socket_id, params->max_nb_queue_pairs); /* allocate device structure */ cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id); diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index 089848e0..6ff49d64 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -1,32 +1,5 @@ -/*- - * - * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015-2016 Intel Corporation. */ #ifndef _RTE_CRYPTODEV_PMD_H_ @@ -59,18 +32,15 @@ extern "C" { #define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8 -#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048 #define RTE_CRYPTODEV_PMD_NAME_ARG ("name") #define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs") -#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions") #define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id") static const char * const cryptodev_pmd_valid_params[] = { RTE_CRYPTODEV_PMD_NAME_ARG, RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG, - RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG, RTE_CRYPTODEV_PMD_SOCKET_ID_ARG }; @@ -83,7 +53,6 @@ struct rte_cryptodev_pmd_init_params { size_t private_data_size; int socket_id; unsigned int max_nb_queue_pairs; - unsigned int max_nb_sessions; }; /** Global structure used for maintaining state of allocated crypto devices */ @@ -215,28 +184,6 @@ typedef void (*cryptodev_stats_reset_t)(struct rte_cryptodev *dev); typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev, struct rte_cryptodev_info *dev_info); -/** - * Start queue pair of a device. - * - * @param dev Crypto device pointer - * @param qp_id Queue Pair Index - * - * @return Returns 0 on success. - */ -typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev, - uint16_t qp_id); - -/** - * Stop queue pair of a device. - * - * @param dev Crypto device pointer - * @param qp_id Queue Pair Index - * - * @return Returns 0 on success. - */ -typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev, - uint16_t qp_id); - /** * Setup a queue pair for a device. 
* @@ -302,6 +249,17 @@ typedef int (*cryptodev_sym_create_session_pool_t)( */ typedef unsigned (*cryptodev_sym_get_session_private_size_t)( struct rte_cryptodev *dev); +/** + * Get the size of a asymmetric cryptodev session + * + * @param dev Crypto device pointer + * + * @return + * - On success returns the size of the session structure for device + * - On failure returns 0 + */ +typedef unsigned int (*cryptodev_asym_get_session_private_size_t)( + struct rte_cryptodev *dev); /** * Configure a Crypto session on a device. @@ -321,7 +279,24 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, struct rte_crypto_sym_xform *xform, struct rte_cryptodev_sym_session *session, struct rte_mempool *mp); - +/** + * Configure a Crypto asymmetric session on a device. + * + * @param dev Crypto device pointer + * @param xform Single or chain of crypto xforms + * @param priv_sess Pointer to cryptodev's private session structure + * @param mp Mempool where the private session is allocated + * + * @return + * - Returns 0 if private session structure have been created successfully. + * - Returns -EINVAL if input parameters are invalid. + * - Returns -ENOTSUP if crypto device does not support the crypto transform. + * - Returns -ENOMEM if the private session could not be allocated. + */ +typedef int (*cryptodev_asym_configure_session_t)(struct rte_cryptodev *dev, + struct rte_crypto_asym_xform *xform, + struct rte_cryptodev_asym_session *session, + struct rte_mempool *mp); /** * Free driver private session data. * @@ -330,32 +305,14 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, */ typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev, struct rte_cryptodev_sym_session *sess); - -/** - * Optional API for drivers to attach sessions with queue pair. - * @param dev Crypto device pointer - * @param qp_id queue pair id for attaching session - * @param priv_sess Pointer to cryptodev's private session structure - * @return - * - Return 0 on success - */ -typedef int (*cryptodev_sym_queue_pair_attach_session_t)( - struct rte_cryptodev *dev, - uint16_t qp_id, - void *session_private); - /** - * Optional API for drivers to detach sessions from queue pair. + * Free asymmetric session private data. + * * @param dev Crypto device pointer - * @param qp_id queue pair id for detaching session - * @param priv_sess Pointer to cryptodev's private session structure - * @return - * - Return 0 on success + * @param sess Cryptodev session structure */ -typedef int (*cryptodev_sym_queue_pair_detach_session_t)( - struct rte_cryptodev *dev, - uint16_t qp_id, - void *session_private); +typedef void (*cryptodev_asym_free_session_t)(struct rte_cryptodev *dev, + struct rte_cryptodev_asym_session *sess); /** Crypto device operations function pointer table */ struct rte_cryptodev_ops { @@ -375,23 +332,21 @@ struct rte_cryptodev_ops { /**< Set up a device queue pair. */ cryptodev_queue_pair_release_t queue_pair_release; /**< Release a queue pair. */ - cryptodev_queue_pair_start_t queue_pair_start; - /**< Start a queue pair. */ - cryptodev_queue_pair_stop_t queue_pair_stop; - /**< Stop a queue pair. */ cryptodev_queue_pair_count_t queue_pair_count; /**< Get count of the queue pairs. */ - cryptodev_sym_get_session_private_size_t session_get_size; + cryptodev_sym_get_session_private_size_t sym_session_get_size; /**< Return private session. 
*/ - cryptodev_sym_configure_session_t session_configure; + cryptodev_asym_get_session_private_size_t asym_session_get_size; + /**< Return asym session private size. */ + cryptodev_sym_configure_session_t sym_session_configure; /**< Configure a Crypto session. */ - cryptodev_sym_free_session_t session_clear; + cryptodev_asym_configure_session_t asym_session_configure; + /**< Configure asymmetric Crypto session. */ + cryptodev_sym_free_session_t sym_session_clear; + /**< Clear a Crypto sessions private data. */ + cryptodev_asym_free_session_t asym_session_clear; /**< Clear a Crypto sessions private data. */ - cryptodev_sym_queue_pair_attach_session_t qp_attach_session; - /**< Attach session to queue pair. */ - cryptodev_sym_queue_pair_detach_session_t qp_detach_session; - /**< Detach session from queue pair. */ }; @@ -516,20 +471,32 @@ uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv, #define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\ -RTE_INIT(init_ ##driver_id);\ -static void init_ ##driver_id(void)\ +RTE_INIT(init_ ##driver_id)\ {\ - driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv).driver);\ + driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv));\ +} + +static inline void * +get_sym_session_private_data(const struct rte_cryptodev_sym_session *sess, + uint8_t driver_id) { + return sess->sess_private_data[driver_id]; +} + +static inline void +set_sym_session_private_data(struct rte_cryptodev_sym_session *sess, + uint8_t driver_id, void *private_data) +{ + sess->sess_private_data[driver_id] = private_data; } static inline void * -get_session_private_data(const struct rte_cryptodev_sym_session *sess, +get_asym_session_private_data(const struct rte_cryptodev_asym_session *sess, uint8_t driver_id) { return sess->sess_private_data[driver_id]; } static inline void -set_session_private_data(struct rte_cryptodev_sym_session *sess, +set_asym_session_private_data(struct rte_cryptodev_asym_session *sess, uint8_t driver_id, void *private_data) { sess->sess_private_data[driver_id] = private_data; diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map index eb47308b..7ca00735 100644 --- a/lib/librte_cryptodev/rte_cryptodev_version.map +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -22,8 +22,6 @@ DPDK_16.04 { rte_cryptodev_stop; rte_cryptodev_queue_pair_count; rte_cryptodev_queue_pair_setup; - rte_cryptodev_queue_pair_start; - rte_cryptodev_queue_pair_stop; rte_crypto_op_pool_create; local: *; @@ -52,8 +50,6 @@ DPDK_17.05 { rte_cryptodev_get_auth_algo_enum; rte_cryptodev_get_cipher_algo_enum; - rte_cryptodev_queue_pair_attach_sym_session; - rte_cryptodev_queue_pair_detach_sym_session; } DPDK_17.02; @@ -65,8 +61,6 @@ DPDK_17.08 { rte_cryptodev_driver_id_get; rte_cryptodev_driver_name_get; rte_cryptodev_get_aead_algo_enum; - rte_cryptodev_get_header_session_size; - rte_cryptodev_get_private_session_size; rte_cryptodev_sym_capability_check_aead; rte_cryptodev_sym_session_init; rte_cryptodev_sym_session_clear; @@ -85,3 +79,30 @@ DPDK_17.11 { rte_cryptodev_pmd_parse_input_args; } DPDK_17.08; + +DPDK_18.05 { + global: + + rte_cryptodev_sym_get_header_session_size; + rte_cryptodev_sym_get_private_session_size; + +} DPDK_17.11; + +EXPERIMENTAL { + global: + + rte_cryptodev_asym_capability_get; + rte_cryptodev_asym_get_header_session_size; + rte_cryptodev_asym_get_private_session_size; + rte_cryptodev_asym_get_xform_enum; + rte_cryptodev_asym_session_clear; + 
rte_cryptodev_asym_session_create; + rte_cryptodev_asym_session_free; + rte_cryptodev_asym_session_init; + rte_cryptodev_asym_xform_capability_check_modlen; + rte_cryptodev_asym_xform_capability_check_optype; + rte_cryptodev_sym_session_get_user_data; + rte_cryptodev_sym_session_set_user_data; + rte_crypto_asym_op_strings; + rte_crypto_asym_xform_strings; +}; diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile index 9d8e2477..5b06b216 100644 --- a/lib/librte_eal/bsdapp/Makefile +++ b/lib/librte_eal/bsdapp/Makefile @@ -4,7 +4,5 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal -DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem -DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile deleted file mode 100644 index 33ce83ee..00000000 --- a/lib/librte_eal/bsdapp/contigmem/BSDmakefile +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation -# - -KMOD= contigmem -SRCS= contigmem.c device_if.h bus_if.h - -.include diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile deleted file mode 100644 index 428a7ede..00000000 --- a/lib/librte_eal/bsdapp/contigmem/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# module name and path -# -MODULE = contigmem - -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -MODULE_CFLAGS += -Winline -Wall -Werror -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h - -# -# all source are stored in SRCS-y -# -SRCS-y := contigmem.c - -include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c deleted file mode 100644 index 1715b5dc..00000000 --- a/lib/librte_eal/bsdapp/contigmem/contigmem.c +++ /dev/null @@ -1,353 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -struct contigmem_buffer { - void *addr; - int refcnt; - struct mtx mtx; -}; - -struct contigmem_vm_handle { - int buffer_index; -}; - -static int contigmem_load(void); -static int contigmem_unload(void); -static int contigmem_physaddr(SYSCTL_HANDLER_ARGS); - -static d_mmap_single_t contigmem_mmap_single; -static d_open_t contigmem_open; -static d_close_t contigmem_close; - -static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; -static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; - -static eventhandler_tag contigmem_eh_tag; -static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; -static struct cdev *contigmem_cdev = NULL; -static int contigmem_refcnt; - -TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers); -TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size); - -static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem"); - -SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD, - &contigmem_num_buffers, 0, "Number of contigmem buffers allocated"); -SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, 
CTLFLAG_RD, - &contigmem_buffer_size, 0, "Size of each contiguous buffer"); -SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD, - &contigmem_refcnt, 0, "Number of references to contigmem"); - -static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0, - "physaddr"); - -MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations"); - -static int contigmem_modevent(module_t mod, int type, void *arg) -{ - int error = 0; - - switch (type) { - case MOD_LOAD: - error = contigmem_load(); - break; - case MOD_UNLOAD: - error = contigmem_unload(); - break; - default: - break; - } - - return error; -} - -moduledata_t contigmem_mod = { - "contigmem", - (modeventhand_t)contigmem_modevent, - 0 -}; - -DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); -MODULE_VERSION(contigmem, 1); - -static struct cdevsw contigmem_ops = { - .d_name = "contigmem", - .d_version = D_VERSION, - .d_flags = D_TRACKCLOSE, - .d_mmap_single = contigmem_mmap_single, - .d_open = contigmem_open, - .d_close = contigmem_close, -}; - -static int -contigmem_load() -{ - char index_string[8], description[32]; - int i, error = 0; - void *addr; - - if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) { - printf("%d buffers requested is greater than %d allowed\n", - contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS); - error = EINVAL; - goto error; - } - - if (contigmem_buffer_size < PAGE_SIZE || - (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) { - printf("buffer size 0x%lx is not greater than PAGE_SIZE and " - "power of two\n", contigmem_buffer_size); - error = EINVAL; - goto error; - } - - for (i = 0; i < contigmem_num_buffers; i++) { - addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, - 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); - if (addr == NULL) { - printf("contigmalloc failed for buffer %d\n", i); - error = ENOMEM; - goto error; - } - - printf("%2u: virt=%p phys=%p\n", i, addr, - (void *)pmap_kextract((vm_offset_t)addr)); - - mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF); - contigmem_buffers[i].addr = addr; - contigmem_buffers[i].refcnt = 0; - - snprintf(index_string, sizeof(index_string), "%d", i); - snprintf(description, sizeof(description), - "phys addr for buffer %d", i); - SYSCTL_ADD_PROC(NULL, - &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO, - index_string, CTLTYPE_U64 | CTLFLAG_RD, - (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU", - description); - } - - contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT, - GID_WHEEL, 0600, "contigmem"); - - return 0; - -error: - for (i = 0; i < contigmem_num_buffers; i++) { - if (contigmem_buffers[i].addr != NULL) - contigfree(contigmem_buffers[i].addr, - contigmem_buffer_size, M_CONTIGMEM); - if (mtx_initialized(&contigmem_buffers[i].mtx)) - mtx_destroy(&contigmem_buffers[i].mtx); - } - - return error; -} - -static int -contigmem_unload() -{ - int i; - - if (contigmem_refcnt > 0) - return EBUSY; - - if (contigmem_cdev != NULL) - destroy_dev(contigmem_cdev); - - if (contigmem_eh_tag != NULL) - EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag); - - for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) { - if (contigmem_buffers[i].addr != NULL) - contigfree(contigmem_buffers[i].addr, - contigmem_buffer_size, M_CONTIGMEM); - if (mtx_initialized(&contigmem_buffers[i].mtx)) - mtx_destroy(&contigmem_buffers[i].mtx); - } - - return 0; -} - -static int -contigmem_physaddr(SYSCTL_HANDLER_ARGS) -{ - uint64_t physaddr; - int index = (int)(uintptr_t)arg1; - - physaddr = 
(uint64_t)vtophys(contigmem_buffers[index].addr); - return sysctl_handle_64(oidp, &physaddr, 0, req); -} - -static int -contigmem_open(struct cdev *cdev, int fflags, int devtype, - struct thread *td) -{ - - atomic_add_int(&contigmem_refcnt, 1); - - return 0; -} - -static int -contigmem_close(struct cdev *cdev, int fflags, int devtype, - struct thread *td) -{ - - atomic_subtract_int(&contigmem_refcnt, 1); - - return 0; -} - -static int -contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, - vm_ooffset_t foff, struct ucred *cred, u_short *color) -{ - struct contigmem_vm_handle *vmh = handle; - struct contigmem_buffer *buf; - - buf = &contigmem_buffers[vmh->buffer_index]; - - atomic_add_int(&contigmem_refcnt, 1); - - mtx_lock(&buf->mtx); - if (buf->refcnt == 0) - memset(buf->addr, 0, contigmem_buffer_size); - buf->refcnt++; - mtx_unlock(&buf->mtx); - - return 0; -} - -static void -contigmem_cdev_pager_dtor(void *handle) -{ - struct contigmem_vm_handle *vmh = handle; - struct contigmem_buffer *buf; - - buf = &contigmem_buffers[vmh->buffer_index]; - - mtx_lock(&buf->mtx); - buf->refcnt--; - mtx_unlock(&buf->mtx); - - free(vmh, M_CONTIGMEM); - - atomic_subtract_int(&contigmem_refcnt, 1); -} - -static int -contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, - vm_page_t *mres) -{ - vm_paddr_t paddr; - vm_page_t m_paddr, page; - vm_memattr_t memattr, memattr1; - - memattr = object->memattr; - - VM_OBJECT_WUNLOCK(object); - - paddr = offset; - - m_paddr = vm_phys_paddr_to_vm_page(paddr); - if (m_paddr != NULL) { - memattr1 = pmap_page_get_memattr(m_paddr); - if (memattr1 != memattr) - memattr = memattr1; - } - - if (((*mres)->flags & PG_FICTITIOUS) != 0) { - /* - * If the passed in result page is a fake page, update it with - * the new physical address. - */ - page = *mres; - VM_OBJECT_WLOCK(object); - vm_page_updatefake(page, paddr, memattr); - } else { - vm_page_t mret; - /* - * Replace the passed in reqpage page with our own fake page and - * free up the original page. - */ - page = vm_page_getfake(paddr, memattr); - VM_OBJECT_WLOCK(object); - mret = vm_page_replace(page, object, (*mres)->pindex); - KASSERT(mret == *mres, - ("invalid page replacement, old=%p, ret=%p", *mres, mret)); - vm_page_lock(mret); - vm_page_free(mret); - vm_page_unlock(mret); - *mres = page; - } - - page->valid = VM_PAGE_BITS_ALL; - - return VM_PAGER_OK; -} - -static struct cdev_pager_ops contigmem_cdev_pager_ops = { - .cdev_pg_ctor = contigmem_cdev_pager_ctor, - .cdev_pg_dtor = contigmem_cdev_pager_dtor, - .cdev_pg_fault = contigmem_cdev_pager_fault, -}; - -static int -contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, - struct vm_object **obj, int nprot) -{ - struct contigmem_vm_handle *vmh; - uint64_t buffer_index; - - /* - * The buffer index is encoded in the offset. Divide the offset by - * PAGE_SIZE to get the index of the buffer requested by the user - * app. 
- */ - buffer_index = *offset / PAGE_SIZE; - if (buffer_index >= contigmem_num_buffers) - return EINVAL; - - if (size > contigmem_buffer_size) - return EINVAL; - - vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO); - if (vmh == NULL) - return ENOMEM; - vmh->buffer_index = buffer_index; - - *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr); - *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops, - size, nprot, *offset, curthread->td_ucred); - - return 0; -} diff --git a/lib/librte_eal/bsdapp/contigmem/meson.build b/lib/librte_eal/bsdapp/contigmem/meson.build deleted file mode 100644 index 8fb2ab78..00000000 --- a/lib/librte_eal/bsdapp/contigmem/meson.build +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation - -sources = files('contigmem.c') diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index dd455e67..d27da3d1 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -18,21 +18,25 @@ CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -lexecinfo LDLIBS += -lpthread LDLIBS += -lgcc_s +LDLIBS += -lrte_kvargs EXPORT_MAP := ../../rte_eal_version.map -LIBABIVER := 6 +LIBABIVER := 8 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_dev.c # from common dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c @@ -40,6 +44,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c @@ -48,14 +53,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c diff 
--git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 4eafcb5a..d7ae9d68 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include @@ -64,8 +66,8 @@ static int mem_cfg_fd = -1; static struct flock wr_lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = offsetof(struct rte_mem_config, memseg), - .l_len = sizeof(early_mem_config.memseg), + .l_start = offsetof(struct rte_mem_config, memsegs), + .l_len = sizeof(early_mem_config.memsegs), }; /* Address of global and public configuration */ @@ -82,20 +84,72 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; -/* Return user provided mbuf pool ops name */ -const char * __rte_experimental -rte_eal_mbuf_user_pool_ops(void) -{ - return internal_config.user_mbuf_pool_ops_name; +/* platform-specific runtime dir */ +static char runtime_dir[PATH_MAX]; + +static const char *default_runtime_dir = "/var/run"; + +int +eal_create_runtime_dir(void) +{ + const char *directory = default_runtime_dir; + const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR"); + const char *fallback = "/tmp"; + char tmp[PATH_MAX]; + int ret; + + if (getuid() != 0) { + /* try XDG path first, fall back to /tmp */ + if (xdg_runtime_dir != NULL) + directory = xdg_runtime_dir; + else + directory = fallback; + } + /* create DPDK subdirectory under runtime dir */ + ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory); + if (ret < 0 || ret == sizeof(tmp)) { + RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n"); + return -1; + } + + /* create prefix-specific subdirectory under DPDK runtime dir */ + ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", + tmp, internal_config.hugefile_prefix); + if (ret < 0 || ret == sizeof(runtime_dir)) { + RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); + return -1; + } + + /* create the path if it doesn't exist. no "mkdir -p" here, so do it + * step by step. + */ + ret = mkdir(tmp, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + tmp, strerror(errno)); + return -1; + } + + ret = mkdir(runtime_dir, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + runtime_dir, strerror(errno)); + return -1; + } + + return 0; } -/* Return mbuf pool ops name */ const char * -rte_eal_mbuf_default_mempool_ops(void) +eal_get_runtime_dir(void) { - if (internal_config.user_mbuf_pool_ops_name == NULL) - return RTE_MBUF_DEFAULT_MEMPOOL_OPS; + return runtime_dir; +} +/* Return user provided mbuf pool ops name */ +const char * +rte_eal_mbuf_user_pool_ops(void) +{ return internal_config.user_mbuf_pool_ops_name; } @@ -222,12 +276,17 @@ eal_proc_type_detect(void) enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; const char *pathname = eal_runtime_config_path(); - /* if we can open the file but not get a write-lock we are a secondary - * process. NOTE: if we get a file handle back, we keep that open - * and don't close it to prevent a race condition between multiple opens */ - if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && - (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) - ptype = RTE_PROC_SECONDARY; + /* if there no shared config, there can be no secondary processes */ + if (!internal_config.no_shconf) { + /* if we can open the file but not get a write-lock we are a + * secondary process. 
NOTE: if we get a file handle back, we + * keep that open and don't close it to prevent a race condition + * between multiple opens. + */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + } RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); @@ -289,7 +348,7 @@ eal_get_hugepage_mem_size(void) for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; } @@ -379,7 +438,8 @@ eal_parse_args(int argc, char **argv) switch (opt) { case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = optarg; + internal_config.user_mbuf_pool_ops_name = + strdup(optarg); break; case 'h': eal_usage(prgname); @@ -403,6 +463,14 @@ eal_parse_args(int argc, char **argv) } } + /* create runtime data directory */ + if (internal_config.no_shconf == 0 && + eal_create_runtime_dir() < 0) { + RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); + ret = -1; + goto out; + } + if (eal_adjust_config(&internal_config) != 0) { ret = -1; goto out; @@ -429,25 +497,29 @@ out: return ret; } +static int +check_socket(const struct rte_memseg_list *msl, void *arg) +{ + int *socket_id = arg; + + if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0) + return 1; + + return 0; +} + static void eal_check_mem_on_local_socket(void) { - const struct rte_memseg *ms; - int i, socket_id; + int socket_id; socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); - ms = rte_eal_get_physmem_layout(); - - for (i = 0; i < RTE_MAX_MEMSEG; i++) - if (ms[i].socket_id == socket_id && - ms[i].len > 0) - return; - - RTE_LOG(WARNING, EAL, "WARNING: Master core has no " - "memory on local socket!\n"); + if (rte_memseg_list_walk(check_socket, &socket_id) == 0) + RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n"); } + static int sync_func(__attribute__((unused)) void *arg) { @@ -531,6 +603,9 @@ rte_eal_init(int argc, char **argv) return -1; } + /* FreeBSD always uses legacy memory model */ + internal_config.legacy_mem = true; + if (eal_plugins_init() < 0) { rte_eal_init_alert("Cannot init plugins\n"); rte_errno = EINVAL; @@ -544,6 +619,24 @@ rte_eal_init(int argc, char **argv) return -1; } + rte_config_init(); + + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } + + /* Put mp channel init before bus scan so that we can init the vdev + * bus through mp channel in the secondary process before the bus scan. 
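/*
 * Minimal sketch of the write-lock probe used by eal_proc_type_detect()
 * above: the primary process holds an F_WRLCK on the shared config file,
 * so a process that can open the file but cannot take the lock must be a
 * secondary. The path argument and the whole-file lock range are
 * assumptions for the example; real code keeps the fd (and the lock)
 * open to avoid the race noted in the comment above.
 */
#include <fcntl.h>
#include <unistd.h>

static int
is_secondary_process(const char *cfg_path)
{
	struct flock wr = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
	int fd = open(cfg_path, O_RDWR);

	if (fd < 0)
		return 0;	/* no shared config yet, assume primary */
	if (fcntl(fd, F_SETLK, &wr) < 0)
		return 1;	/* lock already held elsewhere, secondary */
	return 0;
}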
+ */ + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -554,13 +647,17 @@ rte_eal_init(int argc, char **argv) /* autodetect the iova mapping mode (default is iova_pa) */ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { @@ -583,14 +680,14 @@ rte_eal_init(int argc, char **argv) rte_srand(rte_rdtsc()); - rte_config_init(); - - if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_errno = EFAULT; - return -1; - } + /* in secondary processes, memory init may allocate additional fbarrays + * not present in primary processes, so to avoid any potential issues, + * initialize memzones first. + */ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; } if (rte_eal_memory_init() < 0) { @@ -599,8 +696,8 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); rte_errno = ENODEV; return -1; } @@ -617,11 +714,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_intr_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); - return -1; - } - if (rte_eal_timer_init() < 0) { rte_eal_init_alert("Cannot init HPET or TSC timers\n"); rte_errno = ENOTSUP; @@ -632,7 +724,7 @@ rte_eal_init(int argc, char **argv) eal_thread_init_master(rte_config.master_lcore); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n", rte_config.master_lcore, thread_id, cpuset, @@ -658,7 +750,7 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot create thread\n"); /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + snprintf(thread_name, sizeof(thread_name), "lcore-slave-%d", i); rte_thread_setname(lcore_config[i].thread_id, thread_name); } @@ -735,18 +827,6 @@ rte_eal_vfio_intr_mode(void) return RTE_INTR_MODE_NONE; } -/* dummy forward declaration. */ -struct vfio_device_info; - -/* dummy prototypes. 
*/ -int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, - int *vfio_dev_fd, struct vfio_device_info *device_info); -int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); -int rte_vfio_enable(const char *modname); -int rte_vfio_is_enabled(const char *modname); -int rte_vfio_noiommu_is_enabled(void); -int rte_vfio_clear_group(int vfio_group_fd); - int rte_vfio_setup_device(__rte_unused const char *sysfs_base, __rte_unused const char *dev_addr, __rte_unused int *vfio_dev_fd, @@ -781,3 +861,81 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd) { return 0; } + +int +rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_get_group_num(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *iommu_group_num) +{ + return -1; +} + +int +rte_vfio_get_container_fd(void) +{ + return -1; +} + +int +rte_vfio_get_group_fd(__rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_create(void) +{ + return -1; +} + +int +rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c index eb3913c9..51ea4b8c 100644 --- a/lib/librte_eal/bsdapp/eal/eal_alarm.c +++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c @@ -1,31 +1,314 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ + +#include +#include +#include +#include #include +#include +#include #include #include +#include #include +#include +#include +#include + #include "eal_private.h" +#include "eal_alarm_private.h" + +#define NS_PER_US 1000 + +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +struct alarm_entry { + LIST_ENTRY(alarm_entry) next; + struct rte_intr_handle handle; + struct timespec time; + rte_eal_alarm_callback cb_fn; + void *cb_arg; + volatile uint8_t executing; + volatile pthread_t executing_id; +}; + +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); +static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static void eal_alarm_callback(void *arg); int rte_eal_alarm_init(void) { + intr_handle.type = RTE_INTR_HANDLE_ALARM; + + /* on FreeBSD, timers don't use fd's, and their identifiers are stored + * in separate namespace from fd's, so using any value is OK. however, + * EAL interrupts handler expects fd's to be unique, so use an actual fd + * to guarantee unique timer identifier. 
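/*
 * Usage sketch for the alarm API implemented below: arm a one-shot
 * alarm and later cancel any outstanding instances. The 500000 us
 * period and the callback body are illustrative; (void *)-1 as the
 * cancel argument matches any cb_arg, as handled in
 * rte_eal_alarm_cancel() further down.
 */
#include <stdio.h>
#include <rte_alarm.h>

static void
heartbeat(void *arg)
{
	printf("alarm fired, arg=%p\n", arg);
}

static void
alarm_example(void)
{
	/* period is given in microseconds */
	if (rte_eal_alarm_set(500000, heartbeat, NULL) < 0)
		printf("failed to arm alarm\n");

	/* cancel all pending "heartbeat" alarms regardless of cb_arg */
	rte_eal_alarm_cancel(heartbeat, (void *)-1);
}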
+ */ + intr_handle.fd = open("/dev/zero", O_RDONLY); + + return 0; +} + +static inline int +timespec_cmp(const struct timespec *now, const struct timespec *at) +{ + if (now->tv_sec < at->tv_sec) + return -1; + if (now->tv_sec > at->tv_sec) + return 1; + if (now->tv_nsec < at->tv_nsec) + return -1; + if (now->tv_nsec > at->tv_nsec) + return 1; return 0; } +static inline uint64_t +diff_ns(struct timespec *now, struct timespec *at) +{ + uint64_t now_ns, at_ns; + + if (timespec_cmp(now, at) >= 0) + return 0; + + now_ns = now->tv_sec * NS_PER_S + now->tv_nsec; + at_ns = at->tv_sec * NS_PER_S + at->tv_nsec; + + return at_ns - now_ns; +} int -rte_eal_alarm_set(uint64_t us __rte_unused, - rte_eal_alarm_callback cb_fn __rte_unused, - void *cb_arg __rte_unused) +eal_alarm_get_timeout_ns(uint64_t *val) { - return -ENOTSUP; + struct alarm_entry *ap; + struct timespec now; + + if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) + return -1; + + if (LIST_EMPTY(&alarm_list)) + return -1; + + ap = LIST_FIRST(&alarm_list); + + *val = diff_ns(&now, &ap->time); + + return 0; +} + +static int +unregister_current_callback(void) +{ + struct alarm_entry *ap; + int ret = 0; + + if (!LIST_EMPTY(&alarm_list)) { + ap = LIST_FIRST(&alarm_list); + + do { + ret = rte_intr_callback_unregister(&intr_handle, + eal_alarm_callback, &ap->time); + } while (ret == -EAGAIN); + } + + return ret; } +static int +register_first_callback(void) +{ + struct alarm_entry *ap; + int ret = 0; + + if (!LIST_EMPTY(&alarm_list)) { + ap = LIST_FIRST(&alarm_list); + + /* register a new callback */ + ret = rte_intr_callback_register(&intr_handle, + eal_alarm_callback, &ap->time); + } + return ret; +} + +static void +eal_alarm_callback(void *arg __rte_unused) +{ + struct timespec now; + struct alarm_entry *ap; + + rte_spinlock_lock(&alarm_list_lk); + ap = LIST_FIRST(&alarm_list); + + if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) + return; + + while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) { + ap->executing = 1; + ap->executing_id = pthread_self(); + rte_spinlock_unlock(&alarm_list_lk); + + ap->cb_fn(ap->cb_arg); + + rte_spinlock_lock(&alarm_list_lk); + + LIST_REMOVE(ap, next); + free(ap); + + ap = LIST_FIRST(&alarm_list); + } + + /* timer has been deleted from the kqueue, so recreate it if needed */ + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); +} + + int -rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) { - return -ENOTSUP; + struct alarm_entry *ap, *new_alarm; + struct timespec now; + uint64_t ns; + int ret = 0; + + /* check parameters, also ensure us won't cause a uint64_t overflow */ + if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) + return -EINVAL; + + new_alarm = calloc(1, sizeof(*new_alarm)); + if (new_alarm == NULL) + return -ENOMEM; + + /* use current time to calculate absolute time of alarm */ + clock_gettime(CLOCK_TYPE_ID, &now); + + ns = us * NS_PER_US; + + new_alarm->cb_fn = cb_fn; + new_alarm->cb_arg = cb_arg; + new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S; + new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S); + + rte_spinlock_lock(&alarm_list_lk); + + if (LIST_EMPTY(&alarm_list)) + LIST_INSERT_HEAD(&alarm_list, new_alarm, next); + else { + LIST_FOREACH(ap, &alarm_list, next) { + if (timespec_cmp(&new_alarm->time, &ap->time) < 0) { + LIST_INSERT_BEFORE(ap, new_alarm, next); + break; + } + if (LIST_NEXT(ap, next) == NULL) { + LIST_INSERT_AFTER(ap, 
new_alarm, next); + break; + } + } + } + + /* re-register first callback just in case */ + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); + + return ret; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct alarm_entry *ap, *ap_prev; + int count = 0; + int err = 0; + int executing; + + if (!cb_fn) { + rte_errno = EINVAL; + return -1; + } + + do { + executing = 0; + rte_spinlock_lock(&alarm_list_lk); + /* remove any matches at the start of the list */ + while (1) { + ap = LIST_FIRST(&alarm_list); + if (ap == NULL) + break; + if (cb_fn != ap->cb_fn) + break; + if (cb_arg != ap->cb_arg && cb_arg != (void *) -1) + break; + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + free(ap); + count++; + } else { + /* If calling from other context, mark that + * alarm is executing so loop can spin till it + * finish. Otherwise we are trying to cancel + * ourselves - mark it by EINPROGRESS. + */ + if (pthread_equal(ap->executing_id, + pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + + break; + } + } + ap_prev = ap; + + /* now go through list, removing entries not at start */ + LIST_FOREACH(ap, &alarm_list, next) { + /* this won't be true first time through */ + if (cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || + cb_arg == ap->cb_arg)) { + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + free(ap); + count++; + ap = ap_prev; + } else if (pthread_equal(ap->executing_id, + pthread_self()) == 0) { + executing++; + } else { + err = EINPROGRESS; + } + } + ap_prev = ap; + } + rte_spinlock_unlock(&alarm_list_lk); + } while (executing != 0); + + if (count == 0 && err == 0) + rte_errno = ENOENT; + else if (err) + rte_errno = err; + + rte_spinlock_lock(&alarm_list_lk); + + /* unregister if no alarms left, otherwise re-register first */ + if (LIST_EMPTY(&alarm_list)) + unregister_current_callback(); + else + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); + + return count; } diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h new file mode 100644 index 00000000..65c71151 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef EAL_ALARM_PRIVATE_H +#define EAL_ALARM_PRIVATE_H + +#include + +/* + * FreeBSD needs a back-channel communication mechanism between interrupt and + * alarm thread, because on FreeBSD, timer period is set up inside the interrupt + * API and not inside alarm API like on Linux. 
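/*
 * Sketch of the back-channel described above: when the interrupt code
 * (re)arms the alarm pseudo-interrupt, it asks the alarm code for the
 * soonest timeout and programs a one-shot kqueue timer with it, as
 * intr_source_to_kevent() does further down. The kq and ident
 * parameters are assumptions for this example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdint.h>

#include "eal_alarm_private.h"

static int
arm_alarm_timer(int kq, uintptr_t ident)
{
	struct kevent ke;
	uint64_t timeout_ns;

	if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
		return -1;	/* no alarms pending */

	EV_SET(&ke, ident, EVFILT_TIMER, EV_ADD | EV_ONESHOT,
	    NOTE_NSECONDS, timeout_ns, NULL);
	return kevent(kq, &ke, 1, NULL, 0, NULL);
}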
+ */ + +int +eal_alarm_get_timeout_ns(uint64_t *val); + +#endif // EAL_ALARM_PRIVATE_H diff --git a/lib/librte_eal/bsdapp/eal/eal_cpuflags.c b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c new file mode 100644 index 00000000..69b161ea --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Mellanox Technologies, Ltd + */ + +#include +#include + +unsigned long +rte_cpu_getauxval(unsigned long type __rte_unused) +{ + /* not implemented */ + return 0; +} + +int +rte_cpu_strcmp_auxval(unsigned long type __rte_unused, + const char *str __rte_unused) +{ + /* not implemented */ + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c new file mode 100644 index 00000000..1c6c51bd --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_dev.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include + +int __rte_experimental +rte_dev_event_monitor_start(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} + +int __rte_experimental +rte_dev_event_monitor_stop(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c index be2dbf0e..1e8f5df2 100644 --- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -19,10 +19,10 @@ * Used in this file to store the hugepage file map on disk */ static void * -create_shared_memory(const char *filename, const size_t mem_size) +map_shared_memory(const char *filename, const size_t mem_size, int flags) { void *retval; - int fd = open(filename, O_CREAT | O_RDWR, 0666); + int fd = open(filename, flags, 0666); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { @@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size) return retval; } +static void * +open_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR); +} + +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); +} + /* * No hugepage support on freebsd, but we dummy it, using contigmem driver */ @@ -46,13 +58,16 @@ eal_hugepage_info_init(void) /* re-use the linux "internal config" structure for our memory data */ struct hugepage_info *hpi = &internal_config.hugepage_info[0]; struct hugepage_info *tmp_hpi; + unsigned int i; + + internal_config.num_hugepage_sizes = 1; sysctl_size = sizeof(num_buffers); error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers, &sysctl_size, NULL, 0); if (error != 0) { - RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers"); + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers\n"); return -1; } @@ -61,7 +76,7 @@ eal_hugepage_info_init(void) &sysctl_size, NULL, 0); if (error != 0) { - RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size"); + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size\n"); return -1; } @@ -81,25 +96,61 @@ eal_hugepage_info_init(void) RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", num_buffers, (int)(buffer_size>>10)); - internal_config.num_hugepage_sizes = 1; - hpi->hugedir = CONTIGMEM_DEV; + strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir)); 
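/*
 * Note on the strlcpy() above: hugedir is copied into an embedded array
 * rather than assigned as a string pointer because the whole
 * hugepage_info block is memcpy()'d into the shared config below and
 * re-mapped by secondary processes, where a pointer into the primary's
 * address space would not be valid.
 */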
hpi->hugepage_sz = buffer_size; hpi->num_pages[0] = num_buffers; hpi->lock_descriptor = fd; + /* for no shared files mode, do not create shared memory config */ + if (internal_config.no_shconf) + return 0; + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), - sizeof(struct hugepage_info)); + sizeof(internal_config.hugepage_info)); if (tmp_hpi == NULL ) { RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); return -1; } - memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info)); + memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info)); + + /* we've copied file descriptors along with everything else, but they + * will be invalid in secondary process, so overwrite them + */ + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + struct hugepage_info *tmp = &tmp_hpi[i]; + tmp->lock_descriptor = -1; + } - if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) { + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); return -1; } return 0; } + +/* copy stuff from shared info into internal config */ +int +eal_hugepage_info_read(void) +{ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + internal_config.num_hugepage_sizes = 1; + + tmp_hpi = open_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to open shared memory!\n"); + return -1; + } + + memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info)); + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 290d53ab..2feee2d5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -1,51 +1,479 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ +#include +#include +#include +#include +#include + +#include +#include +#include #include #include + #include "eal_private.h" +#include "eal_alarm_private.h" + +#define MAX_INTR_EVENTS 16 + +/** + * union buffer for reading on different devices + */ +union rte_intr_read_buffer { + char charbuf[16]; /* for others */ +}; + +TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); +TAILQ_HEAD(rte_intr_source_list, rte_intr_source); + +struct rte_intr_callback { + TAILQ_ENTRY(rte_intr_callback) next; + rte_intr_callback_fn cb_fn; /**< callback address */ + void *cb_arg; /**< parameter for callback */ +}; + +struct rte_intr_source { + TAILQ_ENTRY(rte_intr_source) next; + struct rte_intr_handle intr_handle; /**< interrupt handle */ + struct rte_intr_cb_list callbacks; /**< user callbacks */ + uint32_t active; +}; + +/* global spinlock for interrupt data operation */ +static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; + +/* interrupt sources list */ +static struct rte_intr_source_list intr_sources; + +/* interrupt handling thread */ +static pthread_t intr_thread; + +static volatile int kq = -1; + +static int +intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke) +{ + /* alarm callbacks are special case */ + if (ih->type == RTE_INTR_HANDLE_ALARM) { + uint64_t timeout_ns; + + /* get soonest alarm timeout */ + if (eal_alarm_get_timeout_ns(&timeout_ns) < 0) + return -1; + + ke->filter = EVFILT_TIMER; + /* timers are one shot */ + 
ke->flags |= EV_ONESHOT; + ke->fflags = NOTE_NSECONDS; + ke->data = timeout_ns; + } else { + ke->filter = EVFILT_READ; + } + ke->ident = ih->fd; + + return 0; +} int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, - rte_intr_callback_fn cb, - void *cb_arg) + rte_intr_callback_fn cb, void *cb_arg) { - RTE_SET_USED(intr_handle); - RTE_SET_USED(cb); - RTE_SET_USED(cb_arg); + struct rte_intr_callback *callback = NULL; + struct rte_intr_source *src = NULL; + int ret, add_event; - return -ENOTSUP; + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { + RTE_LOG(ERR, EAL, + "Registering with invalid input parameter\n"); + return -EINVAL; + } + if (kq < 0) { + RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq); + return -ENODEV; + } + + /* allocate a new interrupt callback entity */ + callback = calloc(1, sizeof(*callback)); + if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + return -ENOMEM; + } + callback->cb_fn = cb; + callback->cb_arg = cb_arg; + + rte_spinlock_lock(&intr_lock); + + /* check if there is at least one callback registered for the fd */ + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd == intr_handle->fd) { + /* we had no interrupts for this */ + if (TAILQ_EMPTY(&src->callbacks)) + add_event = 1; + + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + ret = 0; + break; + } + } + + /* no existing callbacks for this - add new source */ + if (src == NULL) { + src = calloc(1, sizeof(*src)); + if (src == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + ret = -ENOMEM; + goto fail; + } else { + src->intr_handle = *intr_handle; + TAILQ_INIT(&src->callbacks); + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + TAILQ_INSERT_TAIL(&intr_sources, src, next); + add_event = 1; + ret = 0; + } + } + + /* add events to the queue. timer events are special as we need to + * re-set the timer. + */ + if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) { + struct kevent ke; + + memset(&ke, 0, sizeof(ke)); + ke.flags = EV_ADD; /* mark for addition to the queue */ + + if (intr_source_to_kevent(intr_handle, &ke) < 0) { + RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n"); + ret = -ENODEV; + goto fail; + } + + /** + * add the intr file descriptor into wait list. + */ + if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) { + /* currently, nic_uio does not support interrupts, so + * this error will always be triggered and output to the + * user. so, don't output it unless debug log level set. 
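/*
 * Usage sketch for the registration path above: attach a callback to an
 * application-owned fd (one end of a pipe) through an EXT-type handle,
 * which kqueue watches with EVFILT_READ. The pipe and the callback body
 * are assumptions for illustration; note that EXT handles are not
 * drained by the interrupt thread (bytes_read is 0 in the processing
 * loop below), so the callback consumes the data itself.
 */
#include <unistd.h>
#include <rte_interrupts.h>

static struct rte_intr_handle ext_handle;
static int pipe_fds[2];

static void
on_event(void *arg)
{
	char buf[16];

	/* drain the pipe so the level-triggered event is cleared */
	(void)read(*(int *)arg, buf, sizeof(buf));
}

static int
watch_pipe_read_end(void)
{
	if (pipe(pipe_fds) < 0)
		return -1;

	ext_handle.fd = pipe_fds[0];
	ext_handle.type = RTE_INTR_HANDLE_EXT;

	return rte_intr_callback_register(&ext_handle, on_event, &pipe_fds[0]);
}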
+ */ + if (errno == ENODEV) + RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n", + src->intr_handle.fd); + else + RTE_LOG(ERR, EAL, "Error adding fd %d " + "kevent, %s\n", + src->intr_handle.fd, + strerror(errno)); + ret = -errno; + goto fail; + } + } + rte_spinlock_unlock(&intr_lock); + + return ret; +fail: + /* clean up */ + if (src != NULL) { + TAILQ_REMOVE(&(src->callbacks), callback, next); + if (TAILQ_EMPTY(&(src->callbacks))) { + TAILQ_REMOVE(&intr_sources, src, next); + free(src); + } + } + free(callback); + rte_spinlock_unlock(&intr_lock); + return ret; } int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, - rte_intr_callback_fn cb, - void *cb_arg) + rte_intr_callback_fn cb_fn, void *cb_arg) { - RTE_SET_USED(intr_handle); - RTE_SET_USED(cb); - RTE_SET_USED(cb_arg); + int ret; + struct rte_intr_source *src; + struct rte_intr_callback *cb, *next; - return -ENOTSUP; + /* do parameter checking first */ + if (intr_handle == NULL || intr_handle->fd < 0) { + RTE_LOG(ERR, EAL, + "Unregistering with invalid input parameter\n"); + return -EINVAL; + } + if (kq < 0) { + RTE_LOG(ERR, EAL, "Kqueue is not active\n"); + return -ENODEV; + } + + rte_spinlock_lock(&intr_lock); + + /* check if the insterrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; + + /* No interrupt source registered for the fd */ + if (src == NULL) { + ret = -ENOENT; + + /* interrupt source has some active callbacks right now. */ + } else if (src->active != 0) { + ret = -EAGAIN; + + /* ok to remove. */ + } else { + struct kevent ke; + + ret = 0; + + /* remove it from the kqueue */ + memset(&ke, 0, sizeof(ke)); + ke.flags = EV_DELETE; /* mark for deletion from the queue */ + + if (intr_source_to_kevent(intr_handle, &ke) < 0) { + RTE_LOG(ERR, EAL, "Cannot convert to kevent\n"); + ret = -ENODEV; + goto out; + } + + /** + * remove intr file descriptor from wait list. + */ + if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) { + RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n", + src->intr_handle.fd, strerror(errno)); + /* removing non-existent even is an expected condition + * in some circumstances (e.g. oneshot events). + */ + } + + /*walk through the callbacks and remove all that match. */ + for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { + next = TAILQ_NEXT(cb, next); + if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || + cb->cb_arg == cb_arg)) { + TAILQ_REMOVE(&src->callbacks, cb, next); + free(cb); + ret++; + } + } + + /* all callbacks for that source are removed. 
*/ + if (TAILQ_EMPTY(&src->callbacks)) { + TAILQ_REMOVE(&intr_sources, src, next); + free(src); + } + } +out: + rte_spinlock_unlock(&intr_lock); + + return ret; } int -rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { - return -ENOTSUP; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; } int -rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { - return -ENOTSUP; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +static void +eal_intr_process_interrupts(struct kevent *events, int nfds) +{ + struct rte_intr_callback active_cb; + union rte_intr_read_buffer buf; + struct rte_intr_callback *cb; + struct rte_intr_source *src; + bool call = false; + int n, bytes_read; + + for (n = 0; n < nfds; n++) { + int event_fd = events[n].ident; + + rte_spinlock_lock(&intr_lock); + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == event_fd) + break; + if (src == NULL) { + rte_spinlock_unlock(&intr_lock); + continue; + } + + /* mark this interrupt source as active and release the lock. */ + src->active = 1; + rte_spinlock_unlock(&intr_lock); + + /* set the length to be read dor different handle type */ + switch (src->intr_handle.type) { + case RTE_INTR_HANDLE_ALARM: + bytes_read = 0; + call = true; + break; + case RTE_INTR_HANDLE_VDEV: + case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + case RTE_INTR_HANDLE_DEV_EVENT: + bytes_read = 0; + call = true; + break; + default: + bytes_read = 1; + break; + } + + if (bytes_read > 0) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + bytes_read = read(event_fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + event_fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", event_fd); + else + call = true; + } + + /* grab a lock, again to call callbacks and update status. */ + rte_spinlock_lock(&intr_lock); + + if (call) { + /* Finally, call all callbacks. */ + TAILQ_FOREACH(cb, &src->callbacks, next) { + + /* make a copy and unlock. */ + active_cb = *cb; + rte_spinlock_unlock(&intr_lock); + + /* call the actual callback */ + active_cb.cb_fn(active_cb.cb_arg); + + /*get the lock back. */ + rte_spinlock_lock(&intr_lock); + } + } + + /* we done with that interrupt source, release it. 
*/ + src->active = 0; + rte_spinlock_unlock(&intr_lock); + } +} + +static void * +eal_intr_thread_main(void *arg __rte_unused) +{ + struct kevent events[MAX_INTR_EVENTS]; + int nfds; + + /* host thread, never break out */ + for (;;) { + /* do not change anything, just wait */ + nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL); + + /* kevent fail */ + if (nfds < 0) { + if (errno == EINTR) + continue; + RTE_LOG(ERR, EAL, + "kevent returns with fail\n"); + break; + } + /* kevent timeout, will never happen here */ + else if (nfds == 0) + continue; + + /* kevent has at least one fd ready to read */ + eal_intr_process_interrupts(events, nfds); + } + close(kq); + kq = -1; + return NULL; } int rte_eal_intr_init(void) { - return 0; + int ret = 0; + + /* init the global interrupt source head */ + TAILQ_INIT(&intr_sources); + + kq = kqueue(); + if (kq < 0) { + RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n"); + return -1; + } + + /* create the host thread to wait/handle the interrupt */ + ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL, + eal_intr_thread_main, NULL); + if (ret != 0) { + rte_errno = -ret; + RTE_LOG(ERR, EAL, + "Failed to create thread for interrupt handling\n"); + } + + return ret; } int diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c new file mode 100644 index 00000000..f7f07abd --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include + +#include +#include + +#include "eal_memalloc.h" + +int +eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms __rte_unused, + int __rte_unused n_segs, size_t __rte_unused page_sz, + int __rte_unused socket, bool __rte_unused exact) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +struct rte_memseg * +eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return NULL; +} + +int +eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused, + int n_segs __rte_unused) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_sync_with_primary(void) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_init(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c index bdfb8828..16d2bc7c 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -6,10 +6,13 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include "eal_private.h" @@ -41,129 +44,253 @@ rte_eal_hugepage_init(void) struct rte_mem_config *mcfg; uint64_t total_mem = 0; void *addr; - unsigned i, j, seg_idx = 0; + unsigned int i, j, seg_idx = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; /* for debug purposes, hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { - addr = malloc(internal_config.memory); - mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr; - mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; - mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id 
= 0; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + struct rte_memseg *ms; + uint64_t page_sz; + int n_segs, cur_seg; + + /* create a memseg list */ + msl = &mcfg->memsegs[0]; + + page_sz = RTE_PGSIZE_4K; + n_segs = internal_config.memory / page_sz; + + if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); + return -1; + } + + addr = mmap(NULL, internal_config.memory, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } + msl->base_va = addr; + msl->page_sz = page_sz; + msl->socket_id = 0; + + /* populate memsegs. each memseg is 1 page long */ + for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { + arr = &msl->memseg_arr; + + ms = rte_fbarray_get(arr, cur_seg); + if (rte_eal_iova_mode() == RTE_IOVA_VA) + ms->iova = (uintptr_t)addr; + else + ms->iova = RTE_BAD_IOVA; + ms->addr = addr; + ms->hugepage_sz = page_sz; + ms->len = page_sz; + ms->socket_id = 0; + + rte_fbarray_set_used(arr, cur_seg); + + addr = RTE_PTR_ADD(addr, page_sz); + } return 0; } /* map all hugepages and sort them */ for (i = 0; i < internal_config.num_hugepage_sizes; i ++){ struct hugepage_info *hpi; + rte_iova_t prev_end = 0; + int prev_ms_idx = -1; + uint64_t page_sz, mem_needed; + unsigned int n_pages, max_pages; hpi = &internal_config.hugepage_info[i]; - for (j = 0; j < hpi->num_pages[0]; j++) { + page_sz = hpi->hugepage_sz; + max_pages = hpi->num_pages[0]; + mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem, + page_sz); + + n_pages = RTE_MIN(mem_needed / page_sz, max_pages); + + for (j = 0; j < n_pages; j++) { + struct rte_memseg_list *msl; + struct rte_fbarray *arr; struct rte_memseg *seg; + int msl_idx, ms_idx; rte_iova_t physaddr; int error; size_t sysctl_size = sizeof(physaddr); char physaddr_str[64]; + bool is_adjacent; - addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE, - MAP_SHARED, hpi->lock_descriptor, - j * EAL_PAGE_SIZE); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", - j, hpi->hugedir); - return -1; - } - - snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem" - ".physaddr.%d", j); - error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size, - NULL, 0); + /* first, check if this segment is IOVA-adjacent to + * the previous one. + */ + snprintf(physaddr_str, sizeof(physaddr_str), + "hw.contigmem.physaddr.%d", j); + error = sysctlbyname(physaddr_str, &physaddr, + &sysctl_size, NULL, 0); if (error < 0) { RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " "from %s\n", j, hpi->hugedir); return -1; } - seg = &mcfg->memseg[seg_idx++]; + is_adjacent = prev_end != 0 && physaddr == prev_end; + prev_end = physaddr + hpi->hugepage_sz; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; + msl_idx++) { + bool empty, need_hole; + msl = &mcfg->memsegs[msl_idx]; + arr = &msl->memseg_arr; + + if (msl->page_sz != page_sz) + continue; + + empty = arr->count == 0; + + /* we need a hole if this isn't an empty memseg + * list, and if previous segment was not + * adjacent to current one. + */ + need_hole = !empty && !is_adjacent; + + /* we need 1, plus hole if not adjacent */ + ms_idx = rte_fbarray_find_next_n_free(arr, + 0, 1 + (need_hole ? 1 : 0)); + + /* memseg list is full? 
*/ + if (ms_idx < 0) + continue; + + if (need_hole && prev_ms_idx == ms_idx - 1) + ms_idx++; + prev_ms_idx = ms_idx; + + break; + } + if (msl_idx == RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE), + RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE)); + return -1; + } + arr = &msl->memseg_arr; + seg = rte_fbarray_get(arr, ms_idx); + + addr = RTE_PTR_ADD(msl->base_va, + (size_t)msl->page_sz * ms_idx); + + /* address is already mapped in memseg list, so using + * MAP_FIXED here is safe. + */ + addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_FIXED, + hpi->lock_descriptor, + j * EAL_PAGE_SIZE); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + j, hpi->hugedir); + return -1; + } + seg->addr = addr; seg->iova = physaddr; - seg->hugepage_sz = hpi->hugepage_sz; - seg->len = hpi->hugepage_sz; + seg->hugepage_sz = page_sz; + seg->len = page_sz; seg->nchannel = mcfg->nchannel; seg->nrank = mcfg->nrank; seg->socket_id = 0; + rte_fbarray_set_used(arr, ms_idx); + RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" PRIx64", len %zu\n", - seg_idx, addr, physaddr, hpi->hugepage_sz); - if (total_mem >= internal_config.memory || - seg_idx >= RTE_MAX_MEMSEG) - break; + seg_idx++, addr, physaddr, page_sz); + + total_mem += seg->len; } + if (total_mem >= internal_config.memory) + break; + } + if (total_mem < internal_config.memory) { + RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, " + "requested: %" PRIu64 "M " + "available: %" PRIu64 "M\n", + internal_config.memory >> 20, total_mem >> 20); + return -1; } return 0; } +struct attach_walk_args { + int fd_hugepage; + int seg_idx; +}; +static int +attach_segment(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + struct attach_walk_args *wa = arg; + void *addr; + + addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, wa->fd_hugepage, + wa->seg_idx * EAL_PAGE_SIZE); + if (addr == MAP_FAILED || addr != ms->addr) + return -1; + wa->seg_idx++; + + return 0; +} + int rte_eal_hugepage_attach(void) { const struct hugepage_info *hpi; - int fd_hugepage_info, fd_hugepage = -1; - unsigned i = 0; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int fd_hugepage = -1; + unsigned int i; - /* Obtain a file descriptor for hugepage_info */ - fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY); - if (fd_hugepage_info < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); - return -1; - } + hpi = &internal_config.hugepage_info[0]; - /* Map the shared hugepage_info into the process address spaces */ - hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE, - fd_hugepage_info, 0); - if (hpi == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); - goto error; - } - - /* Obtain a file descriptor for contiguous memory */ - fd_hugepage = open(hpi->hugedir, O_RDWR); - if (fd_hugepage < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir); - goto error; - } + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + const struct hugepage_info *cur_hpi = &hpi[i]; + struct attach_walk_args wa; - /* Map the contiguous memory into each memory segment */ - for (i = 0; i < hpi->num_pages[0]; i++) { + memset(&wa, 0, sizeof(wa)); - void *addr; - struct rte_memseg *seg = &mcfg->memseg[i]; + /* Obtain a file descriptor for contiguous 
memory */ + fd_hugepage = open(cur_hpi->hugedir, O_RDWR); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", + cur_hpi->hugedir); + goto error; + } + wa.fd_hugepage = fd_hugepage; + wa.seg_idx = 0; - addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE, - MAP_SHARED|MAP_FIXED, fd_hugepage, - i * EAL_PAGE_SIZE); - if (addr == MAP_FAILED || addr != seg->addr) { + /* Map the contiguous memory into each memory segment */ + if (rte_memseg_walk(attach_segment, &wa) < 0) { RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", - i, hpi->hugedir); + wa.seg_idx, cur_hpi->hugedir); goto error; } + close(fd_hugepage); + fd_hugepage = -1; } /* hugepage_info is no longer required */ - munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info)); - close(fd_hugepage_info); - close(fd_hugepage); return 0; error: - if (fd_hugepage_info >= 0) - close(fd_hugepage_info); if (fd_hugepage >= 0) close(fd_hugepage); return -1; @@ -174,3 +301,217 @@ rte_eal_using_phys_addrs(void) { return 0; } + +static uint64_t +get_mem_amount(uint64_t page_sz, uint64_t max_mem) +{ + uint64_t area_sz, max_pages; + + /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */ + max_pages = RTE_MAX_MEMSEG_PER_LIST; + max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem); + + area_sz = RTE_MIN(page_sz * max_pages, max_mem); + + /* make sure the list isn't smaller than the page size */ + area_sz = RTE_MAX(area_sz, page_sz); + + return RTE_ALIGN(area_sz, page_sz); +} + +#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" +static int +alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, + int n_segs, int socket_id, int type_msl_idx) +{ + char name[RTE_FBARRAY_NAME_LEN]; + + snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id, + type_msl_idx); + if (rte_fbarray_init(&msl->memseg_arr, name, n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n", + rte_strerror(rte_errno)); + return -1; + } + + msl->page_sz = page_sz; + msl->socket_id = socket_id; + msl->base_va = NULL; + + RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n", + (size_t)page_sz >> 10, socket_id); + + return 0; +} + +static int +alloc_va_space(struct rte_memseg_list *msl) +{ + uint64_t page_sz; + size_t mem_sz; + void *addr; + int flags = 0; + +#ifdef RTE_ARCH_PPC_64 + flags |= MAP_HUGETLB; +#endif + + page_sz = msl->page_sz; + mem_sz = page_sz * msl->memseg_arr.len; + + addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); + if (addr == NULL) { + if (rte_errno == EADDRNOTAVAIL) + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n", + (unsigned long long)mem_sz, msl->base_va); + else + RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); + return -1; + } + msl->base_va = addr; + + return 0; +} + + +static int +memseg_primary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int hpi_idx, msl_idx = 0; + struct rte_memseg_list *msl; + uint64_t max_mem, total_mem; + + /* no-huge does not need this at all */ + if (internal_config.no_hugetlbfs) + return 0; + + /* FreeBSD has an issue where core dump will dump the entire memory + * contents, including anonymous zero-page memory. Therefore, while we + * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will + * also be further limiting total memory amount to whatever memory is + * available to us through contigmem driver (plus spacing blocks). 
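/*
 * Worked example of the sizing applied further down in this function
 * (numbers illustrative only): with hw.contigmem.num_buffers = 64
 * buffers of 2 MB, avail_segs = (64 * 2) - 1 = 127 and
 * avail_mem = 127 * 2 MB = 254 MB, so at most 254 MB of VA (holes
 * included) is reserved for this page size instead of the full
 * RTE_MAX_MEM_MB_PER_TYPE limit.
 */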
+ * + * so, at each stage, we will be checking how much memory we are + * preallocating, and adjust all the values accordingly. + */ + + max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; + total_mem = 0; + + /* create memseg lists */ + for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; + hpi_idx++) { + uint64_t max_type_mem, total_type_mem = 0; + uint64_t avail_mem; + int type_msl_idx, max_segs, avail_segs, total_segs = 0; + struct hugepage_info *hpi; + uint64_t hugepage_sz; + + hpi = &internal_config.hugepage_info[hpi_idx]; + hugepage_sz = hpi->hugepage_sz; + + /* no NUMA support on FreeBSD */ + + /* check if we've already exceeded total memory amount */ + if (total_mem >= max_mem) + break; + + /* first, calculate theoretical limits according to config */ + max_type_mem = RTE_MIN(max_mem - total_mem, + (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); + max_segs = RTE_MAX_MEMSEG_PER_TYPE; + + /* now, limit all of that to whatever will actually be + * available to us, because without dynamic allocation support, + * all of that extra memory will be sitting there being useless + * and slowing down core dumps in case of a crash. + * + * we need (N*2)-1 segments because we cannot guarantee that + * each segment will be IOVA-contiguous with the previous one, + * so we will allocate more and put spaces inbetween segments + * that are non-contiguous. + */ + avail_segs = (hpi->num_pages[0] * 2) - 1; + avail_mem = avail_segs * hugepage_sz; + + max_type_mem = RTE_MIN(avail_mem, max_type_mem); + max_segs = RTE_MIN(avail_segs, max_segs); + + type_msl_idx = 0; + while (total_type_mem < max_type_mem && + total_segs < max_segs) { + uint64_t cur_max_mem, cur_mem; + unsigned int n_segs; + + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + return -1; + } + + msl = &mcfg->memsegs[msl_idx++]; + + cur_max_mem = max_type_mem - total_type_mem; + + cur_mem = get_mem_amount(hugepage_sz, + cur_max_mem); + n_segs = cur_mem / hugepage_sz; + + if (alloc_memseg_list(msl, hugepage_sz, n_segs, + 0, type_msl_idx)) + return -1; + + total_segs += msl->memseg_arr.len; + total_type_mem = total_segs * hugepage_sz; + type_msl_idx++; + + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + return -1; + } + } + total_mem += total_type_mem; + } + return 0; +} + +static int +memseg_secondary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx = 0; + struct rte_memseg_list *msl; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + + msl = &mcfg->memsegs[msl_idx]; + + /* skip empty memseg lists */ + if (msl->memseg_arr.len == 0) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { + RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); + return -1; + } + + /* preallocate VA space */ + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); + return -1; + } + } + + return 0; +} + +int +rte_eal_memseg_init(void) +{ + return rte_eal_process_type() == RTE_PROC_PRIMARY ? 
+ memseg_primary_init() : + memseg_secondary_init(); +} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index d602daf8..309b5872 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg) if (eal_thread_set_affinity() < 0) rte_panic("cannot set affinity\n"); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n", lcore_id, thread_id, cpuset, ret == 0 ? "" : "..."); diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build index e83fc919..3945b529 100644 --- a/lib/librte_eal/bsdapp/eal/meson.build +++ b/lib/librte_eal/bsdapp/eal/meson.build @@ -4,12 +4,17 @@ env_objs = [] env_headers = [] env_sources = files('eal_alarm.c', + 'eal_cpuflags.c', 'eal_debug.c', 'eal_hugepage_info.c', 'eal_interrupts.c', 'eal_lcore.c', + 'eal_memalloc.c', 'eal_thread.c', 'eal_timer.c', 'eal.c', 'eal_memory.c', + 'eal_dev.c' ) + +deps += ['kvargs'] diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile deleted file mode 100644 index b6f92d55..00000000 --- a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation -# - -KMOD= nic_uio -SRCS= nic_uio.c device_if.h bus_if.h pci_if.h - -.include diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile deleted file mode 100644 index 376ef3a3..00000000 --- a/lib/librte_eal/bsdapp/nic_uio/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# module name and path -# -MODULE = nic_uio - -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -MODULE_CFLAGS += -Winline -Wall -Werror -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h - -# -# all source are stored in SRCS-y -# -SRCS-y := nic_uio.c - -include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/nic_uio/meson.build b/lib/librte_eal/bsdapp/nic_uio/meson.build deleted file mode 100644 index 4bdaf969..00000000 --- a/lib/librte_eal/bsdapp/nic_uio/meson.build +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation - -sources = files('nic_uio.c') diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c deleted file mode 100644 index 401b487e..00000000 --- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c +++ /dev/null @@ -1,350 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation - */ -#include -__FBSDID("$FreeBSD$"); - -#include /* defines used in kernel.h */ -#include -#include /* types used in module initialization */ -#include /* cdevsw struct */ -#include /* structs, prototypes for pci bus stuff and DEVMETHOD */ -#include -#include -#include -#include - -#include -#include /* For pci_get macros! */ -#include /* The softc holds our per-instance data. 
*/ -#include -#include -#include -#include -#include - - -#define MAX_BARS (PCIR_MAX_BAR_0 + 1) - -#define MAX_DETACHED_DEVICES 128 -static device_t detached_devices[MAX_DETACHED_DEVICES] = {}; -static int num_detached = 0; - -struct nic_uio_softc { - device_t dev_t; - struct cdev *my_cdev; - int bar_id[MAX_BARS]; - struct resource *bar_res[MAX_BARS]; - u_long bar_start[MAX_BARS]; - u_long bar_size[MAX_BARS]; -}; - -/* Function prototypes */ -static d_open_t nic_uio_open; -static d_close_t nic_uio_close; -static d_mmap_t nic_uio_mmap; -static d_mmap_single_t nic_uio_mmap_single; -static int nic_uio_probe(device_t dev); -static int nic_uio_attach(device_t dev); -static int nic_uio_detach(device_t dev); -static int nic_uio_shutdown(void); -static int nic_uio_modevent(module_t mod, int type, void *arg); - -static struct cdevsw uio_cdevsw = { - .d_name = "nic_uio", - .d_version = D_VERSION, - .d_open = nic_uio_open, - .d_close = nic_uio_close, - .d_mmap = nic_uio_mmap, - .d_mmap_single = nic_uio_mmap_single, -}; - -static device_method_t nic_uio_methods[] = { - DEVMETHOD(device_probe, nic_uio_probe), - DEVMETHOD(device_attach, nic_uio_attach), - DEVMETHOD(device_detach, nic_uio_detach), - DEVMETHOD_END -}; - -struct device { - int vend; - int dev; -}; - -struct pci_bdf { - uint32_t bus; - uint32_t devid; - uint32_t function; -}; - -static devclass_t nic_uio_devclass; - -DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc)); -DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0); - -static int -nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, - int prot, vm_memattr_t *memattr) -{ - *paddr = offset; - return 0; -} - -static int -nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, - struct vm_object **obj, int nprot) -{ - /* - * The BAR index is encoded in the offset. Divide the offset by - * PAGE_SIZE to get the index of the bar requested by the user - * app. 
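/*
 * In concrete terms: an mmap() offset of 2 * PAGE_SIZE selects BAR 2,
 * i.e. PCIR_BAR(2); on return *offset is rewritten to the BAR's bus
 * address (rman_get_start()), which the device pager then hands back
 * from nic_uio_mmap() as the physical address to map.
 */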
- */ - unsigned bar = *offset/PAGE_SIZE; - struct nic_uio_softc *sc = cdev->si_drv1; - - if (bar >= MAX_BARS) - return EINVAL; - - if (sc->bar_res[bar] == NULL) { - sc->bar_id[bar] = PCIR_BAR(bar); - - if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4))) - sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT, - &sc->bar_id[bar], RF_ACTIVE); - else - sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY, - &sc->bar_id[bar], RF_ACTIVE); - } - if (sc->bar_res[bar] == NULL) - return ENXIO; - - sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]); - sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]); - - device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar, - sc->bar_start[bar], sc->bar_size[bar]); - - *offset = sc->bar_start[bar]; - *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, - curthread->td_ucred); - return 0; -} - - -int -nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td) -{ - return 0; -} - -int -nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td) -{ - return 0; -} - -static int -nic_uio_probe (device_t dev) -{ - int i; - unsigned int bus = pci_get_bus(dev); - unsigned int device = pci_get_slot(dev); - unsigned int function = pci_get_function(dev); - - char bdf_str[256]; - char *token, *remaining; - - /* First check if we found this on load */ - for (i = 0; i < num_detached; i++) - if (bus == pci_get_bus(detached_devices[i]) && - device == pci_get_slot(detached_devices[i]) && - function == pci_get_function(detached_devices[i])) { - device_set_desc(dev, "DPDK PCI Device"); - return BUS_PROBE_SPECIFIC; - } - - /* otherwise check if it's a new device and if it matches the BDF */ - memset(bdf_str, 0, sizeof(bdf_str)); - TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); - remaining = bdf_str; - while (1) { - if (remaining == NULL || remaining[0] == '\0') - break; - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - bus = strtol(token, NULL, 10); - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - device = strtol(token, NULL, 10); - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - function = strtol(token, NULL, 10); - - if (bus == pci_get_bus(dev) && - device == pci_get_slot(dev) && - function == pci_get_function(dev)) { - - if (num_detached < MAX_DETACHED_DEVICES) { - printf("%s: probed dev=%p\n", - __func__, dev); - detached_devices[num_detached++] = dev; - device_set_desc(dev, "DPDK PCI Device"); - return BUS_PROBE_SPECIFIC; - } else { - printf("%s: reached MAX_DETACHED_DEVICES=%d. 
dev=%p won't be reattached\n", - __func__, MAX_DETACHED_DEVICES, - dev); - break; - } - } - } - - return ENXIO; -} - -static int -nic_uio_attach(device_t dev) -{ - int i; - struct nic_uio_softc *sc; - - sc = device_get_softc(dev); - sc->dev_t = dev; - sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev), - UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u", - pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); - if (sc->my_cdev == NULL) - return ENXIO; - sc->my_cdev->si_drv1 = sc; - - for (i = 0; i < MAX_BARS; i++) - sc->bar_res[i] = NULL; - - pci_enable_busmaster(dev); - - return 0; -} - -static int -nic_uio_detach(device_t dev) -{ - int i; - struct nic_uio_softc *sc; - sc = device_get_softc(dev); - - for (i = 0; i < MAX_BARS; i++) - if (sc->bar_res[i] != NULL) { - - if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4))) - bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i], - sc->bar_res[i]); - else - bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i], - sc->bar_res[i]); - } - - if (sc->my_cdev != NULL) - destroy_dev(sc->my_cdev); - return 0; -} - -static void -nic_uio_load(void) -{ - uint32_t bus, device, function; - device_t dev; - char bdf_str[256]; - char *token, *remaining; - - memset(bdf_str, 0, sizeof(bdf_str)); - TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); - remaining = bdf_str; - printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str); - /* - * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f". - * But the code below does not try differentiate between : and , - * and just blindly uses 3 tokens at a time to construct a - * bus/device/function tuple. - * - * There is no checking on strtol() return values, but this should - * be OK. Worst case is it cannot convert and returns 0. This - * could give us a different BDF than intended, but as long as the - * PCI device/vendor ID does not match it will not matter. - */ - while (1) { - if (remaining == NULL || remaining[0] == '\0') - break; - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - bus = strtol(token, NULL, 10); - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - device = strtol(token, NULL, 10); - token = strsep(&remaining, ",:"); - if (token == NULL) - break; - function = strtol(token, NULL, 10); - - dev = pci_find_bsf(bus, device, function); - if (dev == NULL) - continue; - - if (num_detached < MAX_DETACHED_DEVICES) { - printf("nic_uio_load: detaching and storing dev=%p\n", - dev); - detached_devices[num_detached++] = dev; - } else { - printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. 
dev=%p won't be reattached\n", - MAX_DETACHED_DEVICES, dev); - } - device_detach(dev); - } -} - -static void -nic_uio_unload(void) -{ - int i; - printf("nic_uio_unload: entered...\n"); - - for (i = 0; i < num_detached; i++) { - printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n", - detached_devices[i]); - device_probe_and_attach(detached_devices[i]); - printf("nic_uio_unload: done.\n"); - } - - printf("nic_uio_unload: leaving...\n"); -} - -static int -nic_uio_shutdown(void) -{ - return 0; -} - -static int -nic_uio_modevent(module_t mod, int type, void *arg) -{ - - switch (type) { - case MOD_LOAD: - nic_uio_load(); - break; - case MOD_UNLOAD: - nic_uio_unload(); - break; - case MOD_SHUTDOWN: - nic_uio_shutdown(); - break; - default: - break; - } - - return 0; -} diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index ea824a3a..cca68826 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h +INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h -INC += rte_reciprocal.h +INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c index 88f1cbe3..caf3dc83 100644 --- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -1,34 +1,6 @@ -/* - * BSD LICENSE - * - * Copyright (C) Cavium, Inc. 2015. - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Cavium, Inc nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) Cavium, Inc. 2015. + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #include "rte_cpuflags.h" @@ -133,22 +105,10 @@ const struct feature_entry rte_cpu_feature_table[] = { static void rte_cpu_get_features(hwcap_registers_t out) { - int auxv_fd; - _Elfx_auxv_t auxv; - - auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd != -1); - while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { - if (auxv.a_type == AT_HWCAP) { - out[REG_HWCAP] = auxv.a_un.a_val; - } else if (auxv.a_type == AT_HWCAP2) { - out[REG_HWCAP2] = auxv.a_un.a_val; - } else if (auxv.a_type == AT_PLATFORM) { - if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR)) - out[REG_PLATFORM] = 0x0001; - } - } - close(auxv_fd); + out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP); + out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2); + if (!rte_cpu_strcmp_auxval(AT_PLATFORM, PLATFORM_STR)) + out[REG_PLATFORM] = 0x0001; } /* diff --git a/lib/librte_eal/common/arch/arm/rte_hypervisor.c b/lib/librte_eal/common/arch/arm/rte_hypervisor.c index 3792fe2c..08a1c97d 100644 --- a/lib/librte_eal/common/arch/arm/rte_hypervisor.c +++ b/lib/librte_eal/common/arch/arm/rte_hypervisor.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017 Mellanox Technologies, Ltd. + * Copyright 2017 Mellanox Technologies, Ltd */ #include "rte_hypervisor.h" diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c index 970a61c5..e7a82452 100644 --- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c @@ -104,19 +104,8 @@ const struct feature_entry rte_cpu_feature_table[] = { static void rte_cpu_get_features(hwcap_registers_t out) { - int auxv_fd; - Elf64_auxv_t auxv; - - auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd != -1); - while (read(auxv_fd, &auxv, - sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { - if (auxv.a_type == AT_HWCAP) - out[REG_HWCAP] = auxv.a_un.a_val; - else if (auxv.a_type == AT_HWCAP2) - out[REG_HWCAP2] = auxv.a_un.a_val; - } - close(auxv_fd); + out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP); + out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2); } /* diff --git a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c index 3792fe2c..08a1c97d 100644 --- a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c +++ b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017 Mellanox Technologies, Ltd. 
+ * Copyright 2017 Mellanox Technologies, Ltd */ #include "rte_hypervisor.h" diff --git a/lib/librte_eal/common/arch/x86/rte_hypervisor.c b/lib/librte_eal/common/arch/x86/rte_hypervisor.c index edf07be1..c38cfc09 100644 --- a/lib/librte_eal/common/arch/x86/rte_hypervisor.c +++ b/lib/librte_eal/common/arch/x86/rte_hypervisor.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017 Mellanox Technologies, Ltd. + * Copyright 2017 Mellanox Technologies, Ltd */ #include "rte_hypervisor.h" diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index 3e022d51..0943851c 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -36,6 +36,7 @@ #include #include +#include #include "eal_private.h" @@ -212,7 +213,7 @@ rte_bus_find_by_device_name(const char *str) char name[RTE_DEV_NAME_MAX_LEN]; char *c; - snprintf(name, sizeof(name), "%s", str); + strlcpy(name, str, sizeof(name)); c = strchr(name, ','); if (c != NULL) c[0] = '\0'; diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c new file mode 100644 index 00000000..404a9065 --- /dev/null +++ b/lib/librte_eal/common/eal_common_class.c @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Gaëtan Rivet + */ + +#include +#include +#include + +#include +#include + +struct rte_class_list rte_class_list = + TAILQ_HEAD_INITIALIZER(rte_class_list); + +__rte_experimental void +rte_class_register(struct rte_class *class) +{ + RTE_VERIFY(class); + RTE_VERIFY(class->name && strlen(class->name)); + + TAILQ_INSERT_TAIL(&rte_class_list, class, next); + RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name); +} + +__rte_experimental void +rte_class_unregister(struct rte_class *class) +{ + TAILQ_REMOVE(&rte_class_list, class, next); + RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name); +} + +__rte_experimental +struct rte_class * +rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp, + const void *data) +{ + struct rte_class *cls; + + if (start != NULL) + cls = TAILQ_NEXT(start, next); + else + cls = TAILQ_FIRST(&rte_class_list); + while (cls != NULL) { + if (cmp(cls, data) == 0) + break; + cls = TAILQ_NEXT(cls, next); + } + return cls; +} + +static int +cmp_class_name(const struct rte_class *class, const void *_name) +{ + const char *name = _name; + + return strcmp(class->name, name); +} + +__rte_experimental +struct rte_class * +rte_class_find_by_name(const char *name) +{ + return rte_class_find(NULL, cmp_class_name, (const void *)name); +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index cd071442..678dbcac 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -10,24 +10,62 @@ #include #include +#include #include #include #include +#include +#include #include +#include +#include #include "eal_private.h" -static int cmp_detached_dev_name(const struct rte_device *dev, - const void *_name) -{ - const char *name = _name; +/** + * The device event callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the device name. 
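/*
 * Illustrative sketch, not applied by this patch: how a subsystem could use
 * the rte_class registry added in eal_common_class.c above. Only
 * rte_class_register(), rte_class_unregister() and rte_class_find_by_name()
 * come from that file; the "demo" class name and the RTE_INIT() constructor
 * wrapper are assumptions for the example.
 */
#include <rte_class.h>
#include <rte_common.h>

static struct rte_class demo_class = {
	.name = "demo",
};

RTE_INIT(demo_class_init)
{
	/* appends the class to rte_class_list */
	rte_class_register(&demo_class);
}

static void
demo_class_lookup(void)
{
	struct rte_class *cls = rte_class_find_by_name("demo");

	if (cls != NULL)
		rte_class_unregister(cls);
}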
+ */ +struct dev_event_callback { + TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */ + rte_dev_event_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Callback parameter */ + char *dev_name; /**< Callback device name, NULL is for all device */ + uint32_t active; /**< Callback is executing */ +}; - /* skip attached devices */ - if (dev->driver != NULL) - return 1; +/** @internal Structure to keep track of registered callbacks */ +TAILQ_HEAD(dev_event_cb_list, dev_event_callback); - return strcmp(dev->name, name); -} +/* The device event callback list for all registered callbacks. */ +static struct dev_event_cb_list dev_event_cbs; + +/* spinlock for device callbacks */ +static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER; + +struct dev_next_ctx { + struct rte_dev_iterator *it; + const char *bus_str; + const char *cls_str; +}; + +#define CTX(it, bus_str, cls_str) \ + (&(const struct dev_next_ctx){ \ + .it = it, \ + .bus_str = bus_str, \ + .cls_str = cls_str, \ + }) + +#define ITCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->it) + +#define BUSCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str) + +#define CLSCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str) static int cmp_dev_name(const struct rte_device *dev, const void *_name) { @@ -89,29 +127,12 @@ int rte_eal_dev_detach(struct rte_device *dev) return ret; } -static char * -full_dev_name(const char *bus, const char *dev, const char *args) -{ - char *name; - size_t len; - - len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args) + 1; - name = calloc(1, len); - if (name == NULL) { - RTE_LOG(ERR, EAL, "Could not allocate full device name\n"); - return NULL; - } - snprintf(name, len, "%s:%s,%s", bus, dev, args); - return name; -} - int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname, const char *devargs) { struct rte_bus *bus; struct rte_device *dev; struct rte_devargs *da; - char *name; int ret; bus = rte_bus_find_by_name(busname); @@ -126,21 +147,16 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn return -ENOTSUP; } - name = full_dev_name(busname, devname, devargs); - if (name == NULL) - return -ENOMEM; - da = calloc(1, sizeof(*da)); - if (da == NULL) { - ret = -ENOMEM; - goto err_name; - } + if (da == NULL) + return -ENOMEM; - ret = rte_eal_devargs_parse(name, da); + ret = rte_devargs_parsef(da, "%s:%s,%s", + busname, devname, devargs); if (ret) goto err_devarg; - ret = rte_eal_devargs_insert(da); + ret = rte_devargs_insert(da); if (ret) goto err_devarg; @@ -148,30 +164,32 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn if (ret) goto err_devarg; - dev = bus->find_device(NULL, cmp_detached_dev_name, devname); + dev = bus->find_device(NULL, cmp_dev_name, devname); if (dev == NULL) { - RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n", + RTE_LOG(ERR, EAL, "Cannot find device (%s)\n", devname); ret = -ENODEV; goto err_devarg; } + if (dev->driver != NULL) { + RTE_LOG(ERR, EAL, "Device is already plugged\n"); + return -EEXIST; + } + ret = bus->plug(dev); if (ret) { RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", dev->name); goto err_devarg; } - free(name); return 0; err_devarg: - if (rte_eal_devargs_remove(busname, devname)) { + if (rte_devargs_remove(busname, devname)) { free(da->args); free(da); } -err_name: - free(name); return ret; } @@ -200,10 +218,349 @@ rte_eal_hotplug_remove(const char *busname, const char *devname) return -EINVAL; } + if (dev->driver == NULL) { 
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n"); + return -ENOENT; + } + ret = bus->unplug(dev); if (ret) RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", dev->name); - rte_eal_devargs_remove(busname, devname); + rte_devargs_remove(busname, devname); + return ret; +} + +int __rte_experimental +rte_dev_event_callback_register(const char *device_name, + rte_dev_event_cb_fn cb_fn, + void *cb_arg) +{ + struct dev_event_callback *event_cb; + int ret; + + if (!cb_fn) + return -EINVAL; + + rte_spinlock_lock(&dev_event_lock); + + if (TAILQ_EMPTY(&dev_event_cbs)) + TAILQ_INIT(&dev_event_cbs); + + TAILQ_FOREACH(event_cb, &dev_event_cbs, next) { + if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) { + if (device_name == NULL && event_cb->dev_name == NULL) + break; + if (device_name == NULL || event_cb->dev_name == NULL) + continue; + if (!strcmp(event_cb->dev_name, device_name)) + break; + } + } + + /* create a new callback. */ + if (event_cb == NULL) { + event_cb = malloc(sizeof(struct dev_event_callback)); + if (event_cb != NULL) { + event_cb->cb_fn = cb_fn; + event_cb->cb_arg = cb_arg; + event_cb->active = 0; + if (!device_name) { + event_cb->dev_name = NULL; + } else { + event_cb->dev_name = strdup(device_name); + if (event_cb->dev_name == NULL) { + ret = -ENOMEM; + goto error; + } + } + TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next); + } else { + RTE_LOG(ERR, EAL, + "Failed to allocate memory for device " + "event callback."); + ret = -ENOMEM; + goto error; + } + } else { + RTE_LOG(ERR, EAL, + "The callback is already exist, no need " + "to register again.\n"); + ret = -EEXIST; + } + + rte_spinlock_unlock(&dev_event_lock); + return 0; +error: + free(event_cb); + rte_spinlock_unlock(&dev_event_lock); + return ret; +} + +int __rte_experimental +rte_dev_event_callback_unregister(const char *device_name, + rte_dev_event_cb_fn cb_fn, + void *cb_arg) +{ + int ret = 0; + struct dev_event_callback *event_cb, *next; + + if (!cb_fn) + return -EINVAL; + + rte_spinlock_lock(&dev_event_lock); + /*walk through the callbacks and remove all that match. */ + for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL; + event_cb = next) { + + next = TAILQ_NEXT(event_cb, next); + + if (device_name != NULL && event_cb->dev_name != NULL) { + if (!strcmp(event_cb->dev_name, device_name)) { + if (event_cb->cb_fn != cb_fn || + (cb_arg != (void *)-1 && + event_cb->cb_arg != cb_arg)) + continue; + } + } else if (device_name != NULL) { + continue; + } + + /* + * if this callback is not executing right now, + * then remove it. 
+ */ + if (event_cb->active == 0) { + TAILQ_REMOVE(&dev_event_cbs, event_cb, next); + free(event_cb); + ret++; + } else { + continue; + } + } + rte_spinlock_unlock(&dev_event_lock); return ret; } + +void +dev_callback_process(char *device_name, enum rte_dev_event_type event) +{ + struct dev_event_callback *cb_lst; + + if (device_name == NULL) + return; + + rte_spinlock_lock(&dev_event_lock); + + TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) { + if (cb_lst->dev_name) { + if (strcmp(cb_lst->dev_name, device_name)) + continue; + } + cb_lst->active = 1; + rte_spinlock_unlock(&dev_event_lock); + cb_lst->cb_fn(device_name, event, + cb_lst->cb_arg); + rte_spinlock_lock(&dev_event_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&dev_event_lock); +} + +__rte_experimental +int +rte_dev_iterator_init(struct rte_dev_iterator *it, + const char *dev_str) +{ + struct rte_devargs devargs; + struct rte_class *cls = NULL; + struct rte_bus *bus = NULL; + + /* Having both bus_str and cls_str NULL is illegal, + * marking this iterator as invalid unless + * everything goes well. + */ + it->bus_str = NULL; + it->cls_str = NULL; + + devargs.data = dev_str; + if (rte_devargs_layers_parse(&devargs, dev_str)) + goto get_out; + + bus = devargs.bus; + cls = devargs.cls; + /* The string should have at least + * one layer specified. + */ + if (bus == NULL && cls == NULL) { + RTE_LOG(ERR, EAL, + "Either bus or class must be specified.\n"); + rte_errno = EINVAL; + goto get_out; + } + if (bus != NULL && bus->dev_iterate == NULL) { + RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name); + rte_errno = ENOTSUP; + goto get_out; + } + if (cls != NULL && cls->dev_iterate == NULL) { + RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name); + rte_errno = ENOTSUP; + goto get_out; + } + it->bus_str = devargs.bus_str; + it->cls_str = devargs.cls_str; + it->dev_str = dev_str; + it->bus = bus; + it->cls = cls; + it->device = NULL; + it->class_device = NULL; +get_out: + return -rte_errno; +} + +static char * +dev_str_sane_copy(const char *str) +{ + size_t end; + char *copy; + + end = strcspn(str, ",/"); + if (str[end] == ',') { + copy = strdup(&str[end + 1]); + } else { + /* '/' or '\0' */ + copy = strdup(""); + } + if (copy == NULL) { + rte_errno = ENOMEM; + } else { + char *slash; + + slash = strchr(copy, '/'); + if (slash != NULL) + slash[0] = '\0'; + } + return copy; +} + +static int +class_next_dev_cmp(const struct rte_class *cls, + const void *ctx) +{ + struct rte_dev_iterator *it; + const char *cls_str = NULL; + void *dev; + + if (cls->dev_iterate == NULL) + return 1; + it = ITCTX(ctx); + cls_str = CLSCTX(ctx); + dev = it->class_device; + /* it->cls_str != NULL means a class + * was specified in the devstr. + */ + if (it->cls_str != NULL && cls != it->cls) + return 1; + /* If an error occurred previously, + * no need to test further. + */ + if (rte_errno != 0) + return -1; + dev = cls->dev_iterate(dev, cls_str, it); + it->class_device = dev; + return dev == NULL; +} + +static int +bus_next_dev_cmp(const struct rte_bus *bus, + const void *ctx) +{ + struct rte_device *dev = NULL; + struct rte_class *cls = NULL; + struct rte_dev_iterator *it; + const char *bus_str = NULL; + + if (bus->dev_iterate == NULL) + return 1; + it = ITCTX(ctx); + bus_str = BUSCTX(ctx); + dev = it->device; + /* it->bus_str != NULL means a bus + * was specified in the devstr. + */ + if (it->bus_str != NULL && bus != it->bus) + return 1; + /* If an error occurred previously, + * no need to test further. 
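/*
 * Illustrative sketch, not applied by this patch: iterating devices with
 * rte_dev_iterator_init()/rte_dev_iterator_next() added in this file. The
 * layered filter string follows rte_devargs_layers_parse() ("bus=..." and/or
 * "class=..."); "bus=vdev" is an assumption for the example, and init fails
 * with -rte_errno when the selected bus or class has no dev_iterate callback.
 */
#include <stdio.h>
#include <rte_dev.h>

static void
demo_list_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	if (rte_dev_iterator_init(&it, "bus=vdev") != 0)
		return;
	while ((dev = rte_dev_iterator_next(&it)) != NULL)
		printf("matched device %s\n", dev->name);
}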
+ */ + if (rte_errno != 0) + return -1; + if (it->cls_str == NULL) { + dev = bus->dev_iterate(dev, bus_str, it); + goto end; + } + /* cls_str != NULL */ + if (dev == NULL) { +next_dev_on_bus: + dev = bus->dev_iterate(dev, bus_str, it); + it->device = dev; + } + if (dev == NULL) + return 1; + if (it->cls != NULL) + cls = TAILQ_PREV(it->cls, rte_class_list, next); + cls = rte_class_find(cls, class_next_dev_cmp, ctx); + if (cls != NULL) { + it->cls = cls; + goto end; + } + goto next_dev_on_bus; +end: + it->device = dev; + return dev == NULL; +} +__rte_experimental +struct rte_device * +rte_dev_iterator_next(struct rte_dev_iterator *it) +{ + struct rte_bus *bus = NULL; + int old_errno = rte_errno; + char *bus_str = NULL; + char *cls_str = NULL; + + rte_errno = 0; + if (it->bus_str == NULL && it->cls_str == NULL) { + /* Invalid iterator. */ + rte_errno = EINVAL; + return NULL; + } + if (it->bus != NULL) + bus = TAILQ_PREV(it->bus, rte_bus_list, next); + if (it->bus_str != NULL) { + bus_str = dev_str_sane_copy(it->bus_str); + if (bus_str == NULL) + goto out; + } + if (it->cls_str != NULL) { + cls_str = dev_str_sane_copy(it->cls_str); + if (cls_str == NULL) + goto out; + } + while ((bus = rte_bus_find(bus, bus_next_dev_cmp, + CTX(it, bus_str, cls_str)))) { + if (it->device != NULL) { + it->bus = bus; + goto out; + } + if (it->bus_str != NULL || + rte_errno != 0) + break; + } + if (rte_errno == 0) + rte_errno = old_errno; +out: + free(bus_str); + free(cls_str); + return it->device; +} diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index 810b3e18..dac2402a 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -11,13 +11,22 @@ #include #include +#include +#include +#include #include #include #include +#include +#include +#include #include #include "eal_private.h" +/** user device double-linked queue type definition */ +TAILQ_HEAD(rte_devargs_list, rte_devargs); + /** Global list of user devices */ struct rte_devargs_list devargs_list = TAILQ_HEAD_INITIALIZER(devargs_list); @@ -52,22 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str, return 0; } +static size_t +devargs_layer_count(const char *s) +{ + size_t i = s ? 1 : 0; + + while (s != NULL && s[0] != '\0') { + i += s[0] == '/'; + s++; + } + return i; +} + +int +rte_devargs_layers_parse(struct rte_devargs *devargs, + const char *devstr) +{ + struct { + const char *key; + const char *str; + struct rte_kvargs *kvlist; + } layers[] = { + { "bus=", NULL, NULL, }, + { "class=", NULL, NULL, }, + { "driver=", NULL, NULL, }, + }; + struct rte_kvargs_pair *kv = NULL; + struct rte_class *cls = NULL; + struct rte_bus *bus = NULL; + const char *s = devstr; + size_t nblayer; + size_t i = 0; + int ret = 0; + + /* Split each sub-lists. */ + nblayer = devargs_layer_count(devstr); + if (nblayer > RTE_DIM(layers)) { + RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n", + nblayer); + ret = -E2BIG; + goto get_out; + } + + /* If the devargs points the devstr + * as source data, then it should not allocate + * anything and keep referring only to it. + */ + if (devargs->data != devstr) { + devargs->data = strdup(devstr); + if (devargs->data == NULL) { + RTE_LOG(ERR, EAL, "OOM\n"); + ret = -ENOMEM; + goto get_out; + } + s = devargs->data; + } + + while (s != NULL) { + if (i >= RTE_DIM(layers)) { + RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s); + ret = -EINVAL; + goto get_out; + } + /* + * The last layer is free-form. 
+ * The "driver" key is not required (but accepted). + */ + if (strncmp(layers[i].key, s, strlen(layers[i].key)) && + i != RTE_DIM(layers) - 1) + goto next_layer; + layers[i].str = s; + layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/"); + if (layers[i].kvlist == NULL) { + RTE_LOG(ERR, EAL, "Could not parse %s\n", s); + ret = -EINVAL; + goto get_out; + } + s = strchr(s, '/'); + if (s != NULL) + s++; +next_layer: + i++; + } + + /* Parse each sub-list. */ + for (i = 0; i < RTE_DIM(layers); i++) { + if (layers[i].kvlist == NULL) + continue; + kv = &layers[i].kvlist->pairs[0]; + if (strcmp(kv->key, "bus") == 0) { + bus = rte_bus_find_by_name(kv->value); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n", + kv->value); + ret = -EFAULT; + goto get_out; + } + } else if (strcmp(kv->key, "class") == 0) { + cls = rte_class_find_by_name(kv->value); + if (cls == NULL) { + RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n", + kv->value); + ret = -EFAULT; + goto get_out; + } + } else if (strcmp(kv->key, "driver") == 0) { + /* Ignore */ + continue; + } + } + + /* Fill devargs fields. */ + devargs->bus_str = layers[0].str; + devargs->cls_str = layers[1].str; + devargs->drv_str = layers[2].str; + devargs->bus = bus; + devargs->cls = cls; + + /* If we own the data, clean up a bit + * the several layers string, to ease + * their parsing afterward. + */ + if (devargs->data != devstr) { + char *s = (void *)(intptr_t)(devargs->data); + + while ((s = strchr(s, '/'))) { + *s = '\0'; + s++; + } + } + +get_out: + for (i = 0; i < RTE_DIM(layers); i++) { + if (layers[i].kvlist) + rte_kvargs_free(layers[i].kvlist); + } + if (ret != 0) + rte_errno = -ret; + return ret; +} + static int bus_name_cmp(const struct rte_bus *bus, const void *name) { return strncmp(bus->name, name, strlen(bus->name)); } -int __rte_experimental -rte_eal_devargs_parse(const char *dev, struct rte_devargs *da) +__rte_experimental +int +rte_devargs_parse(struct rte_devargs *da, const char *dev) { struct rte_bus *bus = NULL; const char *devname; const size_t maxlen = sizeof(da->name); size_t i; - if (dev == NULL || da == NULL) + if (da == NULL) return -EINVAL; + /* Retrieve eventual bus info */ do { devname = dev; @@ -84,7 +235,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da) da->name[i] = devname[i]; i++; if (i == maxlen) { - fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n", + RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n", dev, maxlen); da->name[i - 1] = '\0'; return -EINVAL; @@ -94,7 +245,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da) if (bus == NULL) { bus = rte_bus_find_by_device_name(da->name); if (bus == NULL) { - fprintf(stderr, "ERROR: failed to parse device \"%s\"\n", + RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n", da->name); return -EFAULT; } @@ -106,18 +257,46 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da) else da->args = strdup(""); if (da->args == NULL) { - fprintf(stderr, "ERROR: not enough memory to parse arguments\n"); + RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n"); return -ENOMEM; } return 0; } +__rte_experimental +int +rte_devargs_parsef(struct rte_devargs *da, const char *format, ...) 
+{ + va_list ap; + size_t len; + char *dev; + + if (da == NULL) + return -EINVAL; + + va_start(ap, format); + len = vsnprintf(NULL, 0, format, ap); + va_end(ap); + + dev = calloc(1, len + 1); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "not enough memory to parse device\n"); + return -ENOMEM; + } + + va_start(ap, format); + vsnprintf(dev, len + 1, format, ap); + va_end(ap); + + return rte_devargs_parse(da, dev); +} + int __rte_experimental -rte_eal_devargs_insert(struct rte_devargs *da) +rte_devargs_insert(struct rte_devargs *da) { int ret; - ret = rte_eal_devargs_remove(da->bus->name, da->name); + ret = rte_devargs_remove(da->bus->name, da->name); if (ret < 0) return ret; TAILQ_INSERT_TAIL(&devargs_list, da, next); @@ -125,8 +304,9 @@ rte_eal_devargs_insert(struct rte_devargs *da) } /* store a whitelist parameter for later parsing */ +__rte_experimental int -rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) +rte_devargs_add(enum rte_devtype devtype, const char *devargs_str) { struct rte_devargs *devargs = NULL; struct rte_bus *bus = NULL; @@ -137,7 +317,7 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) if (devargs == NULL) goto fail; - if (rte_eal_devargs_parse(dev, devargs)) + if (rte_devargs_parse(devargs, dev)) goto fail; devargs->type = devtype; bus = devargs->bus; @@ -162,7 +342,7 @@ fail: } int __rte_experimental -rte_eal_devargs_remove(const char *busname, const char *devname) +rte_devargs_remove(const char *busname, const char *devname) { struct rte_devargs *d; void *tmp; @@ -180,8 +360,9 @@ rte_eal_devargs_remove(const char *busname, const char *devname) } /* count the number of devices of a specified type */ +__rte_experimental unsigned int -rte_eal_devargs_type_count(enum rte_devtype devtype) +rte_devargs_type_count(enum rte_devtype devtype) { struct rte_devargs *devargs; unsigned int count = 0; @@ -195,8 +376,9 @@ rte_eal_devargs_type_count(enum rte_devtype devtype) } /* dump the user devices on the console */ +__rte_experimental void -rte_eal_devargs_dump(FILE *f) +rte_devargs_dump(FILE *f) { struct rte_devargs *devargs; @@ -207,3 +389,23 @@ rte_eal_devargs_dump(FILE *f) devargs->name, devargs->args); } } + +/* bus-aware rte_devargs iterator. 
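/*
 * Illustrative sketch, not applied by this patch: building and storing a
 * devargs entry the same way rte_eal_hotplug_add() now does, using the
 * experimental rte_devargs_parsef()/rte_devargs_insert() added above. The
 * "vdev"/"net_null0"/"size=64" strings are assumptions for the example.
 */
#include <errno.h>
#include <stdlib.h>
#include <rte_devargs.h>

static int
demo_add_devargs(void)
{
	struct rte_devargs *da;
	int ret;

	da = calloc(1, sizeof(*da));
	if (da == NULL)
		return -ENOMEM;

	/* "bus:device,args" is assembled and parsed in one step */
	ret = rte_devargs_parsef(da, "%s:%s,%s", "vdev", "net_null0", "size=64");
	if (ret == 0)
		ret = rte_devargs_insert(da); /* devargs_list takes ownership */
	if (ret != 0) {
		free(da->args);
		free(da);
	}
	return ret;
}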
*/ +__rte_experimental +struct rte_devargs * +rte_devargs_next(const char *busname, const struct rte_devargs *start) +{ + struct rte_devargs *da; + + if (start != NULL) + da = TAILQ_NEXT(start, next); + else + da = TAILQ_FIRST(&devargs_list); + while (da != NULL) { + if (busname == NULL || + (strcmp(busname, da->bus->name) == 0)) + return da; + da = TAILQ_NEXT(da, next); + } + return NULL; +} diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c new file mode 100644 index 00000000..43caf3ce --- /dev/null +++ b/lib/librte_eal/common/eal_common_fbarray.c @@ -0,0 +1,1239 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" + +#include "rte_fbarray.h" + +#define MASK_SHIFT 6ULL +#define MASK_ALIGN (1ULL << MASK_SHIFT) +#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT) +#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN)) +#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod) + +/* + * This is a mask that is always stored at the end of array, to provide fast + * way of finding free/used spots without looping through each element. + */ + +struct used_mask { + unsigned int n_masks; + uint64_t data[]; +}; + +static size_t +calc_mask_size(unsigned int len) +{ + /* mask must be multiple of MASK_ALIGN, even though length of array + * itself may not be aligned on that boundary. + */ + len = RTE_ALIGN_CEIL(len, MASK_ALIGN); + return sizeof(struct used_mask) + + sizeof(uint64_t) * MASK_LEN_TO_IDX(len); +} + +static size_t +calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len) +{ + size_t data_sz = elt_sz * len; + size_t msk_sz = calc_mask_size(len); + return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz); +} + +static struct used_mask * +get_used_mask(void *data, unsigned int elt_sz, unsigned int len) +{ + return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len); +} + +static int +resize_and_map(int fd, void *addr, size_t len) +{ + char path[PATH_MAX]; + void *map_addr; + + if (ftruncate(fd, len)) { + RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path); + /* pass errno up the chain */ + rte_errno = errno; + return -1; + } + + map_addr = mmap(addr, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0); + if (map_addr != addr) { + RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno)); + /* pass errno up the chain */ + rte_errno = errno; + return -1; + } + return 0; +} + +static int +find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int msk_idx, lookahead_idx, first, first_mod; + unsigned int last, last_mod; + uint64_t last_msk, ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing ctz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + ignore_msk = ~((1ULL << first_mod) - 1); + + /* array length may not be aligned, so calculate ignore mask for last + * mask index. 
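/*
 * Worked example (illustrative, not part of the patch) of the MASK_* macros
 * defined at the top of this file: element index 70 maps to
 * MASK_LEN_TO_IDX(70) = 70 >> 6 = 1 (second 64-bit mask word) and
 * MASK_LEN_TO_MOD(70) = 70 - 64 = 6 (bit 6 of that word), while
 * MASK_GET_IDX(1, 6) = (1 << 6) + 6 = 70 recombines them. For an array of
 * len = 100, n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(100, 64)) = 2, and the
 * trailing ignore mask is ~(-1ULL << 36), keeping only the 36 valid tail bits.
 */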
+ */ + last = MASK_LEN_TO_IDX(arr->len); + last_mod = MASK_LEN_TO_MOD(arr->len); + last_msk = ~(-1ULL << last_mod); + + for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) { + uint64_t cur_msk, lookahead_msk; + unsigned int run_start, clz, left; + bool found = false; + /* + * The process of getting n consecutive bits for arbitrary n is + * a bit involved, but here it is in a nutshell: + * + * 1. let n be the number of consecutive bits we're looking for + * 2. check if n can fit in one mask, and if so, do n-1 + * rshift-ands to see if there is an appropriate run inside + * our current mask + * 2a. if we found a run, bail out early + * 2b. if we didn't find a run, proceed + * 3. invert the mask and count leading zeroes (that is, count + * how many consecutive set bits we had starting from the + * end of current mask) as k + * 3a. if k is 0, continue to next mask + * 3b. if k is not 0, we have a potential run + * 4. to satisfy our requirements, next mask must have n-k + * consecutive set bits right at the start, so we will do + * (n-k-1) rshift-ands and check if first bit is set. + * + * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until + * we either run out of masks, lose the run, or find what we + * were looking for. + */ + cur_msk = msk->data[msk_idx]; + left = n; + + /* if we're looking for free spaces, invert the mask */ + if (!used) + cur_msk = ~cur_msk; + + /* combine current ignore mask with last index ignore mask */ + if (msk_idx == last) + ignore_msk |= last_msk; + + /* if we have an ignore mask, ignore once */ + if (ignore_msk) { + cur_msk &= ignore_msk; + ignore_msk = 0; + } + + /* if n can fit in within a single mask, do a search */ + if (n <= MASK_ALIGN) { + uint64_t tmp_msk = cur_msk; + unsigned int s_idx; + for (s_idx = 0; s_idx < n - 1; s_idx++) + tmp_msk &= tmp_msk >> 1ULL; + /* we found what we were looking for */ + if (tmp_msk != 0) { + run_start = __builtin_ctzll(tmp_msk); + return MASK_GET_IDX(msk_idx, run_start); + } + } + + /* + * we didn't find our run within the mask, or n > MASK_ALIGN, + * so we're going for plan B. + */ + + /* count leading zeroes on inverted mask */ + if (~cur_msk == 0) + clz = sizeof(cur_msk) * 8; + else + clz = __builtin_clzll(~cur_msk); + + /* if there aren't any runs at the end either, just continue */ + if (clz == 0) + continue; + + /* we have a partial run at the end, so try looking ahead */ + run_start = MASK_ALIGN - clz; + left -= clz; + + for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks; + lookahead_idx++) { + unsigned int s_idx, need; + lookahead_msk = msk->data[lookahead_idx]; + + /* if we're looking for free space, invert the mask */ + if (!used) + lookahead_msk = ~lookahead_msk; + + /* figure out how many consecutive bits we need here */ + need = RTE_MIN(left, MASK_ALIGN); + + for (s_idx = 0; s_idx < need - 1; s_idx++) + lookahead_msk &= lookahead_msk >> 1ULL; + + /* if first bit is not set, we've lost the run */ + if ((lookahead_msk & 1) == 0) { + /* + * we've scanned this far, so we know there are + * no runs in the space we've lookahead-scanned + * as well, so skip that on next iteration. + */ + ignore_msk = ~((1ULL << need) - 1); + msk_idx = lookahead_idx; + break; + } + + left -= need; + + /* check if we've found what we were looking for */ + if (left == 0) { + found = true; + break; + } + } + + /* we didn't find anything, so continue */ + if (!found) + continue; + + return MASK_GET_IDX(msk_idx, run_start); + } + /* we didn't find anything */ + rte_errno = used ? 
ENOENT : ENOSPC; + return -1; +} + +static int +find_next(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int last, last_mod; + uint64_t last_msk, ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing ctz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + ignore_msk = ~((1ULL << first_mod) - 1ULL); + + /* array length may not be aligned, so calculate ignore mask for last + * mask index. + */ + last = MASK_LEN_TO_IDX(arr->len); + last_mod = MASK_LEN_TO_MOD(arr->len); + last_msk = ~(-(1ULL) << last_mod); + + for (idx = first; idx < msk->n_masks; idx++) { + uint64_t cur = msk->data[idx]; + int found; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + if (idx == last) + cur &= last_msk; + + /* ignore everything before start on first iteration */ + if (idx == first) + cur &= ignore_msk; + + /* check if we have any entries */ + if (cur == 0) + continue; + + /* + * find first set bit - that will correspond to whatever it is + * that we're looking for. + */ + found = __builtin_ctzll(cur); + return MASK_GET_IDX(idx, found); + } + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_contig(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int last, last_mod; + uint64_t last_msk; + unsigned int need_len, result = 0; + + /* array length may not be aligned, so calculate ignore mask for last + * mask index. + */ + last = MASK_LEN_TO_IDX(arr->len); + last_mod = MASK_LEN_TO_MOD(arr->len); + last_msk = ~(-(1ULL) << last_mod); + + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + for (idx = first; idx < msk->n_masks; idx++, result += need_len) { + uint64_t cur = msk->data[idx]; + unsigned int run_len; + + need_len = MASK_ALIGN; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* if this is last mask, ignore everything after last bit */ + if (idx == last) + cur &= last_msk; + + /* ignore everything before start on first iteration */ + if (idx == first) { + cur >>= first_mod; + /* at the start, we don't need the full mask len */ + need_len -= first_mod; + } + + /* we will be looking for zeroes, so invert the mask */ + cur = ~cur; + + /* if mask is zero, we have a complete run */ + if (cur == 0) + continue; + + /* + * see if current run ends before mask end. + */ + run_len = __builtin_ctzll(cur); + + /* add however many zeroes we've had in the last run and quit */ + if (run_len < need_len) { + result += run_len; + break; + } + } + return result; +} + +static int +find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int msk_idx, lookbehind_idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing ctz. 
+ */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + msk_idx = first; + do { + uint64_t cur_msk, lookbehind_msk; + unsigned int run_start, run_end, ctz, left; + bool found = false; + /* + * The process of getting n consecutive bits from the top for + * arbitrary n is a bit involved, but here it is in a nutshell: + * + * 1. let n be the number of consecutive bits we're looking for + * 2. check if n can fit in one mask, and if so, do n-1 + * lshift-ands to see if there is an appropriate run inside + * our current mask + * 2a. if we found a run, bail out early + * 2b. if we didn't find a run, proceed + * 3. invert the mask and count trailing zeroes (that is, count + * how many consecutive set bits we had starting from the + * start of current mask) as k + * 3a. if k is 0, continue to next mask + * 3b. if k is not 0, we have a potential run + * 4. to satisfy our requirements, next mask must have n-k + * consecutive set bits at the end, so we will do (n-k-1) + * lshift-ands and check if last bit is set. + * + * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until + * we either run out of masks, lose the run, or find what we + * were looking for. + */ + cur_msk = msk->data[msk_idx]; + left = n; + + /* if we're looking for free spaces, invert the mask */ + if (!used) + cur_msk = ~cur_msk; + + /* if we have an ignore mask, ignore once */ + if (ignore_msk) { + cur_msk &= ignore_msk; + ignore_msk = 0; + } + + /* if n can fit in within a single mask, do a search */ + if (n <= MASK_ALIGN) { + uint64_t tmp_msk = cur_msk; + unsigned int s_idx; + for (s_idx = 0; s_idx < n - 1; s_idx++) + tmp_msk &= tmp_msk << 1ULL; + /* we found what we were looking for */ + if (tmp_msk != 0) { + /* clz will give us offset from end of mask, and + * we only get the end of our run, not start, + * so adjust result to point to where start + * would have been. + */ + run_start = MASK_ALIGN - + __builtin_clzll(tmp_msk) - n; + return MASK_GET_IDX(msk_idx, run_start); + } + } + + /* + * we didn't find our run within the mask, or n > MASK_ALIGN, + * so we're going for plan B. + */ + + /* count trailing zeroes on inverted mask */ + if (~cur_msk == 0) + ctz = sizeof(cur_msk) * 8; + else + ctz = __builtin_ctzll(~cur_msk); + + /* if there aren't any runs at the start either, just + * continue + */ + if (ctz == 0) + continue; + + /* we have a partial run at the start, so try looking behind */ + run_end = MASK_GET_IDX(msk_idx, ctz); + left -= ctz; + + /* go backwards, include zero */ + lookbehind_idx = msk_idx - 1; + + /* we can't lookbehind as we've run out of masks, so stop */ + if (msk_idx == 0) + break; + + do { + const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1); + unsigned int s_idx, need; + + lookbehind_msk = msk->data[lookbehind_idx]; + + /* if we're looking for free space, invert the mask */ + if (!used) + lookbehind_msk = ~lookbehind_msk; + + /* figure out how many consecutive bits we need here */ + need = RTE_MIN(left, MASK_ALIGN); + + for (s_idx = 0; s_idx < need - 1; s_idx++) + lookbehind_msk &= lookbehind_msk << 1ULL; + + /* if last bit is not set, we've lost the run */ + if ((lookbehind_msk & last_bit) == 0) { + /* + * we've scanned this far, so we know there are + * no runs in the space we've lookbehind-scanned + * as well, so skip that on next iteration. 
+ */ + ignore_msk = -1ULL << need; + msk_idx = lookbehind_idx; + break; + } + + left -= need; + + /* check if we've found what we were looking for */ + if (left == 0) { + found = true; + break; + } + } while ((lookbehind_idx--) != 0); /* decrement after check to + * include zero + */ + + /* we didn't find anything, so continue */ + if (!found) + continue; + + /* we've found what we were looking for, but we only know where + * the run ended, so calculate start position. + */ + return run_end - n; + } while (msk_idx-- != 0); /* decrement after check to include zero */ + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_prev(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing clz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + int found; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything before start on first iteration */ + if (idx == first) + cur &= ignore_msk; + + /* check if we have any entries */ + if (cur == 0) + continue; + + /* + * find last set bit - that will correspond to whatever it is + * that we're looking for. we're counting trailing zeroes, thus + * the value we get is counted from end of mask, so calculate + * position from start of mask. + */ + found = MASK_ALIGN - __builtin_clzll(cur) - 1; + + return MASK_GET_IDX(idx, found); + } while (idx-- != 0); /* decrement after check to include zero*/ + + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int need_len, result = 0; + + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + unsigned int run_len; + + need_len = MASK_ALIGN; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything after start on first iteration */ + if (idx == first) { + unsigned int end_len = MASK_ALIGN - first_mod - 1; + cur <<= end_len; + /* at the start, we don't need the full mask len */ + need_len -= end_len; + } + + /* we will be looking for zeroes, so invert the mask */ + cur = ~cur; + + /* if mask is zero, we have a complete run */ + if (cur == 0) + goto endloop; + + /* + * see where run ends, starting from the end. 
+ */ + run_len = __builtin_clzll(cur); + + /* add however many zeroes we've had in the last run and quit */ + if (run_len < need_len) { + result += run_len; + break; + } +endloop: + result += need_len; + } while (idx-- != 0); /* decrement after check to include zero */ + return result; +} + +static int +set_used(struct rte_fbarray *arr, unsigned int idx, bool used) +{ + struct used_mask *msk; + uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx); + unsigned int msk_idx = MASK_LEN_TO_IDX(idx); + bool already_used; + int ret = -1; + + if (arr == NULL || idx >= arr->len) { + rte_errno = EINVAL; + return -1; + } + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + ret = 0; + + /* prevent array from changing under us */ + rte_rwlock_write_lock(&arr->rwlock); + + already_used = (msk->data[msk_idx] & msk_bit) != 0; + + /* nothing to be done */ + if (used == already_used) + goto out; + + if (used) { + msk->data[msk_idx] |= msk_bit; + arr->count++; + } else { + msk->data[msk_idx] &= ~msk_bit; + arr->count--; + } +out: + rte_rwlock_write_unlock(&arr->rwlock); + + return ret; +} + +static int +fully_validate(const char *name, unsigned int elt_sz, unsigned int len) +{ + if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) { + rte_errno = EINVAL; + return -1; + } + + if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + return 0; +} + +int __rte_experimental +rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, + unsigned int elt_sz) +{ + size_t page_sz, mmap_len; + char path[PATH_MAX]; + struct used_mask *msk; + void *data = NULL; + int fd = -1; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + if (fully_validate(name, elt_sz, len)) + return -1; + + page_sz = sysconf(_SC_PAGESIZE); + if (page_sz == (size_t)-1) + goto fail; + + /* calculate our memory limits */ + mmap_len = calc_data_size(page_sz, elt_sz, len); + + data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0); + if (data == NULL) + goto fail; + + if (internal_config.no_shconf) { + /* remap virtual area as writable */ + void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (new_data == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n", + __func__, strerror(errno)); + goto fail; + } + } else { + eal_get_fbarray_path(path, sizeof(path), name); + + /* + * Each fbarray is unique to process namespace, i.e. the + * filename depends on process prefix. Try to take out a lock + * and see if we succeed. If we don't, someone else is using it + * already. + */ + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = errno; + goto fail; + } else if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = EBUSY; + goto fail; + } + + /* take out a non-exclusive lock, so that other processes could + * still attach to it, but no other process could reinitialize + * it. 
+ */ + if (flock(fd, LOCK_SH | LOCK_NB)) { + rte_errno = errno; + goto fail; + } + + if (resize_and_map(fd, data, mmap_len)) + goto fail; + + /* we've mmap'ed the file, we can now close the fd */ + close(fd); + } + + /* initialize the data */ + memset(data, 0, mmap_len); + + /* populate data structure */ + strlcpy(arr->name, name, sizeof(arr->name)); + arr->data = data; + arr->len = len; + arr->elt_sz = elt_sz; + arr->count = 0; + + msk = get_used_mask(data, elt_sz, len); + msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN)); + + rte_rwlock_init(&arr->rwlock); + + return 0; +fail: + if (data) + munmap(data, mmap_len); + if (fd >= 0) + close(fd); + return -1; +} + +int __rte_experimental +rte_fbarray_attach(struct rte_fbarray *arr) +{ + size_t page_sz, mmap_len; + char path[PATH_MAX]; + void *data = NULL; + int fd = -1; + + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + /* + * we don't need to synchronize attach as two values we need (element + * size and array length) are constant for the duration of life of + * the array, so the parts we care about will not race. + */ + + if (fully_validate(arr->name, arr->elt_sz, arr->len)) + return -1; + + page_sz = sysconf(_SC_PAGESIZE); + if (page_sz == (size_t)-1) + goto fail; + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + + data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0); + if (data == NULL) + goto fail; + + eal_get_fbarray_path(path, sizeof(path), arr->name); + + fd = open(path, O_RDWR); + if (fd < 0) { + rte_errno = errno; + goto fail; + } + + /* lock the file, to let others know we're using it */ + if (flock(fd, LOCK_SH | LOCK_NB)) { + rte_errno = errno; + goto fail; + } + + if (resize_and_map(fd, data, mmap_len)) + goto fail; + + close(fd); + + /* we're done */ + + return 0; +fail: + if (data) + munmap(data, mmap_len); + if (fd >= 0) + close(fd); + return -1; +} + +int __rte_experimental +rte_fbarray_detach(struct rte_fbarray *arr) +{ + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + + /* + * we don't need to synchronize detach as two values we need (element + * size and total capacity) are constant for the duration of life of + * the array, so the parts we care about will not race. if the user is + * detaching while doing something else in the same process, we can't + * really do anything about it, things will blow up either way. + */ + + size_t page_sz = sysconf(_SC_PAGESIZE); + + if (page_sz == (size_t)-1) + return -1; + + /* this may already be unmapped (e.g. repeated call from previously + * failed destroy(), but this is on user, we can't (easily) know if this + * is still mapped. 
+ */ + munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len)); + + return 0; +} + +int __rte_experimental +rte_fbarray_destroy(struct rte_fbarray *arr) +{ + int fd, ret; + char path[PATH_MAX]; + + ret = rte_fbarray_detach(arr); + if (ret) + return ret; + + /* try deleting the file */ + eal_get_fbarray_path(path, sizeof(path), arr->name); + + fd = open(path, O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n", + strerror(errno)); + return -1; + } + if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); + rte_errno = EBUSY; + ret = -1; + } else { + ret = 0; + unlink(path); + memset(arr, 0, sizeof(*arr)); + } + close(fd); + + return ret; +} + +void * __rte_experimental +rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx) +{ + void *ret = NULL; + if (arr == NULL) { + rte_errno = EINVAL; + return NULL; + } + + if (idx >= arr->len) { + rte_errno = EINVAL; + return NULL; + } + + ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz); + + return ret; +} + +int __rte_experimental +rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx) +{ + return set_used(arr, idx, true); +} + +int __rte_experimental +rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx) +{ + return set_used(arr, idx, false); +} + +int __rte_experimental +rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx) +{ + struct used_mask *msk; + int msk_idx; + uint64_t msk_bit; + int ret = -1; + + if (arr == NULL || idx >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + msk_idx = MASK_LEN_TO_IDX(idx); + msk_bit = 1ULL << MASK_LEN_TO_MOD(idx); + + ret = (msk->data[msk_idx] & msk_bit) != 0; + + rte_rwlock_read_unlock(&arr->rwlock); + + return ret; +} + +static int +fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = start; + goto out; + } + } else { + if (arr->count == 0) { + rte_errno = ENOENT; + goto out; + } + if (arr->len == arr->count) { + ret = start; + goto out; + } + } + if (next) + ret = find_next(arr, start, used); + else + ret = find_prev(arr, start, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +int __rte_experimental +rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, true, false); +} + +int __rte_experimental +rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, true, true); +} + +int __rte_experimental +rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, false); +} + +int __rte_experimental +rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, true); +} + +static int +fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool next, bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len || n > arr->len || n == 0) { + rte_errno = EINVAL; + return -1; + } + if (next && 
(arr->len - start) < n) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } + if (!next && start < (n - 1)) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count || arr->len - arr->count < n) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = next ? start : start - n + 1; + goto out; + } + } else { + if (arr->count < n) { + rte_errno = ENOENT; + goto out; + } + if (arr->count == arr->len) { + ret = next ? start : start - n + 1; + goto out; + } + } + + if (next) + ret = find_next_n(arr, start, n, used); + else + ret = find_prev_n(arr, start, n, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +int __rte_experimental +rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, true, false); +} + +int __rte_experimental +rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, true, true); +} + +int __rte_experimental +rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, false); +} + +int __rte_experimental +rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, true); +} + +static int +fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next, + bool used) +{ + int ret = -1; + + if (arr == NULL || start >= arr->len) { + rte_errno = EINVAL; + return -1; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + /* cheap checks to prevent doing useless work */ + if (used) { + if (arr->count == 0) { + ret = 0; + goto out; + } + if (next && arr->count == arr->len) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == arr->len) { + ret = start + 1; + goto out; + } + } else { + if (arr->len == arr->count) { + ret = 0; + goto out; + } + if (next && arr->count == 0) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == 0) { + ret = start + 1; + goto out; + } + } + + if (next) + ret = find_contig(arr, start, used); + else + ret = find_rev_contig(arr, start, used); +out: + rte_rwlock_read_unlock(&arr->rwlock); + return ret; +} + +int __rte_experimental +rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, true, false); +} + +int __rte_experimental +rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, true, true); +} + +int __rte_experimental +rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, false); +} + +int __rte_experimental +rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, true); +} + +int __rte_experimental +rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt) +{ + void *end; + int ret = -1; + + /* + * no need to synchronize as it doesn't matter if underlying data + * changes - we're doing pointer arithmetic here. 
+ */ + + if (arr == NULL || elt == NULL) { + rte_errno = EINVAL; + return -1; + } + end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len); + if (elt < arr->data || elt >= end) { + rte_errno = EINVAL; + return -1; + } + + ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz; + + return ret; +} + +void __rte_experimental +rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f) +{ + struct used_mask *msk; + unsigned int i; + + if (arr == NULL || f == NULL) { + rte_errno = EINVAL; + return; + } + + if (fully_validate(arr->name, arr->elt_sz, arr->len)) { + fprintf(f, "Invalid file-backed array\n"); + goto out; + } + + /* prevent array from changing under us */ + rte_rwlock_read_lock(&arr->rwlock); + + fprintf(f, "File-backed array: %s\n", arr->name); + fprintf(f, "size: %i occupied: %i elt_sz: %i\n", + arr->len, arr->count, arr->elt_sz); + + msk = get_used_mask(arr->data, arr->elt_sz, arr->len); + + for (i = 0; i < msk->n_masks; i++) + fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]); +out: + rte_rwlock_read_unlock(&arr->rwlock); +} diff --git a/lib/librte_eal/common/eal_common_hypervisor.c b/lib/librte_eal/common/eal_common_hypervisor.c index c3b4c621..5388b81a 100644 --- a/lib/librte_eal/common/eal_common_hypervisor.c +++ b/lib/librte_eal/common/eal_common_hypervisor.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017 Mellanox Technologies, Ltd. + * Copyright 2017 Mellanox Technologies, Ltd */ #include "rte_hypervisor.h" diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 7724fa43..3167e9d7 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -16,6 +17,19 @@ #include "eal_private.h" #include "eal_thread.h" +static int +socket_id_cmp(const void *a, const void *b) +{ + const int *lcore_id_a = a; + const int *lcore_id_b = b; + + if (*lcore_id_a < *lcore_id_b) + return -1; + if (*lcore_id_a > *lcore_id_b) + return 1; + return 0; +} + /* * Parse /sys/devices/system/cpu to get the number of physical and logical * processors on the machine. 
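/*
 * [Editor's sketch] The hunks above add the rte_fbarray search API
 * (rte_fbarray_find_next_n_free(), rte_fbarray_set_used(),
 * rte_fbarray_find_contig_used(), ...). A minimal, illustrative use of it is
 * sketched below; the array name, length and element size are arbitrary, a
 * primary process with EAL already initialized is assumed, and since these
 * functions are tagged __rte_experimental in this release the caller would
 * also need ALLOW_EXPERIMENTAL_API.
 */
#include <stdint.h>
#include <stdio.h>
#include <rte_fbarray.h>

static int
fbarray_usage_sketch(void)
{
	struct rte_fbarray arr;
	int idx;

	/* 64 elements of 8 bytes each, backed by a file named "demo" */
	if (rte_fbarray_init(&arr, "demo", 64, sizeof(uint64_t)) < 0)
		return -1;

	/* look for a run of 4 free slots starting from index 0 */
	idx = rte_fbarray_find_next_n_free(&arr, 0, 4);
	if (idx < 0)
		goto out;

	/* claim the first slot of that run and write to it */
	rte_fbarray_set_used(&arr, idx);
	*(uint64_t *)rte_fbarray_get(&arr, idx) = 42;

	/* report how many consecutive used slots start at idx (here: 1) */
	printf("contiguous used run: %d\n",
		rte_fbarray_find_contig_used(&arr, idx));
out:
	rte_fbarray_destroy(&arr);
	return 0;
}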
The function will fill the cpu_info @@ -28,6 +42,8 @@ rte_eal_cpu_init(void) struct rte_config *config = rte_eal_get_configuration(); unsigned lcore_id; unsigned count = 0; + unsigned int socket_id, prev_socket_id; + int lcore_to_socket_id[RTE_MAX_LCORE]; /* * Parse the maximum set of logical cores, detect the subset of running @@ -39,6 +55,19 @@ rte_eal_cpu_init(void) /* init cpuset for per lcore config */ CPU_ZERO(&lcore_config[lcore_id].cpuset); + /* find socket first */ + socket_id = eal_cpu_socket_id(lcore_id); + if (socket_id >= RTE_MAX_NUMA_NODES) { +#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID + socket_id = 0; +#else + RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than RTE_MAX_NUMA_NODES (%d)\n", + socket_id, RTE_MAX_NUMA_NODES); + return -1; +#endif + } + lcore_to_socket_id[lcore_id] = socket_id; + /* in 1:1 mapping, record related cpu detected state */ lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id); if (lcore_config[lcore_id].detected == 0) { @@ -54,18 +83,7 @@ rte_eal_cpu_init(void) config->lcore_role[lcore_id] = ROLE_RTE; lcore_config[lcore_id].core_role = ROLE_RTE; lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); - lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); - if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) { -#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID - lcore_config[lcore_id].socket_id = 0; -#else - RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than " - "RTE_MAX_NUMA_NODES (%d)\n", - lcore_config[lcore_id].socket_id, - RTE_MAX_NUMA_NODES); - return -1; -#endif - } + lcore_config[lcore_id].socket_id = socket_id; RTE_LOG(DEBUG, EAL, "Detected lcore %u as " "core %u on socket %u\n", lcore_id, lcore_config[lcore_id].core_id, @@ -79,5 +97,38 @@ rte_eal_cpu_init(void) RTE_MAX_LCORE); RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count); + /* sort all socket id's in ascending order */ + qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id), + sizeof(lcore_to_socket_id[0]), socket_id_cmp); + + prev_socket_id = -1; + config->numa_node_count = 0; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + socket_id = lcore_to_socket_id[lcore_id]; + if (socket_id != prev_socket_id) + config->numa_nodes[config->numa_node_count++] = + socket_id; + prev_socket_id = socket_id; + } + RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count); + return 0; } + +unsigned int __rte_experimental +rte_socket_count(void) +{ + const struct rte_config *config = rte_eal_get_configuration(); + return config->numa_node_count; +} + +int __rte_experimental +rte_socket_id_by_idx(unsigned int idx) +{ + const struct rte_config *config = rte_eal_get_configuration(); + if (idx >= config->numa_node_count) { + rte_errno = EINVAL; + return -1; + } + return config->numa_nodes[idx]; +} diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index 37b2e20e..c714a4bd 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,23 @@ struct rte_logs rte_logs = { .file = NULL, }; +struct rte_eal_opt_loglevel { + /** Next list entry */ + TAILQ_ENTRY(rte_eal_opt_loglevel) next; + /** Compiled regular expression obtained from the option */ + regex_t re_match; + /** Glob match string option */ + char *pattern; + /** Log level value obtained from the option */ + uint32_t level; +}; + +TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel); + +/** List of valid EAL log level options */ +static struct 
rte_eal_opt_loglevel_list opt_loglevel_list = + TAILQ_HEAD_INITIALIZER(opt_loglevel_list); + /* Stream to use for logging if rte_logs.file is NULL */ static FILE *default_log_stream; @@ -89,9 +107,9 @@ rte_log_set_level(uint32_t type, uint32_t level) return 0; } -/* set level */ +/* set log level by regular expression */ int -rte_log_set_level_regexp(const char *pattern, uint32_t level) +rte_log_set_level_regexp(const char *regex, uint32_t level) { regex_t r; size_t i; @@ -99,7 +117,7 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level) if (level > RTE_LOG_DEBUG) return -1; - if (regcomp(&r, pattern, 0) != 0) + if (regcomp(&r, regex, 0) != 0) return -1; for (i = 0; i < rte_logs.dynamic_types_len; i++) { @@ -115,6 +133,69 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level) return 0; } +/* + * Save the type string and the loglevel for later dynamic + * logtypes which may register later. + */ +static int rte_log_save_level(int priority, + const char *regex, const char *pattern) +{ + struct rte_eal_opt_loglevel *opt_ll = NULL; + + opt_ll = malloc(sizeof(*opt_ll)); + if (opt_ll == NULL) + goto fail; + + opt_ll->level = priority; + + if (regex) { + opt_ll->pattern = NULL; + if (regcomp(&opt_ll->re_match, regex, 0) != 0) + goto fail; + } else if (pattern) { + opt_ll->pattern = strdup(pattern); + if (opt_ll->pattern == NULL) + goto fail; + } else + goto fail; + + TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next); + return 0; +fail: + free(opt_ll); + return -1; +} + +int rte_log_save_regexp(const char *regex, int tmp) +{ + return rte_log_save_level(tmp, regex, NULL); +} + +/* set log level based on glob (file match) pattern */ +int +rte_log_set_level_pattern(const char *pattern, uint32_t level) +{ + size_t i; + + if (level > RTE_LOG_DEBUG) + return -1; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + + if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0) + rte_logs.dynamic_types[i].loglevel = level; + } + + return 0; +} + +int rte_log_save_pattern(const char *pattern, int priority) +{ + return rte_log_save_level(priority, NULL, pattern); +} + /* get the current loglevel for the message being processed */ int rte_log_cur_msg_loglevel(void) { @@ -186,6 +267,36 @@ rte_log_register(const char *name) return ret; } +/* Register an extended log type and try to pick its level from EAL options */ +int __rte_experimental +rte_log_register_type_and_pick_level(const char *name, uint32_t level_def) +{ + struct rte_eal_opt_loglevel *opt_ll; + uint32_t level = level_def; + int type; + + type = rte_log_register(name); + if (type < 0) + return type; + + TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) { + if (opt_ll->level > RTE_LOG_DEBUG) + continue; + + if (opt_ll->pattern) { + if (fnmatch(opt_ll->pattern, name, 0)) + level = opt_ll->level; + } else { + if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0) + level = opt_ll->level; + } + } + + rte_logs.dynamic_types[type].loglevel = level; + + return type; +} + struct logtype { uint32_t log_id; const char *logtype; @@ -224,9 +335,7 @@ static const struct logtype logtype_strings[] = { }; /* Logging should be first initializer (before drivers and bus) */ -RTE_INIT_PRIO(rte_log_init, 101); -static void -rte_log_init(void) +RTE_INIT_PRIO(rte_log_init, LOG) { uint32_t i; diff --git a/lib/librte_eal/common/eal_common_memalloc.c b/lib/librte_eal/common/eal_common_memalloc.c new file mode 100644 index 00000000..1d41ea11 --- /dev/null +++ 
b/lib/librte_eal/common/eal_common_memalloc.c @@ -0,0 +1,364 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" + +struct mem_event_callback_entry { + TAILQ_ENTRY(mem_event_callback_entry) next; + char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN]; + rte_mem_event_callback_t clb; + void *arg; +}; + +struct mem_alloc_validator_entry { + TAILQ_ENTRY(mem_alloc_validator_entry) next; + char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN]; + rte_mem_alloc_validator_t clb; + int socket_id; + size_t limit; +}; + +/** Double linked list of actions. */ +TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry); +TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry); + +static struct mem_event_callback_entry_list mem_event_callback_list = + TAILQ_HEAD_INITIALIZER(mem_event_callback_list); +static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER; + +static struct mem_alloc_validator_entry_list mem_alloc_validator_list = + TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list); +static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER; + +static struct mem_event_callback_entry * +find_mem_event_callback(const char *name, void *arg) +{ + struct mem_event_callback_entry *r; + + TAILQ_FOREACH(r, &mem_event_callback_list, next) { + if (!strcmp(r->name, name) && r->arg == arg) + break; + } + return r; +} + +static struct mem_alloc_validator_entry * +find_mem_alloc_validator(const char *name, int socket_id) +{ + struct mem_alloc_validator_entry *r; + + TAILQ_FOREACH(r, &mem_alloc_validator_list, next) { + if (!strcmp(r->name, name) && r->socket_id == socket_id) + break; + } + return r; +} + +bool +eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, + size_t len) +{ + void *end, *aligned_start, *aligned_end; + size_t pgsz = (size_t)msl->page_sz; + const struct rte_memseg *ms; + + /* for IOVA_VA, it's always contiguous */ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + return true; + + /* for legacy memory, it's always contiguous */ + if (internal_config.legacy_mem) + return true; + + end = RTE_PTR_ADD(start, len); + + /* for nohuge, we check pagemap, otherwise check memseg */ + if (!rte_eal_has_hugepages()) { + rte_iova_t cur, expected; + + aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz); + aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz); + + /* if start and end are on the same page, bail out early */ + if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz) + return true; + + /* skip first iteration */ + cur = rte_mem_virt2iova(aligned_start); + expected = cur + pgsz; + aligned_start = RTE_PTR_ADD(aligned_start, pgsz); + + while (aligned_start < aligned_end) { + cur = rte_mem_virt2iova(aligned_start); + if (cur != expected) + return false; + aligned_start = RTE_PTR_ADD(aligned_start, pgsz); + expected += pgsz; + } + } else { + int start_seg, end_seg, cur_seg; + rte_iova_t cur, expected; + + aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz); + aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz); + + start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) / + pgsz; + end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) / + pgsz; + + /* if start and end are on the same page, bail out early */ + if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz) + return true; + + /* skip first iteration */ + ms = rte_fbarray_get(&msl->memseg_arr, start_seg); + cur = ms->iova; + 
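/*
 * [Editor's note] From this point eal_memalloc_is_contig() walks the memseg
 * array one page at a time: "expected" holds the IOVA at which the next
 * segment must start for the range to be physically contiguous. For example,
 * with 2 MB pages a segment at IOVA 0x40000000 must be followed by one at
 * 0x40200000, then 0x40400000, and so on; any mismatch ends the check.
 */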
expected = cur + pgsz; + + /* if we can't access IOVA addresses, assume non-contiguous */ + if (cur == RTE_BAD_IOVA) + return false; + + for (cur_seg = start_seg + 1; cur_seg < end_seg; + cur_seg++, expected += pgsz) { + ms = rte_fbarray_get(&msl->memseg_arr, cur_seg); + + if (ms->iova != expected) + return false; + } + } + return true; +} + +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb, void *arg) +{ + struct mem_event_callback_entry *entry; + int ret, len; + if (name == NULL || clb == NULL) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name, arg); + if (entry != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + ret = -1; + goto unlock; + } + + /* callback successfully created and is valid, add it to the list */ + entry->clb = clb; + entry->arg = arg; + strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n", + name, arg); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +int +eal_memalloc_mem_event_callback_unregister(const char *name, void *arg) +{ + struct mem_event_callback_entry *entry; + int ret, len; + + if (name == NULL) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name, arg); + if (entry == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + TAILQ_REMOVE(&mem_event_callback_list, entry, next); + free(entry); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n", + name, arg); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len) +{ + struct mem_event_callback_entry *entry; + + rte_rwlock_read_lock(&mem_event_rwlock); + + TAILQ_FOREACH(entry, &mem_event_callback_list, next) { + RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n", + entry->name, entry->arg); + entry->clb(event, start, len, entry->arg); + } + + rte_rwlock_read_unlock(&mem_event_rwlock); +} + +int +eal_memalloc_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit) +{ + struct mem_alloc_validator_entry *entry; + int ret, len; + if (name == NULL || clb == NULL || socket_id < 0) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_alloc_validator_rwlock); + + entry = find_mem_alloc_validator(name, socket_id); + if (entry != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + ret = -1; + goto unlock; + } + + /* callback 
successfully created and is valid, add it to the list */ + entry->clb = clb; + entry->socket_id = socket_id; + entry->limit = limit; + strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n", + name, socket_id, limit); + +unlock: + rte_rwlock_write_unlock(&mem_alloc_validator_rwlock); + return ret; +} + +int +eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id) +{ + struct mem_alloc_validator_entry *entry; + int ret, len; + + if (name == NULL || socket_id < 0) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_alloc_validator_rwlock); + + entry = find_mem_alloc_validator(name, socket_id); + if (entry == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + TAILQ_REMOVE(&mem_alloc_validator_list, entry, next); + free(entry); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n", + name, socket_id); + +unlock: + rte_rwlock_write_unlock(&mem_alloc_validator_rwlock); + return ret; +} + +int +eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len) +{ + struct mem_alloc_validator_entry *entry; + int ret = 0; + + rte_rwlock_read_lock(&mem_alloc_validator_rwlock); + + TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) { + if (entry->socket_id != socket_id || entry->limit > new_len) + continue; + RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n", + entry->name, entry->socket_id); + if (entry->clb(socket_id, entry->limit, new_len) < 0) + ret = -1; + } + + rte_rwlock_read_unlock(&mem_alloc_validator_rwlock); + + return ret; +} diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 852f3bb9..fbfb1b05 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -2,82 +2,385 @@ * Copyright(c) 2010-2014 Intel Corporation */ +#include #include #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include +#include "eal_memalloc.h" #include "eal_private.h" #include "eal_internal_cfg.h" /* - * Return a pointer to a read-only table of struct rte_physmem_desc - * elements, containing the layout of all addressable physical - * memory. The last element of the table contains a NULL address. + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by hugepage_sz until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of hugepage size. 
*/ -const struct rte_memseg * -rte_eal_get_physmem_layout(void) + +#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" + +static void *next_baseaddr; +static uint64_t system_page_sz; + +void * +eal_get_virtual_area(void *requested_addr, size_t *size, + size_t page_sz, int flags, int mmap_flags) +{ + bool addr_is_hint, allow_shrink, unmap, no_align; + uint64_t map_sz; + void *mapped_addr, *aligned_addr; + + if (system_page_sz == 0) + system_page_sz = sysconf(_SC_PAGESIZE); + + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); + + addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0; + allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0; + unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0; + + if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 && + rte_eal_process_type() == RTE_PROC_PRIMARY) + next_baseaddr = (void *) internal_config.base_virtaddr; + + if (requested_addr == NULL && next_baseaddr != NULL) { + requested_addr = next_baseaddr; + requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz); + addr_is_hint = true; + } + + /* we don't need alignment of resulting pointer in the following cases: + * + * 1. page size is equal to system size + * 2. we have a requested address, and it is page-aligned, and we will + * be discarding the address if we get a different one. + * + * for all other cases, alignment is potentially necessary. + */ + no_align = (requested_addr != NULL && + requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) && + !addr_is_hint) || + page_sz == system_page_sz; + + do { + map_sz = no_align ? *size : *size + page_sz; + if (map_sz > SIZE_MAX) { + RTE_LOG(ERR, EAL, "Map size too big\n"); + rte_errno = E2BIG; + return NULL; + } + + mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ, + mmap_flags, -1, 0); + if (mapped_addr == MAP_FAILED && allow_shrink) + *size -= page_sz; + } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0); + + /* align resulting address - if map failed, we will ignore the value + * anyway, so no need to add additional checks. + */ + aligned_addr = no_align ? mapped_addr : + RTE_PTR_ALIGN(mapped_addr, page_sz); + + if (*size == 0) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n", + strerror(errno)); + rte_errno = errno; + return NULL; + } else if (mapped_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", + strerror(errno)); + /* pass errno up the call chain */ + rte_errno = errno; + return NULL; + } else if (requested_addr != NULL && !addr_is_hint && + aligned_addr != requested_addr) { + RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n", + requested_addr, aligned_addr); + munmap(mapped_addr, map_sz); + rte_errno = EADDRNOTAVAIL; + return NULL; + } else if (requested_addr != NULL && addr_is_hint && + aligned_addr != requested_addr) { + RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n", + requested_addr, aligned_addr); + RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n"); + } else if (next_baseaddr != NULL) { + next_baseaddr = RTE_PTR_ADD(aligned_addr, *size); + } + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + aligned_addr, *size); + + if (unmap) { + munmap(mapped_addr, map_sz); + } else if (!no_align) { + void *map_end, *aligned_end; + size_t before_len, after_len; + + /* when we reserve space with alignment, we add alignment to + * mapping size. 
On 32-bit, if 1GB alignment was requested, this + * would waste 1GB of address space, which is a luxury we cannot + * afford. so, if alignment was performed, check if any unneeded + * address space can be unmapped back. + */ + + map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz); + aligned_end = RTE_PTR_ADD(aligned_addr, *size); + + /* unmap space before aligned mmap address */ + before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr); + if (before_len > 0) + munmap(mapped_addr, before_len); + + /* unmap space after aligned end mmap address */ + after_len = RTE_PTR_DIFF(map_end, aligned_end); + if (after_len > 0) + munmap(aligned_end, after_len); + } + + return aligned_addr; +} + +static struct rte_memseg * +virt2memseg(const void *addr, const struct rte_memseg_list *msl) +{ + const struct rte_fbarray *arr; + void *start, *end; + int ms_idx; + + if (msl == NULL) + return NULL; + + /* a memseg list was specified, check if it's the right one */ + start = msl->base_va; + end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len); + + if (addr < start || addr >= end) + return NULL; + + /* now, calculate index */ + arr = &msl->memseg_arr; + ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz; + return rte_fbarray_get(arr, ms_idx); +} + +static struct rte_memseg_list * +virt2memseg_list(const void *addr) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + int msl_idx; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + void *start, *end; + msl = &mcfg->memsegs[msl_idx]; + + start = msl->base_va; + end = RTE_PTR_ADD(start, + (size_t)msl->page_sz * msl->memseg_arr.len); + if (addr >= start && addr < end) + break; + } + /* if we didn't find our memseg list */ + if (msl_idx == RTE_MAX_MEMSEG_LISTS) + return NULL; + return msl; +} + +__rte_experimental struct rte_memseg_list * +rte_mem_virt2memseg_list(const void *addr) +{ + return virt2memseg_list(addr); +} + +struct virtiova { + rte_iova_t iova; + void *virt; +}; +static int +find_virt(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + struct virtiova *vi = arg; + if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) { + size_t offset = vi->iova - ms->iova; + vi->virt = RTE_PTR_ADD(ms->addr, offset); + /* stop the walk */ + return 1; + } + return 0; +} +static int +find_virt_legacy(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, size_t len, void *arg) { - return rte_eal_get_configuration()->mem_config->memseg; + struct virtiova *vi = arg; + if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) { + size_t offset = vi->iova - ms->iova; + vi->virt = RTE_PTR_ADD(ms->addr, offset); + /* stop the walk */ + return 1; + } + return 0; } +__rte_experimental void * +rte_mem_iova2virt(rte_iova_t iova) +{ + struct virtiova vi; + + memset(&vi, 0, sizeof(vi)); + + vi.iova = iova; + /* for legacy mem, we can get away with scanning VA-contiguous segments, + * as we know they are PA-contiguous as well + */ + if (internal_config.legacy_mem) + rte_memseg_contig_walk(find_virt_legacy, &vi); + else + rte_memseg_walk(find_virt, &vi); + + return vi.virt; +} + +__rte_experimental struct rte_memseg * +rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl) +{ + return virt2memseg(addr, msl != NULL ? 
msl : + rte_mem_virt2memseg_list(addr)); +} + +static int +physmem_size(const struct rte_memseg_list *msl, void *arg) +{ + uint64_t *total_len = arg; + + *total_len += msl->memseg_arr.count * msl->page_sz; + + return 0; +} /* get the total size of memory */ uint64_t rte_eal_get_physmem_size(void) { - const struct rte_mem_config *mcfg; - unsigned i = 0; uint64_t total_len = 0; - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; + rte_memseg_list_walk(physmem_size, &total_len); - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if (mcfg->memseg[i].addr == NULL) - break; + return total_len; +} - total_len += mcfg->memseg[i].len; - } +static int +dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx, ms_idx; + FILE *f = arg; - return total_len; + msl_idx = msl - mcfg->memsegs; + if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) + return -1; + + ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); + if (ms_idx < 0) + return -1; + + fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, " + "virt:%p, socket_id:%"PRId32", " + "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " + "nrank:%"PRIx32"\n", + msl_idx, ms_idx, + ms->iova, + ms->len, + ms->addr, + ms->socket_id, + ms->hugepage_sz, + ms->nchannel, + ms->nrank); + + return 0; } -/* Dump the physical memory layout on console */ -void -rte_dump_physmem_layout(FILE *f) +/* + * Defining here because declared in rte_memory.h, but the actual implementation + * is in eal_common_memalloc.c, like all other memalloc internals. + */ +int __rte_experimental +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb, + void *arg) { - const struct rte_mem_config *mcfg; - unsigned i = 0; + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_register(name, clb, arg); +} - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; +int __rte_experimental +rte_mem_event_callback_unregister(const char *name, void *arg) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_unregister(name, arg); +} - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if (mcfg->memseg[i].addr == NULL) - break; +int __rte_experimental +rte_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id, + limit); +} - fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, " - "virt:%p, socket_id:%"PRId32", " - "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " - "nrank:%"PRIx32"\n", i, - mcfg->memseg[i].iova, - mcfg->memseg[i].len, - mcfg->memseg[i].addr, - mcfg->memseg[i].socket_id, - mcfg->memseg[i].hugepage_sz, - mcfg->memseg[i].nchannel, - mcfg->memseg[i].nrank); +int __rte_experimental +rte_mem_alloc_validator_unregister(const char *name, int socket_id) +{ + /* FreeBSD boots with 
legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n"); + rte_errno = ENOTSUP; + return -1; } + return eal_memalloc_mem_alloc_validator_unregister(name, socket_id); +} + +/* Dump the physical memory layout on console */ +void +rte_dump_physmem_layout(FILE *f) +{ + rte_memseg_walk(dump_memseg, f); } /* return the number of memory channels */ @@ -117,20 +420,165 @@ rte_mem_lock_page(const void *virt) return mlock((void *)aligned, page_size); } +int __rte_experimental +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ms_idx, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + const struct rte_memseg *ms; + struct rte_fbarray *arr; + + if (msl->memseg_arr.count == 0) + continue; + + arr = &msl->memseg_arr; + + ms_idx = rte_fbarray_find_next_used(arr, 0); + while (ms_idx >= 0) { + int n_segs; + size_t len; + + ms = rte_fbarray_get(arr, ms_idx); + + /* find how many more segments there are, starting with + * this one. + */ + n_segs = rte_fbarray_find_contig_used(arr, ms_idx); + len = n_segs * msl->page_sz; + + ret = func(msl, ms, len, arg); + if (ret) + return ret; + ms_idx = rte_fbarray_find_next_used(arr, + ms_idx + n_segs); + } + } + return 0; +} + +int __rte_experimental +rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_contig_walk_thread_unsafe(func, arg); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int __rte_experimental +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ms_idx, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + const struct rte_memseg *ms; + struct rte_fbarray *arr; + + if (msl->memseg_arr.count == 0) + continue; + + arr = &msl->memseg_arr; + + ms_idx = rte_fbarray_find_next_used(arr, 0); + while (ms_idx >= 0) { + ms = rte_fbarray_get(arr, ms_idx); + ret = func(msl, ms, arg); + if (ret) + return ret; + ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1); + } + } + return 0; +} + +int __rte_experimental +rte_memseg_walk(rte_memseg_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_walk_thread_unsafe(func, arg); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int __rte_experimental +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ret = 0; + + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + + if (msl->base_va == NULL) + continue; + + ret = func(msl, arg); + if (ret) + return ret; + } + return 0; +} + +int __rte_experimental +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret = 0; + + /* do not allow 
allocations/frees/init while we iterate */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_list_walk_thread_unsafe(func, arg); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + /* init memory subsystem */ int rte_eal_memory_init(void) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int retval; RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); - const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? + if (!mcfg) + return -1; + + /* lock mem hotplug here, to prevent races while we init */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + + if (rte_eal_memseg_init() < 0) + goto fail; + + if (eal_memalloc_init() < 0) + goto fail; + + retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? rte_eal_hugepage_init() : rte_eal_hugepage_attach(); if (retval < 0) - return -1; + goto fail; if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) - return -1; + goto fail; return 0; +fail: + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + return -1; } diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 1ab3ade2..7300fe05 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -28,88 +28,49 @@ static inline const struct rte_memzone * memzone_lookup_thread_unsafe(const char *name) { - const struct rte_mem_config *mcfg; + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; const struct rte_memzone *mz; - unsigned i = 0; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* * the algorithm is not optimal (linear), but there are few * zones and this function should be called at init only */ - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - mz = &mcfg->memzone[i]; - if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) - return &mcfg->memzone[i]; + i = rte_fbarray_find_next_used(arr, 0); + while (i >= 0) { + mz = rte_fbarray_get(arr, i); + if (mz->addr != NULL && + !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return mz; + i = rte_fbarray_find_next_used(arr, i + 1); } - - return NULL; -} - -static inline struct rte_memzone * -get_next_free_memzone(void) -{ - struct rte_mem_config *mcfg; - unsigned i = 0; - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr == NULL) - return &mcfg->memzone[i]; - } - return NULL; } -/* This function will return the greatest free block if a heap has been - * specified. 
If no heap has been specified, it will return the heap and - * length of the greatest free block available in all heaps */ -static size_t -find_heap_max_free_elem(int *s, unsigned align) -{ - struct rte_mem_config *mcfg; - struct rte_malloc_socket_stats stats; - int i, socket = *s; - size_t len = 0; - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { - if ((socket != SOCKET_ID_ANY) && (socket != i)) - continue; - - malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats); - if (stats.greatest_free_size > len) { - len = stats.greatest_free_size; - *s = i; - } - } - - if (len < MALLOC_ELEM_OVERHEAD + align) - return 0; - - return len - MALLOC_ELEM_OVERHEAD - align; -} - static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, - int socket_id, unsigned flags, unsigned align, unsigned bound) + int socket_id, unsigned int flags, unsigned int align, + unsigned int bound) { struct rte_memzone *mz; struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + void *mz_addr; size_t requested_len; - int socket, i; + int mz_idx; + bool contig; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* no more room in config */ - if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { + if (arr->count >= arr->len) { RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); rte_errno = ENOSPC; return NULL; @@ -148,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - len += RTE_CACHE_LINE_MASK; - len &= ~((size_t) RTE_CACHE_LINE_MASK); + len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE); /* save minimal requested length */ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); @@ -169,40 +129,22 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, if (!rte_eal_has_hugepages()) socket_id = SOCKET_ID_ANY; - if (len == 0) { - if (bound != 0) - requested_len = bound; - else { - requested_len = find_heap_max_free_elem(&socket_id, align); - if (requested_len == 0) { - rte_errno = ENOMEM; - return NULL; - } - } - } + contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0; + /* malloc only cares about size flags, remove contig flag from flags */ + flags &= ~RTE_MEMZONE_IOVA_CONTIG; - if (socket_id == SOCKET_ID_ANY) - socket = malloc_get_numa_socket(); - else - socket = socket_id; - - /* allocate memory on heap */ - void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, - requested_len, flags, align, bound); - - if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { - /* try other heaps */ - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { - if (socket == i) - continue; - - mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], - NULL, requested_len, flags, align, bound); - if (mz_addr != NULL) - break; - } + if (len == 0 && bound == 0) { + /* no size constraints were placed, so use malloc elem len */ + requested_len = 0; + mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags, + align, contig); + } else { + if (len == 0) + requested_len = bound; + /* allocate memory on heap */ + mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, + flags, align, bound, contig); } - if (mz_addr == NULL) { rte_errno = ENOMEM; return NULL; @@ -211,33 +153,38 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* fill the zone in config */ - mz = get_next_free_memzone(); + mz_idx = 
rte_fbarray_find_next_free(arr, 0); + + if (mz_idx < 0) { + mz = NULL; + } else { + rte_fbarray_set_used(arr, mz_idx); + mz = rte_fbarray_get(arr, mz_idx); + } if (mz == NULL) { - RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " - "in config!\n", __func__); - malloc_elem_free(elem); + RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__); + malloc_heap_free(elem); rte_errno = ENOSPC; return NULL; } - mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; - mz->len = (requested_len == 0 ? elem->size : requested_len); - mz->hugepage_sz = elem->ms->hugepage_sz; - mz->socket_id = elem->ms->socket_id; + mz->len = requested_len == 0 ? + elem->size - elem->pad - MALLOC_ELEM_OVERHEAD : + requested_len; + mz->hugepage_sz = elem->msl->page_sz; + mz->socket_id = elem->msl->socket_id; mz->flags = 0; - mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg; return mz; } static const struct rte_memzone * -rte_memzone_reserve_thread_safe(const char *name, size_t len, - int socket_id, unsigned flags, unsigned align, - unsigned bound) +rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, + unsigned int flags, unsigned int align, unsigned int bound) { struct rte_mem_config *mcfg; const struct rte_memzone *mz = NULL; @@ -296,34 +243,38 @@ int rte_memzone_free(const struct rte_memzone *mz) { struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + struct rte_memzone *found_mz; int ret = 0; - void *addr; + void *addr = NULL; unsigned idx; if (mz == NULL) return -EINVAL; mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_write_lock(&mcfg->mlock); - idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); - idx = idx / sizeof(struct rte_memzone); + idx = rte_fbarray_find_idx(arr, mz); + found_mz = rte_fbarray_get(arr, idx); - addr = mcfg->memzone[idx].addr; - if (addr == NULL) + if (found_mz == NULL) { + ret = -EINVAL; + } else if (found_mz->addr == NULL) { + RTE_LOG(ERR, EAL, "Memzone is not allocated\n"); ret = -EINVAL; - else if (mcfg->memzone_cnt == 0) { - rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", - __func__); } else { - memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); - mcfg->memzone_cnt--; + addr = found_mz->addr; + memset(found_mz, 0, sizeof(*found_mz)); + rte_fbarray_set_free(arr, idx); } rte_rwlock_write_unlock(&mcfg->mlock); - rte_free(addr); + if (addr != NULL) + rte_free(addr); return ret; } @@ -348,31 +299,61 @@ rte_memzone_lookup(const char *name) return memzone; } +static void +dump_memzone(const struct rte_memzone *mz, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl = NULL; + void *cur_addr, *mz_end; + struct rte_memseg *ms; + int mz_idx, ms_idx; + size_t page_sz; + FILE *f = arg; + + mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz); + + fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, " + "socket_id:%"PRId32", flags:%"PRIx32"\n", + mz_idx, + mz->name, + mz->len, + mz->addr, + mz->socket_id, + mz->flags); + + /* go through each page occupied by this memzone */ + msl = rte_mem_virt2memseg_list(mz->addr); + if (!msl) { + RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n"); + return; + } + page_sz = (size_t)mz->hugepage_sz; + cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz); + mz_end = RTE_PTR_ADD(cur_addr, mz->len); + + fprintf(f, "physical segments used:\n"); + ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz; 
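/*
 * [Editor's note] The index of the first memseg backing the zone is simply
 * the zone's offset from the memseg list's base VA expressed in pages: for
 * example, with 2 MB pages an address 6 MB past base_va resolves to
 * ms_idx 3. Subsequent pages of the zone are then walked by advancing the
 * memseg pointer, since a memzone always occupies contiguous segments.
 */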
+ ms = rte_fbarray_get(&msl->memseg_arr, ms_idx); + + do { + fprintf(f, " addr: %p iova: 0x%" PRIx64 " " + "len: 0x%zx " + "pagesz: 0x%zx\n", + cur_addr, ms->iova, ms->len, page_sz); + + /* advance VA to next page */ + cur_addr = RTE_PTR_ADD(cur_addr, page_sz); + + /* memzones occupy contiguous segments */ + ++ms; + } while (cur_addr < mz_end); +} + /* Dump all reserved memory zones on console */ void rte_memzone_dump(FILE *f) { - struct rte_mem_config *mcfg; - unsigned i = 0; - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - rte_rwlock_read_lock(&mcfg->mlock); - /* dump all zones */ - for (i=0; imemzone[i].addr == NULL) - break; - fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx" - ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, - mcfg->memzone[i].name, - mcfg->memzone[i].iova, - mcfg->memzone[i].len, - mcfg->memzone[i].addr, - mcfg->memzone[i].socket_id, - mcfg->memzone[i].flags); - } - rte_rwlock_read_unlock(&mcfg->mlock); + rte_memzone_walk(dump_memzone, f); } /* @@ -382,30 +363,27 @@ int rte_eal_memzone_init(void) { struct rte_mem_config *mcfg; - const struct rte_memseg *memseg; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; - /* secondary processes don't need to initialise anything */ - if (rte_eal_process_type() == RTE_PROC_SECONDARY) - return 0; + rte_rwlock_write_lock(&mcfg->mlock); - memseg = rte_eal_get_physmem_layout(); - if (memseg == NULL) { - RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__); + if (rte_eal_process_type() == RTE_PROC_PRIMARY && + rte_fbarray_init(&mcfg->memzones, "memzone", + RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) { + RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n"); + return -1; + } else if (rte_eal_process_type() == RTE_PROC_SECONDARY && + rte_fbarray_attach(&mcfg->memzones)) { + RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n"); + rte_rwlock_write_unlock(&mcfg->mlock); return -1; } - rte_rwlock_write_lock(&mcfg->mlock); - - /* delete all zones */ - mcfg->memzone_cnt = 0; - memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); - rte_rwlock_write_unlock(&mcfg->mlock); - return rte_eal_malloc_heap_init(); + return 0; } /* Walk all reserved memory zones */ @@ -413,14 +391,18 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), void *arg) { struct rte_mem_config *mcfg; - unsigned i; + struct rte_fbarray *arr; + int i; mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_read_lock(&mcfg->mlock); - for (i=0; imemzone[i].addr != NULL) - (*func)(&mcfg->memzone[i], arg); + i = rte_fbarray_find_next_used(arr, 0); + while (i >= 0) { + struct rte_memzone *mz = rte_fbarray_get(arr, i); + (*func)(mz, arg); + i = rte_fbarray_find_next_used(arr, i + 1); } rte_rwlock_read_unlock(&mcfg->mlock); } diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 9f2f8d25..dd5f9740 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -27,6 +27,7 @@ #include "eal_internal_cfg.h" #include "eal_options.h" #include "eal_filesystem.h" +#include "eal_private.h" #define BITS_PER_HEX 4 #define LCORE_OPT_LST 1 @@ -65,14 +66,18 @@ eal_long_options[] = { {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM }, {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, {OPT_PCI_WHITELIST, 1, NULL, 
OPT_PCI_WHITELIST_NUM }, {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM }, {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, + {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM }, + {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM}, {0, 0, NULL, 0 } }; @@ -151,7 +156,7 @@ eal_option_device_parse(void) TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) { if (ret == 0) { - ret = rte_eal_devargs_add(devopt->type, devopt->arg); + ret = rte_devargs_add(devopt->type, devopt->arg); if (ret) RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n", devopt->arg); @@ -176,9 +181,16 @@ eal_reset_internal_config(struct internal_config *internal_cfg) /* zero out the NUMA config */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) internal_cfg->socket_mem[i] = 0; + internal_cfg->force_socket_limits = 0; + /* zero out the NUMA limits config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_limit[i] = 0; /* zero out hugedir descriptors */ - for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) { + memset(&internal_cfg->hugepage_info[i], 0, + sizeof(internal_cfg->hugepage_info[0])); internal_cfg->hugepage_info[i].lock_descriptor = -1; + } internal_cfg->base_virtaddr = 0; internal_cfg->syslog_facility = LOG_DAEMON; @@ -194,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->vmware_tsc_map = 0; internal_cfg->create_uio_dev = 0; internal_cfg->user_mbuf_pool_ops_name = NULL; + internal_cfg->init_complete = 0; } static int @@ -308,6 +321,7 @@ eal_parse_service_coremask(const char *coremask) unsigned int count = 0; char c; int val; + uint32_t taken_lcore_count = 0; if (coremask == NULL) return -1; @@ -341,7 +355,7 @@ eal_parse_service_coremask(const char *coremask) if (master_lcore_parsed && cfg->master_lcore == lcore) { RTE_LOG(ERR, EAL, - "Error: lcore %u is master lcore, cannot use as service core\n", + "lcore %u is master lcore, cannot use as service core\n", idx); return -1; } @@ -351,6 +365,10 @@ eal_parse_service_coremask(const char *coremask) "lcore %u unavailable\n", idx); return -1; } + + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + lcore_config[idx].core_role = ROLE_SERVICE; count++; } @@ -367,10 +385,27 @@ eal_parse_service_coremask(const char *coremask) if (count == 0) return -1; + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores are in the coremask. " + "Please ensure -c or -l includes service cores\n"); + } + cfg->service_lcore_count = count; return 0; } +static int +eal_service_cores_parsed(void) +{ + int idx; + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (lcore_config[idx].core_role == ROLE_SERVICE) + return 1; + } + return 0; +} + static int eal_parse_coremask(const char *coremask) { @@ -380,6 +415,11 @@ eal_parse_coremask(const char *coremask) char c; int val; + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. " + "Please ensure -c is before -s or -S\n"); + if (coremask == NULL) return -1; /* Remove all blank characters ahead and after . 
@@ -411,6 +451,7 @@ eal_parse_coremask(const char *coremask) "unavailable\n", idx); return -1; } + cfg->lcore_role[idx] = ROLE_RTE; lcore_config[idx].core_index = count; count++; @@ -442,6 +483,7 @@ eal_parse_service_corelist(const char *corelist) unsigned count = 0; char *end = NULL; int min, max; + uint32_t taken_lcore_count = 0; if (corelist == NULL) return -1; @@ -483,6 +525,9 @@ eal_parse_service_corelist(const char *corelist) idx); return -1; } + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + lcore_config[idx].core_role = ROLE_SERVICE; count++; @@ -497,6 +542,12 @@ eal_parse_service_corelist(const char *corelist) if (count == 0) return -1; + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores were in the coremask. " + "Please ensure -c or -l includes service cores\n"); + } + return 0; } @@ -509,6 +560,11 @@ eal_parse_corelist(const char *corelist) char *end = NULL; int min, max; + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. " + "Please ensure -l is before -s or -S\n"); + if (corelist == NULL) return -1; @@ -583,7 +639,8 @@ eal_parse_master_lcore(const char *arg) /* ensure master core is not used as service core */ if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { - RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n"); + RTE_LOG(ERR, EAL, + "Error: Master lcore is used as a service core\n"); return -1; } @@ -875,7 +932,7 @@ static int eal_parse_syslog(const char *facility, struct internal_config *conf) { int i; - static struct { + static const struct { const char *name; int value; } map[] = { @@ -911,43 +968,92 @@ eal_parse_syslog(const char *facility, struct internal_config *conf) } static int -eal_parse_log_level(const char *arg) +eal_parse_log_priority(const char *level) { - char *end, *str, *type, *level; + static const char * const levels[] = { + [RTE_LOG_EMERG] = "emergency", + [RTE_LOG_ALERT] = "alert", + [RTE_LOG_CRIT] = "critical", + [RTE_LOG_ERR] = "error", + [RTE_LOG_WARNING] = "warning", + [RTE_LOG_NOTICE] = "notice", + [RTE_LOG_INFO] = "info", + [RTE_LOG_DEBUG] = "debug", + }; + size_t len = strlen(level); unsigned long tmp; + char *end; + unsigned int i; - str = strdup(arg); - if (str == NULL) + if (len == 0) return -1; - if (strchr(str, ',') == NULL) { - type = NULL; - level = str; - } else { - type = strsep(&str, ","); - level = strsep(&str, ","); + /* look for named values, skip 0 which is not a valid level */ + for (i = 1; i < RTE_DIM(levels); i++) { + if (strncmp(levels[i], level, len) == 0) + return i; } + /* not a string, maybe it is numeric */ errno = 0; tmp = strtoul(level, &end, 0); /* check for errors */ - if ((errno != 0) || (level[0] == '\0') || - end == NULL || (*end != '\0')) - goto fail; + if (errno != 0 || end == NULL || *end != '\0' || + tmp >= UINT32_MAX) + return -1; - /* log_level is a uint32_t */ - if (tmp >= UINT32_MAX) - goto fail; + return tmp; +} + +static int +eal_parse_log_level(const char *arg) +{ + const char *pattern = NULL; + const char *regex = NULL; + char *str, *level; + int priority; + + str = strdup(arg); + if (str == NULL) + return -1; - if (type == NULL) { - rte_log_set_global_level(tmp); - } else if (rte_log_set_level_regexp(type, tmp) < 0) { - printf("cannot set log level %s,%lu\n", - type, tmp); + if ((level = strchr(str, ','))) { + regex = str; + *level++ = '\0'; + } else if ((level = strchr(str, ':'))) { + pattern = str; + *level++ = '\0'; + } else { + level = str; + } + + 
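/*
 * [Editor's note] Illustrative forms accepted once the option string has
 * been split as above (the names and values are examples only):
 *   --log-level=7                 global level, numeric or named ("info")
 *   --log-level=lib.eal,debug     "," selects the regular-expression path
 *   --log-level=pmd.*:warning     ":" selects the glob/fnmatch path
 * Named levels are matched by prefix, so "debug" and "deb" are equivalent.
 */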
priority = eal_parse_log_priority(level); + if (priority < 0) { + fprintf(stderr, "invalid log priority: %s\n", level); goto fail; } + if (regex) { + if (rte_log_set_level_regexp(regex, priority) < 0) { + fprintf(stderr, "cannot set log level %s,%d\n", + pattern, priority); + goto fail; + } + if (rte_log_save_regexp(regex, priority) < 0) + goto fail; + } else if (pattern) { + if (rte_log_set_level_pattern(pattern, priority) < 0) { + fprintf(stderr, "cannot set log level %s:%d\n", + pattern, priority); + goto fail; + } + if (rte_log_save_pattern(pattern, priority) < 0) + goto fail; + } else { + rte_log_set_global_level(priority); + } + free(str); return 0; @@ -1089,6 +1195,8 @@ eal_parse_common_option(int opt, const char *optarg, case OPT_NO_HUGE_NUM: conf->no_hugetlbfs = 1; + /* no-huge is legacy mem */ + conf->legacy_mem = 1; break; case OPT_NO_PCI_NUM: @@ -1107,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg, conf->no_shconf = 1; break; + case OPT_IN_MEMORY_NUM: + conf->in_memory = 1; + /* in-memory is a superset of noshconf and huge-unlink */ + conf->no_shconf = 1; + conf->hugepage_unlink = 1; + break; + case OPT_PROC_TYPE_NUM: conf->process_type = eal_parse_proc_type(optarg); break; @@ -1160,6 +1275,12 @@ eal_parse_common_option(int opt, const char *optarg, core_parsed = LCORE_OPT_MAP; break; + case OPT_LEGACY_MEM_NUM: + conf->legacy_mem = 1; + break; + case OPT_SINGLE_FILE_SEGMENTS_NUM: + conf->single_file_segments = 1; + break; /* don't know what to do, leave this to caller */ default: @@ -1252,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg) "be specified together with --"OPT_NO_HUGE"\n"); return -1; } - - if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink && + !internal_cfg->in_memory) { RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " "be specified together with --"OPT_NO_HUGE"\n"); return -1; } + if (internal_config.force_socket_limits && internal_config.legacy_mem) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT + " is only supported in non-legacy memory mode\n"); + } + if (internal_cfg->single_file_segments && + internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is " + "not compatible with neither --"OPT_IN_MEMORY" nor " + "--"OPT_HUGE_UNLINK"\n"); + return -1; + } return 0; } @@ -1302,10 +1434,12 @@ eal_common_usage(void) " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" " --"OPT_SYSLOG" Set syslog facility\n" " --"OPT_LOG_LEVEL"= Set global log level\n" - " --"OPT_LOG_LEVEL"=,\n" + " --"OPT_LOG_LEVEL"=:\n" " Set specific log level\n" " -v Display version information on startup\n" " -h, --help This help\n" + " --"OPT_IN_MEMORY" Operate entirely in memory. 
This will\n" + " disable secondary process support\n" "\nEAL options for DEBUG use only:\n" " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index caa8774a..9fcb9121 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -13,18 +13,21 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include #include #include +#include #include "eal_private.h" #include "eal_filesystem.h" @@ -51,6 +54,7 @@ enum mp_type { MP_MSG, /* Share message with peers, will not block */ MP_REQ, /* Request for information, Will block for a reply */ MP_REP, /* Response to previously-received request */ + MP_IGN, /* Response telling requester to ignore this response */ }; struct mp_msg_internal { @@ -58,31 +62,66 @@ struct mp_msg_internal { struct rte_mp_msg msg; }; -struct sync_request { - TAILQ_ENTRY(sync_request) next; - int reply_received; +struct async_request_param { + rte_mp_async_reply_t clb; + struct rte_mp_reply user_reply; + struct timespec end; + int n_responses_processed; +}; + +struct pending_request { + TAILQ_ENTRY(pending_request) next; + enum { + REQUEST_TYPE_SYNC, + REQUEST_TYPE_ASYNC + } type; char dst[PATH_MAX]; struct rte_mp_msg *request; struct rte_mp_msg *reply; - pthread_cond_t cond; + int reply_received; + RTE_STD_C11 + union { + struct { + struct async_request_param *param; + } async; + struct { + pthread_cond_t cond; + } sync; + }; }; -TAILQ_HEAD(sync_request_list, sync_request); +TAILQ_HEAD(pending_request_list, pending_request); static struct { - struct sync_request_list requests; + struct pending_request_list requests; pthread_mutex_t lock; -} sync_requests = { - .requests = TAILQ_HEAD_INITIALIZER(sync_requests.requests), - .lock = PTHREAD_MUTEX_INITIALIZER +} pending_requests = { + .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests), + .lock = PTHREAD_MUTEX_INITIALIZER, + /**< used in async requests only */ }; -static struct sync_request * -find_sync_request(const char *dst, const char *act_name) +/* forward declarations */ +static int +mp_send(struct rte_mp_msg *msg, const char *peer, int type); + +/* for use with alarm callback */ +static void +async_reply_handle(void *arg); + +/* for use with process_msg */ +static struct pending_request * +async_reply_handle_thread_unsafe(void *arg); + +static void +trigger_async_action(struct pending_request *req); + +static struct pending_request * +find_pending_request(const char *dst, const char *act_name) { - struct sync_request *r; + struct pending_request *r; - TAILQ_FOREACH(r, &sync_requests.requests, next) { + TAILQ_FOREACH(r, &pending_requests.requests, next) { if (!strcmp(r->dst, dst) && !strcmp(r->request->name, act_name)) break; @@ -91,6 +130,17 @@ find_sync_request(const char *dst, const char *act_name) return r; } +static void +create_socket_path(const char *name, char *buf, int len) +{ + const char *prefix = eal_mp_socket_path(); + + if (strlen(name) > 0) + snprintf(buf, len, "%s_%s", prefix, name); + else + strlcpy(buf, prefix, len); +} + int rte_eal_primary_proc_alive(const char *config_file_path) { @@ -159,7 +209,7 @@ rte_mp_action_register(const char *name, rte_mp_t action) rte_errno = ENOMEM; return -1; } - strcpy(entry->action_name, name); + strlcpy(entry->action_name, name, sizeof(entry->action_name)); entry->action = action; 
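rte_mp_action_register() now copies the action name with strlcpy(); on the receiving side, process_msg() below looks the name up and invokes the callback with the request and the sender's socket path. A small sketch of a handler answering via rte_mp_reply(); the callback prototype and the name field are assumptions inferred from how the callback and messages are used in this file:

    #include <stdio.h>
    #include <string.h>
    #include <rte_eal.h>

    /* assumed rte_mp_t shape: request message plus the peer's socket path */
    static int
    example_mp_handler(const struct rte_mp_msg *msg, const void *peer)
    {
        struct rte_mp_msg reply;

        memset(&reply, 0, sizeof(reply));
        /* replies are matched to requests by name, so echo it back */
        snprintf(reply.name, sizeof(reply.name), "%s", msg->name);

        return rte_mp_reply(&reply, peer);
    }

    static int
    example_register(void)
    {
        /* "example_action" is an illustrative action name */
        return rte_mp_action_register("example_action", example_mp_handler);
    }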
pthread_mutex_lock(&mp_mutex_action); @@ -241,23 +291,35 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s) static void process_msg(struct mp_msg_internal *m, struct sockaddr_un *s) { - struct sync_request *sync_req; + struct pending_request *pending_req; struct action_entry *entry; struct rte_mp_msg *msg = &m->msg; rte_mp_t action = NULL; RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name); - if (m->type == MP_REP) { - pthread_mutex_lock(&sync_requests.lock); - sync_req = find_sync_request(s->sun_path, msg->name); - if (sync_req) { - memcpy(sync_req->reply, msg, sizeof(*msg)); - sync_req->reply_received = 1; - pthread_cond_signal(&sync_req->cond); + if (m->type == MP_REP || m->type == MP_IGN) { + struct pending_request *req = NULL; + + pthread_mutex_lock(&pending_requests.lock); + pending_req = find_pending_request(s->sun_path, msg->name); + if (pending_req) { + memcpy(pending_req->reply, msg, sizeof(*msg)); + /* -1 indicates that we've been asked to ignore */ + pending_req->reply_received = + m->type == MP_REP ? 1 : -1; + + if (pending_req->type == REQUEST_TYPE_SYNC) + pthread_cond_signal(&pending_req->sync.cond); + else if (pending_req->type == REQUEST_TYPE_ASYNC) + req = async_reply_handle_thread_unsafe( + pending_req); } else RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name); - pthread_mutex_unlock(&sync_requests.lock); + pthread_mutex_unlock(&pending_requests.lock); + + if (req != NULL) + trigger_async_action(req); return; } @@ -267,10 +329,25 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s) action = entry->action; pthread_mutex_unlock(&mp_mutex_action); - if (!action) - RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name); - else if (action(msg, s->sun_path) < 0) + if (!action) { + if (m->type == MP_REQ && !internal_config.init_complete) { + /* if this is a request, and init is not yet complete, + * and callback wasn't registered, we should tell the + * requester to ignore our existence because we're not + * yet ready to process this request. + */ + struct rte_mp_msg dummy; + + memset(&dummy, 0, sizeof(dummy)); + strlcpy(dummy.name, msg->name, sizeof(dummy.name)); + mp_send(&dummy, s->sun_path, MP_IGN); + } else { + RTE_LOG(ERR, EAL, "Cannot find action: %s\n", + msg->name); + } + } else if (action(msg, s->sun_path) < 0) { RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name); + } } static void * @@ -287,11 +364,159 @@ mp_handle(void *arg __rte_unused) return NULL; } +static int +timespec_cmp(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec < b->tv_sec) + return -1; + if (a->tv_sec > b->tv_sec) + return 1; + if (a->tv_nsec < b->tv_nsec) + return -1; + if (a->tv_nsec > b->tv_nsec) + return 1; + return 0; +} + +enum async_action { + ACTION_FREE, /**< free the action entry, but don't trigger callback */ + ACTION_TRIGGER /**< trigger callback, then free action entry */ +}; + +static enum async_action +process_async_request(struct pending_request *sr, const struct timespec *now) +{ + struct async_request_param *param; + struct rte_mp_reply *reply; + bool timeout, last_msg; + + param = sr->async.param; + reply = ¶m->user_reply; + + /* did we timeout? */ + timeout = timespec_cmp(¶m->end, now) <= 0; + + /* if we received a response, adjust relevant data and copy mesasge. 
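The reply path above now also understands MP_IGN, and a request that arrives before init_complete (with no registered action) triggers an automatic "ignore me" response. For the simplest of the message types listed earlier, MP_MSG, a fire-and-forget broadcast looks roughly like this; the action name is illustrative:

    #include <stdio.h>
    #include <string.h>
    #include <rte_eal.h>

    static int
    example_notify_peers(void)
    {
        struct rte_mp_msg msg;

        memset(&msg, 0, sizeof(msg));
        snprintf(msg.name, sizeof(msg.name), "example_action");

        /* no reply expected; peers handle it via their registered action */
        return rte_mp_sendmsg(&msg);
    }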
*/ + if (sr->reply_received == 1 && sr->reply) { + struct rte_mp_msg *msg, *user_msgs, *tmp; + + msg = sr->reply; + user_msgs = reply->msgs; + + tmp = realloc(user_msgs, sizeof(*msg) * + (reply->nb_received + 1)); + if (!tmp) { + RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n", + sr->dst, sr->request->name); + /* this entry is going to be removed and its message + * dropped, but we don't want to leak memory, so + * continue. + */ + } else { + user_msgs = tmp; + reply->msgs = user_msgs; + memcpy(&user_msgs[reply->nb_received], + msg, sizeof(*msg)); + reply->nb_received++; + } + + /* mark this request as processed */ + param->n_responses_processed++; + } else if (sr->reply_received == -1) { + /* we were asked to ignore this process */ + reply->nb_sent--; + } else if (timeout) { + /* count it as processed response, but don't increment + * nb_received. + */ + param->n_responses_processed++; + } + + free(sr->reply); + + last_msg = param->n_responses_processed == reply->nb_sent; + + return last_msg ? ACTION_TRIGGER : ACTION_FREE; +} + +static void +trigger_async_action(struct pending_request *sr) +{ + struct async_request_param *param; + struct rte_mp_reply *reply; + + param = sr->async.param; + reply = ¶m->user_reply; + + param->clb(sr->request, reply); + + /* clean up */ + free(sr->async.param->user_reply.msgs); + free(sr->async.param); + free(sr->request); + free(sr); +} + +static struct pending_request * +async_reply_handle_thread_unsafe(void *arg) +{ + struct pending_request *req = (struct pending_request *)arg; + enum async_action action; + struct timespec ts_now; + struct timeval now; + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto no_trigger; + } + ts_now.tv_nsec = now.tv_usec * 1000; + ts_now.tv_sec = now.tv_sec; + + action = process_async_request(req, &ts_now); + + TAILQ_REMOVE(&pending_requests.requests, req, next); + + if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) { + /* if we failed to cancel the alarm because it's already in + * progress, don't proceed because otherwise we will end up + * handling the same message twice. 
+ */ + if (rte_errno == EINPROGRESS) { + RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n"); + goto no_trigger; + } + RTE_LOG(ERR, EAL, "Failed to cancel alarm\n"); + } + + if (action == ACTION_TRIGGER) + return req; +no_trigger: + free(req); + return NULL; +} + +static void +async_reply_handle(void *arg) +{ + struct pending_request *req; + + pthread_mutex_lock(&pending_requests.lock); + req = async_reply_handle_thread_unsafe(arg); + pthread_mutex_unlock(&pending_requests.lock); + + if (req != NULL) + trigger_async_action(req); +} + static int open_socket_fd(void) { + char peer_name[PATH_MAX] = {0}; struct sockaddr_un un; - const char *prefix = eal_mp_socket_path(); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + snprintf(peer_name, sizeof(peer_name), + "%d_%"PRIx64, getpid(), rte_rdtsc()); mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0); if (mp_fd < 0) { @@ -301,13 +526,11 @@ open_socket_fd(void) memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix); - else { - snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64, - prefix, getpid(), rte_rdtsc()); - } + + create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path)); + unlink(un.sun_path); /* May still exist since last run */ + if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) { RTE_LOG(ERR, EAL, "failed to bind %s: %s\n", un.sun_path, strerror(errno)); @@ -342,54 +565,70 @@ unlink_sockets(const char *filter) return 0; } -static void -unlink_socket_by_path(const char *path) -{ - char *filename; - char *fullpath = strdup(path); - - if (!fullpath) - return; - filename = basename(fullpath); - unlink_sockets(filename); - free(fullpath); - RTE_LOG(INFO, EAL, "Remove socket %s\n", path); -} - int rte_mp_channel_init(void) { - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - char *path; - pthread_t tid; + char path[PATH_MAX]; + int dir_fd; + pthread_t mp_handle_tid; + + /* in no shared files mode, we do not have secondary processes support, + * so no need to initialize IPC. 
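rte_mp_channel_init() now returns early when shared files are disabled, and the same no_shconf guard in the request paths further below makes IPC requests succeed with zero replies in --in-memory mode. A hedged usage sketch of the two request flavours declared in this patch (both are experimental API in this release); the action name, the reply-ownership convention and the async callback prototype are assumptions inferred from the code in this file:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    #include <rte_eal.h>

    /* assumed rte_mp_async_reply_t shape: original request + collected replies */
    static int
    example_async_cb(const struct rte_mp_msg *request,
            const struct rte_mp_reply *reply)
    {
        printf("async '%s': %d of %d peers answered\n",
            request->name, reply->nb_received, reply->nb_sent);
        return 0;
    }

    static int
    example_requests(void)
    {
        struct rte_mp_msg req;
        struct rte_mp_reply reply;
        struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

        memset(&req, 0, sizeof(req));
        memset(&reply, 0, sizeof(reply));
        snprintf(req.name, sizeof(req.name), "example_action");

        /* blocking: waits up to ts for replies, which land in reply.msgs */
        if (rte_mp_request_sync(&req, &reply, &ts) < 0)
            return -1;
        free(reply.msgs);   /* the reply array is allocated on our behalf */

        /* non-blocking: callback fires once all peers answered or ts expired */
        return rte_mp_request_async(&req, &ts, example_async_cb);
    }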
+ */ + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n"); + return 0; + } - snprintf(mp_filter, PATH_MAX, ".%s_unix_*", - internal_config.hugefile_prefix); + /* create filter path */ + create_socket_path("*", path, sizeof(path)); + strlcpy(mp_filter, basename(path), sizeof(mp_filter)); - path = strdup(eal_mp_socket_path()); - snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path)); - free(path); + /* path may have been modified, so recreate it */ + create_socket_path("*", path, sizeof(path)); + strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path)); + + /* lock the directory */ + dir_fd = open(mp_dir_path, O_RDONLY); + if (dir_fd < 0) { + RTE_LOG(ERR, EAL, "failed to open %s: %s\n", + mp_dir_path, strerror(errno)); + return -1; + } + + if (flock(dir_fd, LOCK_EX)) { + RTE_LOG(ERR, EAL, "failed to lock %s: %s\n", + mp_dir_path, strerror(errno)); + close(dir_fd); + return -1; + } if (rte_eal_process_type() == RTE_PROC_PRIMARY && - unlink_sockets(mp_filter)) { + unlink_sockets(mp_filter)) { RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n"); + close(dir_fd); return -1; } - if (open_socket_fd() < 0) + if (open_socket_fd() < 0) { + close(dir_fd); return -1; + } - if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) { + if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle", + NULL, mp_handle, NULL) < 0) { RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n", strerror(errno)); close(mp_fd); + close(dir_fd); mp_fd = -1; return -1; } - /* try best to set thread name */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle"); - rte_thread_setname(tid, thread_name); + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + close(dir_fd); + return 0; } @@ -416,7 +655,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type) memset(&dst, 0, sizeof(dst)); dst.sun_family = AF_UNIX; - snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path); + strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path)); memset(&msgh, 0, sizeof(msgh)); memset(control, 0, sizeof(control)); @@ -444,13 +683,12 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type) if (snd < 0) { rte_errno = errno; /* Check if it caused by peer process exits */ - if (errno == -ECONNREFUSED) { - /* We don't unlink the primary's socket here */ - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - unlink_socket_by_path(dst_path); + if (errno == ECONNREFUSED && + rte_eal_process_type() == RTE_PROC_PRIMARY) { + unlink(dst_path); return 0; } - if (errno == -ENOBUFS) { + if (errno == ENOBUFS) { RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n", dst_path); return 0; @@ -466,7 +704,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type) static int mp_send(struct rte_mp_msg *msg, const char *peer, int type) { - int ret = 0; + int dir_fd, ret = 0; DIR *mp_dir; struct dirent *ent; @@ -488,14 +726,32 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type) rte_errno = errno; return -1; } + + dir_fd = dirfd(mp_dir); + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + mp_dir_path); + rte_errno = errno; + closedir(mp_dir); + return -1; + } + while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + if (fnmatch(mp_filter, ent->d_name, 0) != 0) continue; - if (send_msg(ent->d_name, msg, type) < 0) + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + if (send_msg(path, msg, type) < 0) ret = -1; } + /* unlock the dir */ + 
flock(dir_fd, LOCK_UN); + /* dir_fd automatically closed on closedir */ closedir(mp_dir); return ret; } @@ -539,25 +795,82 @@ rte_mp_sendmsg(struct rte_mp_msg *msg) } static int -mp_request_one(const char *dst, struct rte_mp_msg *req, +mp_request_async(const char *dst, struct rte_mp_msg *req, + struct async_request_param *param, const struct timespec *ts) +{ + struct rte_mp_msg *reply_msg; + struct pending_request *pending_req, *exist; + int ret; + + pending_req = calloc(1, sizeof(*pending_req)); + reply_msg = calloc(1, sizeof(*reply_msg)); + if (pending_req == NULL || reply_msg == NULL) { + RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n"); + rte_errno = ENOMEM; + ret = -1; + goto fail; + } + + pending_req->type = REQUEST_TYPE_ASYNC; + strlcpy(pending_req->dst, dst, sizeof(pending_req->dst)); + pending_req->request = req; + pending_req->reply = reply_msg; + pending_req->async.param = param; + + /* queue already locked by caller */ + + exist = find_pending_request(dst, req->name); + if (exist) { + RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name); + rte_errno = EEXIST; + ret = -1; + goto fail; + } + + ret = send_msg(dst, req, MP_REQ); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n", + dst, req->name); + ret = -1; + goto fail; + } else if (ret == 0) { + ret = 0; + goto fail; + } + TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next); + + param->user_reply.nb_sent++; + + if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000, + async_reply_handle, pending_req) < 0) { + RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n", + dst, req->name); + rte_panic("Fix the above shit to properly free all memory\n"); + } + + return 0; +fail: + free(pending_req); + free(reply_msg); + return ret; +} + +static int +mp_request_sync(const char *dst, struct rte_mp_msg *req, struct rte_mp_reply *reply, const struct timespec *ts) { int ret; - struct timeval now; struct rte_mp_msg msg, *tmp; - struct sync_request sync_req, *exist; - - sync_req.reply_received = 0; - strcpy(sync_req.dst, dst); - sync_req.request = req; - sync_req.reply = &msg; - pthread_cond_init(&sync_req.cond, NULL); - - pthread_mutex_lock(&sync_requests.lock); - exist = find_sync_request(dst, req->name); - if (!exist) - TAILQ_INSERT_TAIL(&sync_requests.requests, &sync_req, next); - pthread_mutex_unlock(&sync_requests.lock); + struct pending_request pending_req, *exist; + + pending_req.type = REQUEST_TYPE_SYNC; + pending_req.reply_received = 0; + strlcpy(pending_req.dst, dst, sizeof(pending_req.dst)); + pending_req.request = req; + pending_req.reply = &msg; + pthread_cond_init(&pending_req.sync.cond, NULL); + + exist = find_pending_request(dst, req->name); if (exist) { RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name); rte_errno = EEXIST; @@ -572,33 +885,31 @@ mp_request_one(const char *dst, struct rte_mp_msg *req, } else if (ret == 0) return 0; + TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next); + reply->nb_sent++; - pthread_mutex_lock(&sync_requests.lock); do { - pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts); - /* Check spurious wakeups */ - if (sync_req.reply_received == 1) - break; - /* Check if time is out */ - if (gettimeofday(&now, NULL) < 0) - break; - if (now.tv_sec < ts->tv_sec) - break; - else if (now.tv_sec == ts->tv_sec && - now.tv_usec * 1000 < ts->tv_nsec) - break; - } while (1); - /* We got the lock now */ - TAILQ_REMOVE(&sync_requests.requests, &sync_req, next); - pthread_mutex_unlock(&sync_requests.lock); + 
ret = pthread_cond_timedwait(&pending_req.sync.cond, + &pending_requests.lock, ts); + } while (ret != 0 && ret != ETIMEDOUT); + + TAILQ_REMOVE(&pending_requests.requests, &pending_req, next); - if (sync_req.reply_received == 0) { + if (pending_req.reply_received == 0) { RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n", dst, req->name); rte_errno = ETIMEDOUT; return -1; } + if (pending_req.reply_received == -1) { + RTE_LOG(DEBUG, EAL, "Asked to ignore response\n"); + /* not receiving this message is not an error, so decrement + * number of sent messages + */ + reply->nb_sent--; + return 0; + } tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1)); if (!tmp) { @@ -614,10 +925,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req, } int __rte_experimental -rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply, +rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, const struct timespec *ts) { - int ret = 0; + int dir_fd, ret = 0; DIR *mp_dir; struct dirent *ent; struct timeval now; @@ -627,6 +938,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (check_input(req) == false) return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + if (gettimeofday(&now, NULL) < 0) { RTE_LOG(ERR, EAL, "Faile to get current time\n"); rte_errno = errno; @@ -642,8 +959,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply, reply->msgs = NULL; /* for secondary process, send request to the primary process only */ - if (rte_eal_process_type() == RTE_PROC_SECONDARY) - return mp_request_one(eal_mp_socket_path(), req, reply, &end); + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + pthread_mutex_lock(&pending_requests.lock); + ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end); + pthread_mutex_unlock(&pending_requests.lock); + return ret; + } /* for primary process, broadcast request, and collect reply 1 by 1 */ mp_dir = opendir(mp_dir_path); @@ -653,22 +974,193 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply, return -1; } + dir_fd = dirfd(mp_dir); + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + mp_dir_path); + closedir(mp_dir); + rte_errno = errno; + return -1; + } + + pthread_mutex_lock(&pending_requests.lock); while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + if (fnmatch(mp_filter, ent->d_name, 0) != 0) continue; - if (mp_request_one(ent->d_name, req, reply, &end)) + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + + /* unlocks the mutex while waiting for response, + * locks on receive + */ + if (mp_request_sync(path, req, reply, &end)) ret = -1; } + pthread_mutex_unlock(&pending_requests.lock); + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + /* dir_fd automatically closed on closedir */ closedir(mp_dir); return ret; } int __rte_experimental -rte_mp_reply(struct rte_mp_msg *msg, const char *peer) +rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, + rte_mp_async_reply_t clb) { + struct rte_mp_msg *copy; + struct pending_request *dummy; + struct async_request_param *param; + struct rte_mp_reply *reply; + int dir_fd, ret = 0; + DIR *mp_dir; + struct dirent *ent; + struct timeval now; + struct timespec *end; + bool dummy_used = false; + + RTE_LOG(DEBUG, EAL, "request: %s\n", req->name); + + if (check_input(req) == false) + return 
-1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Faile to get current time\n"); + rte_errno = errno; + return -1; + } + copy = calloc(1, sizeof(*copy)); + dummy = calloc(1, sizeof(*dummy)); + param = calloc(1, sizeof(*param)); + if (copy == NULL || dummy == NULL || param == NULL) { + RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n"); + rte_errno = ENOMEM; + goto fail; + } + + /* copy message */ + memcpy(copy, req, sizeof(*copy)); + + param->n_responses_processed = 0; + param->clb = clb; + end = ¶m->end; + reply = ¶m->user_reply; + + end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; + end->tv_sec = now.tv_sec + ts->tv_sec + + (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; + reply->nb_sent = 0; + reply->nb_received = 0; + reply->msgs = NULL; + + /* we have to lock the request queue here, as we will be adding a bunch + * of requests to the queue at once, and some of the replies may arrive + * before we add all of the requests to the queue. + */ + pthread_mutex_lock(&pending_requests.lock); + + /* we have to ensure that callback gets triggered even if we don't send + * anything, therefore earlier we have allocated a dummy request. fill + * it, and put it on the queue if we don't send any requests. + */ + dummy->type = REQUEST_TYPE_ASYNC; + dummy->request = copy; + dummy->reply = NULL; + dummy->async.param = param; + dummy->reply_received = 1; /* short-circuit the timeout */ + + /* for secondary process, send request to the primary process only */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + ret = mp_request_async(eal_mp_socket_path(), copy, param, ts); + + /* if we didn't send anything, put dummy request on the queue */ + if (ret == 0 && reply->nb_sent == 0) { + TAILQ_INSERT_TAIL(&pending_requests.requests, dummy, + next); + dummy_used = true; + } + + pthread_mutex_unlock(&pending_requests.lock); + + /* if we couldn't send anything, clean up */ + if (ret != 0) + goto fail; + return 0; + } + + /* for primary process, broadcast request */ + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); + rte_errno = errno; + goto unlock_fail; + } + dir_fd = dirfd(mp_dir); + /* lock the directory to prevent processes spinning up while we send */ + if (flock(dir_fd, LOCK_SH)) { + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", + mp_dir_path); + rte_errno = errno; + goto closedir_fail; + } + + while ((ent = readdir(mp_dir))) { + char path[PATH_MAX]; + + if (fnmatch(mp_filter, ent->d_name, 0) != 0) + continue; + + snprintf(path, sizeof(path), "%s/%s", mp_dir_path, + ent->d_name); + + if (mp_request_async(path, copy, param, ts)) + ret = -1; + } + /* if we didn't send anything, put dummy request on the queue */ + if (ret == 0 && reply->nb_sent == 0) { + TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next); + dummy_used = true; + } + + /* finally, unlock the queue */ + pthread_mutex_unlock(&pending_requests.lock); + + /* unlock the directory */ + flock(dir_fd, LOCK_UN); + + /* dir_fd automatically closed on closedir */ + closedir(mp_dir); + + /* if dummy was unused, free it */ + if (!dummy_used) + free(dummy); + + return ret; +closedir_fail: + closedir(mp_dir); +unlock_fail: + pthread_mutex_unlock(&pending_requests.lock); +fail: + free(dummy); + free(param); + free(copy); + return -1; +} + +int __rte_experimental +rte_mp_reply(struct rte_mp_msg *msg, const char 
*peer) +{ RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name); if (check_input(msg) == false) @@ -680,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer) return -1; } + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + return mp_send(msg, peer, MP_REP); } diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 40902e49..48ef4d6d 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include +#include "eal_private.h" #include "eal_thread.h" RTE_DECLARE_PER_LCORE(unsigned , _socket_id); @@ -32,10 +34,7 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role) if (lcore_id >= RTE_MAX_LCORE) return -EINVAL; - if (cfg->lcore_role[lcore_id] == role) - return 0; - - return -EINVAL; + return cfg->lcore_role[lcore_id] == role; } int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) @@ -140,3 +139,94 @@ exit: return ret; } + + +struct rte_thread_ctrl_params { + void *(*start_routine)(void *); + void *arg; + pthread_barrier_t configured; +}; + +static void *rte_thread_init(void *arg) +{ + int ret; + struct rte_thread_ctrl_params *params = arg; + void *(*start_routine)(void *) = params->start_routine; + void *routine_arg = params->arg; + + ret = pthread_barrier_wait(¶ms->configured); + if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + + return start_routine(routine_arg); +} + +__rte_experimental int +rte_ctrl_thread_create(pthread_t *thread, const char *name, + const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + struct rte_thread_ctrl_params *params; + unsigned int lcore_id; + rte_cpuset_t cpuset; + int cpu_found, ret; + + params = malloc(sizeof(*params)); + if (!params) + return -ENOMEM; + + params->start_routine = start_routine; + params->arg = arg; + + pthread_barrier_init(¶ms->configured, NULL, 2); + + ret = pthread_create(thread, attr, rte_thread_init, (void *)params); + if (ret != 0) { + free(params); + return -ret; + } + + if (name != NULL) { + ret = rte_thread_setname(*thread, name); + if (ret < 0) + RTE_LOG(DEBUG, EAL, + "Cannot set name for ctrl thread\n"); + } + + cpu_found = 0; + CPU_ZERO(&cpuset); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (eal_cpu_detected(lcore_id) && + rte_lcore_has_role(lcore_id, ROLE_OFF)) { + CPU_SET(lcore_id, &cpuset); + cpu_found = 1; + } + } + /* if no detected cpu is off, use master core */ + if (!cpu_found) + CPU_SET(rte_get_master_lcore(), &cpuset); + + ret = pthread_setaffinity_np(*thread, sizeof(cpuset), &cpuset); + if (ret < 0) + goto fail; + + ret = pthread_barrier_wait(¶ms->configured); + if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + + return 0; + +fail: + if (PTHREAD_BARRIER_SERIAL_THREAD == + pthread_barrier_wait(¶ms->configured)) { + pthread_barrier_destroy(¶ms->configured); + free(params); + } + pthread_cancel(*thread); + pthread_join(*thread, NULL); + return -ret; +} diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c new file mode 100644 index 00000000..1b93c5b3 --- /dev/null +++ b/lib/librte_eal/common/eal_common_uuid.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 1996, 1997 Theodore Ts'o. 
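rte_ctrl_thread_create() above wraps pthread_create() so that control threads get a name, are pinned to CPUs not used as DPDK lcores (falling back to the master lcore), and only start running once affinity has been applied. A small usage sketch; the thread body is illustrative and the declaring header is assumed to be rte_lcore.h:

    #include <pthread.h>
    #include <unistd.h>
    #include <rte_lcore.h>

    static volatile int example_running = 1;

    static void *
    example_ctrl_loop(void *arg)
    {
        (void)arg;
        while (example_running)
            sleep(1);    /* housekeeping work would go here */
        return NULL;
    }

    static int
    example_start_ctrl_thread(pthread_t *tid)
    {
        /* returns 0 on success, a negative errno-style value on failure */
        return rte_ctrl_thread_create(tid, "example-ctrl", NULL,
                example_ctrl_loop, NULL);
    }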
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +/* UUID packed form */ +struct uuid { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_version; + uint16_t clock_seq; + uint8_t node[6]; +}; + +static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr) +{ + uint32_t tmp; + uint8_t *out = ptr; + + tmp = uu->time_low; + out[3] = (uint8_t) tmp; + tmp >>= 8; + out[2] = (uint8_t) tmp; + tmp >>= 8; + out[1] = (uint8_t) tmp; + tmp >>= 8; + out[0] = (uint8_t) tmp; + + tmp = uu->time_mid; + out[5] = (uint8_t) tmp; + tmp >>= 8; + out[4] = (uint8_t) tmp; + + tmp = uu->time_hi_and_version; + out[7] = (uint8_t) tmp; + tmp >>= 8; + out[6] = (uint8_t) tmp; + + tmp = uu->clock_seq; + out[9] = (uint8_t) tmp; + tmp >>= 8; + out[8] = (uint8_t) tmp; + + memcpy(out+10, uu->node, 6); +} + +static void uuid_unpack(const rte_uuid_t in, struct uuid *uu) +{ + const uint8_t *ptr = in; + uint32_t tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_low = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_mid = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_hi_and_version = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->clock_seq = tmp; + + memcpy(uu->node, ptr, 6); +} + +bool rte_uuid_is_null(const rte_uuid_t uu) +{ + const uint8_t *cp = uu; + int i; + + for (i = 0; i < 16; i++) + if (*cp++) + return false; + return true; +} + +/* + * rte_uuid_compare() - compare two UUIDs. + */ +int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2) +{ + struct uuid uuid1, uuid2; + + uuid_unpack(uu1, &uuid1); + uuid_unpack(uu2, &uuid2); + +#define UUCMP(u1, u2) \ + do { if (u1 != u2) return (u1 < u2) ? 
-1 : 1; } while (0) + + UUCMP(uuid1.time_low, uuid2.time_low); + UUCMP(uuid1.time_mid, uuid2.time_mid); + UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version); + UUCMP(uuid1.clock_seq, uuid2.clock_seq); +#undef UUCMP + + return memcmp(uuid1.node, uuid2.node, 6); +} + +int rte_uuid_parse(const char *in, rte_uuid_t uu) +{ + struct uuid uuid; + int i; + const char *cp; + char buf[3]; + + if (strlen(in) != 36) + return -1; + + for (i = 0, cp = in; i <= 36; i++, cp++) { + if ((i == 8) || (i == 13) || (i == 18) || + (i == 23)) { + if (*cp == '-') + continue; + else + return -1; + } + if (i == 36) + if (*cp == 0) + continue; + if (!isxdigit(*cp)) + return -1; + } + + uuid.time_low = strtoul(in, NULL, 16); + uuid.time_mid = strtoul(in+9, NULL, 16); + uuid.time_hi_and_version = strtoul(in+14, NULL, 16); + uuid.clock_seq = strtoul(in+19, NULL, 16); + cp = in+24; + buf[2] = 0; + + for (i = 0; i < 6; i++) { + buf[0] = *cp++; + buf[1] = *cp++; + uuid.node[i] = strtoul(buf, NULL, 16); + } + + uuid_pack(&uuid, uu); + return 0; +} + +void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len) +{ + struct uuid uuid; + + uuid_unpack(uu, &uuid); + + snprintf(out, len, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, + uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, + uuid.node[0], uuid.node[1], uuid.node[2], + uuid.node[3], uuid.node[4], uuid.node[5]); +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 4708dd54..de05febf 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -12,7 +12,6 @@ #define EAL_FILESYSTEM_H /** Path of rte config file. */ -#define RUNTIME_CONFIG_FMT "%s/.%s_config" #include #include @@ -22,60 +21,70 @@ #include #include "eal_internal_cfg.h" -static const char *default_config_dir = "/var/run"; +/* sets up platform-specific runtime data dir */ +int +eal_create_runtime_dir(void); +/* returns runtime dir */ +const char * +eal_get_runtime_dir(void); + +#define RUNTIME_CONFIG_FNAME "config" static inline const char * eal_runtime_config_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - const char *directory = default_config_dir; - const char *home_dir = getenv("HOME"); - if (getuid() != 0 && home_dir != NULL) - directory = home_dir; - snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory, - internal_config.hugefile_prefix); + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + RUNTIME_CONFIG_FNAME); return buffer; } /** Path of primary/secondary communication unix socket file. */ -#define MP_SOCKET_PATH_FMT "%s/.%s_unix" +#define MP_SOCKET_FNAME "mp_socket" static inline const char * eal_mp_socket_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - const char *directory = default_config_dir; - const char *home_dir = getenv("HOME"); - if (getuid() != 0 && home_dir != NULL) - directory = home_dir; - snprintf(buffer, sizeof(buffer) - 1, MP_SOCKET_PATH_FMT, - directory, internal_config.hugefile_prefix); + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + MP_SOCKET_FNAME); + return buffer; +} +#define FBARRAY_NAME_FMT "%s/fbarray_%s" +static inline const char * +eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) { + snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name); return buffer; } /** Path of hugepage info file. 
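A short round-trip sketch of the UUID API introduced in eal_common_uuid.c above, using the usual 36-character textual form; the declarations are assumed to live in rte_uuid.h and the example value is arbitrary:

    #include <stdio.h>
    #include <rte_uuid.h>

    static int
    example_uuid_roundtrip(void)
    {
        rte_uuid_t id;
        char text[37];    /* 36 characters plus the terminating NUL */

        if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", id) < 0)
            return -1;

        rte_uuid_unparse(id, text, sizeof(text));
        printf("%s null=%d\n", text, rte_uuid_is_null(id));

        /* a UUID always compares equal to itself */
        return rte_uuid_compare(id, id);
    }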
*/ -#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info" - +#define HUGEPAGE_INFO_FNAME "hugepage_info" static inline const char * eal_hugepage_info_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - const char *directory = default_config_dir; - const char *home_dir = getenv("HOME"); - if (getuid() != 0 && home_dir != NULL) - directory = home_dir; - snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory, - internal_config.hugefile_prefix); + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + HUGEPAGE_INFO_FNAME); + return buffer; +} + +/** Path of hugepage data file. */ +#define HUGEPAGE_DATA_FNAME "hugepage_data" +static inline const char * +eal_hugepage_data_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + HUGEPAGE_DATA_FNAME); return buffer; } /** String format for hugepage map files. */ #define HUGEFILE_FMT "%s/%smap_%d" -#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" - static inline const char * eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) { @@ -85,6 +94,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id return buffer; } +/** String format for hugepage map lock files. */ +#define HUGEFILE_LOCK_FMT "%s/map_%d.lock" +static inline const char * +eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id) +{ + snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(), + f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} + /** define the default filename prefix for the %s values above */ #define HUGEFILE_PREFIX_DEFAULT "rte" diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h index 1d519bbb..4582f19c 100644 --- a/lib/librte_eal/common/eal_hugepages.h +++ b/lib/librte_eal/common/eal_hugepages.h @@ -22,14 +22,19 @@ struct hugepage_file { size_t size; /**< the page size */ int socket_id; /**< NUMA socket ID */ int file_id; /**< the '%d' in HUGEFILE_FMT */ - int memseg_id; /**< the memory segment to which page belongs */ char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ }; /** - * Read the information from linux on what hugepages are available - * for the EAL to use + * Read the information on what hugepages are available for the EAL to use, + * clearing out any unused ones. */ int eal_hugepage_info_init(void); +/** + * Read whatever information primary process has shared about hugepages into + * secondary process. 
+ */ +int eal_hugepage_info_read(void); + #endif /* EAL_HUGEPAGES_H */ diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 1169fcc3..00ee6e06 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -21,9 +21,9 @@ */ struct hugepage_info { uint64_t hugepage_sz; /**< size of a huge page */ - const char *hugedir; /**< dir where hugetlbfs is mounted */ + char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */ uint32_t num_pages[RTE_MAX_NUMA_NODES]; - /**< number of hugepages of that size on each socket */ + /**< number of hugepages of that size on each socket */ int lock_descriptor; /**< file descriptor for hugepage dir */ }; @@ -41,12 +41,26 @@ struct internal_config { volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping * instead of native TSC */ volatile unsigned no_shconf; /**< true if there is no shared config */ + volatile unsigned in_memory; + /**< true if DPDK should operate entirely in-memory and not create any + * shared files or runtime data. + */ volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */ volatile enum rte_proc_type_t process_type; /**< multi-process proc type */ /** true to try allocating memory on specific sockets */ volatile unsigned force_sockets; volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + volatile unsigned force_socket_limits; + volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ + volatile unsigned legacy_mem; + /**< true to enable legacy memory behavior (no dynamic allocation, + * IOVA-contiguous segments). + */ + volatile unsigned single_file_segments; + /**< true if storing all pages within single files (per-page-size, + * per-node) non-legacy mode only. + */ volatile int syslog_facility; /**< facility passed to openlog() */ /** default interrupt mode for VFIO */ volatile enum rte_intr_mode vfio_intr_mode; @@ -56,6 +70,8 @@ struct internal_config { /**< user defined mbuf pool ops name */ unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; + volatile unsigned int init_complete; + /**< indicates whether EAL has completed initialization */ }; extern struct internal_config internal_config; /**< Global EAL configuration. */ diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h new file mode 100644 index 00000000..36bb1a02 --- /dev/null +++ b/lib/librte_eal/common/eal_memalloc.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef EAL_MEMALLOC_H +#define EAL_MEMALLOC_H + +#include + +#include +#include + +/* + * Allocate segment of specified page size. + */ +struct rte_memseg * +eal_memalloc_alloc_seg(size_t page_sz, int socket); + +/* + * Allocate `n_segs` segments. + * + * Note: `ms` can be NULL. + * + * Note: it is possible to request best-effort allocation by setting `exact` to + * `false`, in which case allocator will return however many pages it managed to + * allocate successfully. + */ +int +eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz, + int socket, bool exact); + +/* + * Deallocate segment + */ +int +eal_memalloc_free_seg(struct rte_memseg *ms); + +/* + * Deallocate `n_segs` segments. 
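eal_memalloc_alloc_seg_bulk() above allows best-effort allocation when exact is false, returning however many pages it actually managed to obtain. A hedged, EAL-internal sketch of that pattern; the page size, socket and count are illustrative, and the return-value handling follows the comment above:

    #include <stdbool.h>

    #include <rte_memory.h>
    #include "eal_memalloc.h"

    static int
    example_grow_best_effort(void)
    {
        struct rte_memseg *segs[8];
        int got;

        /* up to eight 2 MB pages on socket 0, fewer is acceptable */
        got = eal_memalloc_alloc_seg_bulk(segs, 8, RTE_PGSIZE_2M, 0, false);
        if (got <= 0)
            return -1;    /* nothing could be allocated */

        /* ... hand the segments to the heap ... */

        return eal_memalloc_free_seg_bulk(segs, got);
    }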
Returns 0 on successful deallocation of all + * segments, returns -1 on error. Any segments that could have been deallocated, + * will be deallocated even in case of error. + */ +int +eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs); + +/* + * Check if memory pointed to by `start` and of `length` that resides in + * memseg list `msl` is IOVA-contiguous. + */ +bool +eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, + size_t len); + +/* synchronize local memory map to primary process */ +int +eal_memalloc_sync_with_primary(void); + +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb, void *arg); + +int +eal_memalloc_mem_event_callback_unregister(const char *name, void *arg); + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len); + +int +eal_memalloc_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit); + +int +eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id); + +int +eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len); + +int +eal_memalloc_init(void); + +#endif /* EAL_MEMALLOC_H */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index e86c7114..96e16678 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -45,8 +45,12 @@ enum { OPT_NO_PCI_NUM, #define OPT_NO_SHCONF "no-shconf" OPT_NO_SHCONF_NUM, +#define OPT_IN_MEMORY "in-memory" + OPT_IN_MEMORY_NUM, #define OPT_SOCKET_MEM "socket-mem" OPT_SOCKET_MEM_NUM, +#define OPT_SOCKET_LIMIT "socket-limit" + OPT_SOCKET_LIMIT_NUM, #define OPT_SYSLOG "syslog" OPT_SYSLOG_NUM, #define OPT_VDEV "vdev" @@ -55,6 +59,10 @@ enum { OPT_VFIO_INTR_NUM, #define OPT_VMWARE_TSC_MAP "vmware-tsc-map" OPT_VMWARE_TSC_MAP_NUM, +#define OPT_LEGACY_MEM "legacy-mem" + OPT_LEGACY_MEM_NUM, +#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments" + OPT_SINGLE_FILE_SEGMENTS_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 0b287700..4f809a83 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -9,6 +9,8 @@ #include #include +#include + /** * Initialize the memzone subsystem (private to eal). * @@ -44,6 +46,18 @@ void eal_log_set_default(FILE *default_log); */ int rte_eal_cpu_init(void); +/** + * Create memseg lists + * + * This function is private to EAL. + * + * Preallocate virtual memory. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memseg_init(void); + /** * Map memory * @@ -80,6 +94,12 @@ int rte_eal_timer_init(void); */ int rte_eal_log_init(const char *id, int facility); +/** + * Save the log regexp for later + */ +int rte_log_save_regexp(const char *type, int priority); +int rte_log_save_pattern(const char *pattern, int priority); + /** * Init tail queues for non-EAL library structures. This is to allow * the rings, mempools, etc. lists to be shared among multiple processes @@ -126,6 +146,39 @@ int rte_eal_alarm_init(void); */ int rte_eal_check_module(const char *module_name); +/** + * Get virtual area of specified size from the OS. + * + * This function is private to the EAL. + * + * @param requested_addr + * Address where to request address space. + * @param size + * Size of requested area. + * @param page_sz + * Page size on which to align requested virtual area. + * @param flags + * EAL_VIRTUAL_AREA_* flags. 
+ * @param mmap_flags + * Extra flags passed directly to mmap(). + * + * @return + * Virtual area address if successful. + * NULL if unsuccessful. + */ + +#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0) +/**< don't fail if cannot get exact requested address. */ +#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1) +/**< try getting smaller sized (decrement by page size) virtual areas if cannot + * get area of requested size. + */ +#define EAL_VIRTUAL_AREA_UNMAP (1 << 2) +/**< immediately unmap reserved virtual area. */ +void * +eal_get_virtual_area(void *requested_addr, size_t *size, + size_t page_sz, int flags, int mmap_flags); + /** * Get cpu core_id. * @@ -205,4 +258,50 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str); int rte_mp_channel_init(void); +/** + * Internal Executes all the user application registered callbacks for + * the specific device. It is for DPDK internal user only. User + * application should not call it directly. + * + * @param device_name + * The device name. + * @param event + * the device event type. + */ +void dev_callback_process(char *device_name, enum rte_dev_event_type event); + +/** + * @internal + * Parse a device string and store its information in an + * rte_devargs structure. + * + * A device description is split by layers of abstraction of the device: + * bus, class and driver. Each layer will offer a set of properties that + * can be applied either to configure or recognize a device. + * + * This function will parse those properties and prepare the rte_devargs + * to be given to each layers for processing. + * + * Note: if the "data" field of the devargs points to devstr, + * then no dynamic allocation is performed and the rte_devargs + * can be safely discarded. + * + * Otherwise ``data`` will hold a workable copy of devstr, that will be + * used by layers descriptors within rte_devargs. In this case, + * any rte_devargs should be cleaned-up before being freed. + * + * @param da + * rte_devargs structure to fill. + * + * @param devstr + * Device string. + * + * @return + * 0 on success. + * Negative errno values on error (rte_errno is set). + */ +int +rte_devargs_layers_parse(struct rte_devargs *devargs, + const char *devstr); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h index f3f3b6e3..40e14e56 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -1,33 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
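eal_get_virtual_area() above reserves an anonymous, page-size-aligned region and can treat the requested address as a hint, shrink the request, or unmap the reservation immediately, depending on the flags. A hedged internal-use sketch; the sizes are illustrative and no extra mmap() flags are passed:

    #include <stddef.h>

    #include <rte_memory.h>    /* RTE_PGSIZE_* constants */
    #include "eal_private.h"

    static void *
    example_reserve_va(void *hint, size_t *size)
    {
        *size = RTE_PGSIZE_1G;

        /* on return, *size holds the length actually reserved (it may have
         * shrunk); NULL means the reservation failed
         */
        return eal_get_virtual_area(hint, size, RTE_PGSIZE_2M,
                EAL_VIRTUAL_AREA_ADDR_IS_HINT |
                EAL_VIRTUAL_AREA_ALLOW_SHRINK, 0);
    }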
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_ATOMIC_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h index d2b7fa20..859562e5 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h @@ -1,33 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_ATOMIC_ARM32_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h index 8af0a39a..9ec4a975 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h +++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_BYTEORDER_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h index b8f62889..022e7da5 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_CPUFLAGS_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h index eb02d9b9..b5347be1 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_CPUFLAGS_ARM32_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h index a8009a06..e8ffa894 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_CYCLES_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h index 9c1be71e..c4f974fe 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_CYCLES_ARM32_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h index 1d562c3f..47dea9a8 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_MEMCPY_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h index e4dafda1..eb02c3b4 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. 
*/ #ifndef _RTE_MEMCPY_ARM32_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h index aa37de57..27870c2a 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_PREFETCH_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h index 43cde172..e53420a0 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_PREFETCH_ARM32_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h index 664bec88..18bb37b0 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h +++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: BSD-3-Clause + */ /* copied from ppc_64 */ #ifndef _RTE_RWLOCK_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h index 396a42e8..1a6916b6 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h @@ -1,33 +1,5 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2015 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 RehiveTech. All rights reserved. */ #ifndef _RTE_SPINLOCK_ARM_H_ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index 39fce7b9..ce38350b 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -55,7 +55,7 @@ extern "C" { * Guarantees that the LOAD and STORE operations generated before the * barrier occur before the LOAD and STORE operations generated after. 
*/ -#define rte_mb() {asm volatile("sync" : : : "memory"); } +#define rte_mb() asm volatile("sync" : : : "memory") /** * Write memory barrier. @@ -136,6 +136,12 @@ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0; } +static inline uint16_t +rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val) +{ + return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST); +} + /*------------------------- 32 bit atomic operations -------------------------*/ static inline int @@ -237,6 +243,13 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) return ret == 0; } + +static inline uint32_t +rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val) +{ + return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST); +} + /*------------------------- 64 bit atomic operations -------------------------*/ static inline int @@ -431,7 +444,6 @@ static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) { return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); } - /** * Atomically set a 64-bit counter to 0. * @@ -442,6 +454,13 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v) { v->cnt = 0; } + +static inline uint64_t +rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val) +{ + return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST); +} + #endif #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h index de8af19e..9fadc040 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: BSD-3-Clause + */ #ifndef _RTE_RWLOCK_PPC_64_H_ #define _RTE_RWLOCK_PPC_64_H_ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h index 5cfd3832..148398f5 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -104,6 +104,18 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) return res; } +static inline uint16_t +rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val) +{ + asm volatile( + MPLOCKED + "xchgw %0, %1;" + : "=r" (val), "=m" (*dst) + : "0" (val), "m" (*dst) + : "memory"); /* no-clobber list */ + return val; +} + static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) { return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); @@ -178,6 +190,18 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) return res; } +static inline uint32_t +rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val) +{ + asm volatile( + MPLOCKED + "xchgl %0, %1;" + : "=r" (val), "=m" (*dst) + : "0" (val), "m" (*dst) + : "memory"); /* no-clobber list */ + return val; +} + static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) { return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index fb3abf18..a932f354 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -98,6 +98,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) return res; } +static inline uint64_t +rte_atomic64_exchange(volatile uint64_t *dest, uint64_t val) +{ + uint64_t old; + + do { + old = *dest; + } while (rte_atomic64_cmpset(dest, old, val)
== 0); + + return old; +} + static inline void rte_atomic64_init(rte_atomic64_t *v) { diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h index 1a53a766..fd2ec9c5 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -71,6 +71,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) return res; } +static inline uint64_t +rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val) +{ + asm volatile( + MPLOCKED + "xchgq %0, %1;" + : "=r" (val), "=m" (*dst) + : "0" (val), "m" (*dst) + : "memory"); /* no-clobber list */ + return val; +} + static inline void rte_atomic64_init(rte_atomic64_t *v) { diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index cc140ecc..7b758094 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -52,7 +52,7 @@ rte_memcpy(void *dst, const void *src, size_t n); * Copy 16 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -65,7 +65,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { __m256i ymm0; @@ -78,7 +78,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { __m512i zmm0; @@ -91,7 +91,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src) * Copy 128 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov128(uint8_t *dst, const uint8_t *src) { rte_mov64(dst + 0 * 64, src + 0 * 64); @@ -102,7 +102,7 @@ rte_mov128(uint8_t *dst, const uint8_t *src) * Copy 256 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov256(uint8_t *dst, const uint8_t *src) { rte_mov64(dst + 0 * 64, src + 0 * 64); @@ -293,7 +293,7 @@ COPY_BLOCK_128_BACK63: * Copy 16 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -306,7 +306,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { __m256i ymm0; @@ -319,7 +319,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); @@ -486,7 +486,7 @@ COPY_BLOCK_128_BACK31: * Copy 16 bytes from one location to another, * locations should not overlap. 
*/ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -499,7 +499,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); @@ -510,7 +510,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); @@ -574,7 +574,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) */ #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ __extension__ ({ \ - int tmp; \ + size_t tmp; \ while (len >= 128 + 16 - offset) { \ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ len -= 128; \ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h index 4b16887e..60321da0 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -76,10 +76,12 @@ static inline int rte_tm_supported(void) static inline int rte_try_tm(volatile int *lock) { + int retries; + if (!rte_rtm_supported) return 0; - int retries = RTE_RTM_MAX_RETRIES; + retries = RTE_RTM_MAX_RETRIES; while (likely(retries--)) { diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index 50e1b8a4..b99ba468 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -190,6 +190,36 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) } #endif +/** + * Atomic exchange. + * + * (atomic) equivalent to: + * ret = *dst + * *dst = val; + * return ret; + * + * @param dst + * The destination location into which the value will be written. + * @param val + * The new value. + * @return + * The original value at that location + */ +static inline uint16_t +rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val); + +#ifdef RTE_FORCE_INTRINSICS +static inline uint16_t +rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val) +{ +#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); +#else + return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST); +#endif +} +#endif + /** * The atomic counter structure. */ @@ -443,6 +473,36 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) } #endif +/** + * Atomic exchange. + * + * (atomic) equivalent to: + * ret = *dst + * *dst = val; + * return ret; + * + * @param dst + * The destination location into which the value will be written. + * @param val + * The new value. + * @return + * The original value at that location + */ +static inline uint32_t +rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val); + +#ifdef RTE_FORCE_INTRINSICS +static inline uint32_t +rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val) +{ +#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); +#else + return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST); +#endif +} +#endif + /** * The atomic counter structure. 
*/ @@ -695,6 +755,36 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) } #endif +/** + * Atomic exchange. + * + * (atomic) equivalent to: + * ret = *dst + * *dst = val; + * return ret; + * + * @param dst + * The destination location into which the value will be written. + * @param val + * The new value. + * @return + * The original value at that location + */ +static inline uint64_t +rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val); + +#ifdef RTE_FORCE_INTRINSICS +static inline uint64_t +rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val) +{ +#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); +#else + return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST); +#endif +} +#endif + /** * The atomic counter structure. */ diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h index 9bed85cc..7d9a1463 100644 --- a/lib/librte_eal/common/include/generic/rte_byteorder.h +++ b/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -123,7 +123,7 @@ typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */ static inline uint16_t rte_constant_bswap16(uint16_t x) { - return RTE_STATIC_BSWAP16(x); + return (uint16_t)RTE_STATIC_BSWAP16(x); } /* @@ -135,7 +135,7 @@ rte_constant_bswap16(uint16_t x) static inline uint32_t rte_constant_bswap32(uint32_t x) { - return RTE_STATIC_BSWAP32(x); + return (uint32_t)RTE_STATIC_BSWAP32(x); } /* @@ -147,7 +147,7 @@ rte_constant_bswap32(uint32_t x) static inline uint64_t rte_constant_bswap64(uint64_t x) { - return RTE_STATIC_BSWAP64(x); + return (uint64_t)RTE_STATIC_BSWAP64(x); } diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h index 8d31687d..156ea002 100644 --- a/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -64,4 +64,25 @@ rte_cpu_check_supported(void); int rte_cpu_is_supported(void); +/** + * This function attempts to retrieve a value from the auxiliary vector. + * If it is unsuccessful, the result will be 0, and errno will be set. + * + * @return A value from the auxiliary vector. When the value is 0, check + * errno to determine if an error occurred. + */ +unsigned long +rte_cpu_getauxval(unsigned long type); + +/** + * This function retrieves a value from the auxiliary vector, and compares it + * as a string against the value retrieved. + * + * @return The result of calling strcmp() against the value retrieved from + * the auxiliary vector. When the value is 0 (meaning a match is found), + * check errno to determine if an error occurred. 
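/*
 * Illustrative sketch of how the auxiliary-vector helpers declared above
 * could be used; this is an assumption-based example, not taken from the
 * header itself. AT_HWCAP and AT_PLATFORM are assumed to come from Linux's
 * <sys/auxv.h>, and the "power9" platform string is only an example.
 */
#include <sys/auxv.h>
#include <rte_cpuflags.h>

static unsigned long
platform_hwcap(void)
{
	/* A return value of 0 may mean "no such entry"; per the doc above, check errno. */
	return rte_cpu_getauxval(AT_HWCAP);
}

static int
platform_is_power9(void)
{
	/* rte_cpu_strcmp_auxval() returns the strcmp() result: 0 on a match. */
	return rte_cpu_strcmp_auxval(AT_PLATFORM, "power9") == 0;
}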
+ */ +int +rte_cpu_strcmp_auxval(unsigned long type, const char *str); + #endif /* _RTE_CPUFLAGS_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h index 899e9bc4..5751a0e6 100644 --- a/lib/librte_eal/common/include/generic/rte_rwlock.h +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -71,7 +71,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl) continue; } success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - x, x + 1); + (uint32_t)x, (uint32_t)(x + 1)); } } @@ -107,7 +107,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl) continue; } success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - 0, -1); + 0, (uint32_t)-1); } } diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h index 7d4935fc..d9facc64 100644 --- a/lib/librte_eal/common/include/rte_bitmap.h +++ b/lib/librte_eal/common/include/rte_bitmap.h @@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) { /** * Bitmap initialization * - * @param mem_size - * Minimum expected size of bitmap. + * @param n_bits + * Number of pre-allocated bits in array2. * @param mem * Base address of array1 and array2. - * @param n_bits - * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. + * @param mem_size + * Minimum expected size of bitmap. * @return * Handle to bitmap instance. */ diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index 6fb08341..b7b5b084 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -211,6 +211,7 @@ struct rte_bus { rte_bus_parse_t parse; /**< Parse a device name */ struct rte_bus_conf conf; /**< Bus configuration */ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ + rte_dev_iterate_t dev_iterate; /**< Device iterator. */ }; /** @@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void); * The constructor has higher priority than PMD constructors. */ #define RTE_REGISTER_BUS(nm, bus) \ -RTE_INIT_PRIO(businitfn_ ##nm, 110); \ -static void businitfn_ ##nm(void) \ +RTE_INIT_PRIO(businitfn_ ##nm, BUS) \ {\ (bus).name = RTE_STR(nm);\ rte_bus_register(&bus); \ diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h new file mode 100644 index 00000000..276c91e9 --- /dev/null +++ b/lib/librte_eal/common/include/rte_class.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Gaëtan Rivet + */ + +#ifndef _RTE_CLASS_H_ +#define _RTE_CLASS_H_ + +/** + * @file + * + * DPDK device class interface. + * + * This file describes the interface of the device class + * abstraction layer. + * + * A device class defines the type of function a device + * will be used for e.g.: Ethernet adapter (eth), + * cryptographic coprocessor (crypto), etc. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include + +/** Double linked list of classes */ +TAILQ_HEAD(rte_class_list, rte_class); + +/** + * A structure describing a generic device class. + */ +struct rte_class { + TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */ + const char *name; /**< Name of the class */ + rte_dev_iterate_t dev_iterate; /**< Device iterator. */ +}; + +/** + * Class comparison function. + * + * @param cls + * Class under test. + * + * @param data + * Data to compare against. + * + * @return + * 0 if the class matches the data. + * !0 if the class does not match. 
+ * <0 if ordering is possible and the class is lower than the data. + * >0 if ordering is possible and the class is greater than the data. + */ +typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data); + +/** + * Class iterator to find a particular class. + * + * This function compares each registered class to find one that matches + * the data passed as parameter. + * + * If the comparison function returns zero this function will stop iterating + * over any more classes. To continue a search the class of a previous search + * can be passed via the start parameter. + * + * @param start + * Starting point for the iteration. + * + * @param cmp + * Comparison function. + * + * @param data + * Data to pass to comparison function. + * + * @return + * A pointer to a rte_class structure or NULL in case no class matches + */ +__rte_experimental +struct rte_class * +rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp, + const void *data); + +/** + * Find the registered class for a given name. + */ +__rte_experimental +struct rte_class * +rte_class_find_by_name(const char *name); + +/** + * Register a Class handle. + * + * @param cls + * A pointer to a rte_class structure describing the class + * to be registered. + */ +__rte_experimental +void rte_class_register(struct rte_class *cls); + +/** + * Unregister a Class handle. + * + * @param cls + * A pointer to a rte_class structure describing the class + * to be unregistered. + */ +__rte_experimental +void rte_class_unregister(struct rte_class *cls); + +/** + * Helper for Class registration. + * The constructor has lower priority than Bus constructors. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_CLASS(nm, cls) \ +RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \ +{\ + (cls).name = RTE_STR(nm); \ + rte_class_register(&cls); \ +} + +#define RTE_UNREGISTER_CLASS(nm, cls) \ +RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \ +{ \ + rte_class_unregister(&cls); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CLASS_H_ */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index c7803e41..069c13ec 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -81,6 +81,26 @@ typedef uint16_t unaligned_uint16_t; */ #define RTE_SET_USED(x) (void)(x) +#define RTE_PRIORITY_LOG 101 +#define RTE_PRIORITY_BUS 110 +#define RTE_PRIORITY_CLASS 120 +#define RTE_PRIORITY_LAST 65535 + +#define RTE_PRIO(prio) \ + RTE_PRIORITY_ ## prio + +/** + * Run function before main() with high priority. + * + * @param func + * Constructor function. + * @param prio + * Priority number must be above 100. + * Lowest number is the first to run. + */ +#define RTE_INIT_PRIO(func, prio) \ +static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void) + /** * Run function before main() with low priority. * @@ -90,19 +110,30 @@ typedef uint16_t unaligned_uint16_t; * Constructor function. */ #define RTE_INIT(func) \ -static void __attribute__((constructor, used)) func(void) + RTE_INIT_PRIO(func, LAST) /** - * Run function before main() with high priority. + * Run after main() with low priority. * * @param func - * Constructor function. + * Destructor function name. * @param prio * Priority number must be above 100. - * Lowest number is the first to run. + * Lowest number is the last to run. 
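/*
 * Minimal sketch of the prioritized constructor macro defined above. The
 * function name is an illustrative assumption; the body runs before main()
 * at RTE_PRIORITY_LAST, i.e. after the BUS and CLASS constructors.
 */
#include <rte_common.h>

RTE_INIT_PRIO(app_late_init, LAST)
{
	/* one-time process setup that depends on bus/class registration */
}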
*/ -#define RTE_INIT_PRIO(func, prio) \ -static void __attribute__((constructor(prio), used)) func(void) +#define RTE_FINI_PRIO(func, prio) \ +static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void) + +/** + * Run after main() with high priority. + * + * The destructor will be run *before* prioritized destructors. + * + * @param func + * Destructor function name. + */ +#define RTE_FINI(func) \ + RTE_FINI_PRIO(func, LAST) /** * Force a function to be inlined @@ -117,7 +148,7 @@ static void __attribute__((constructor(prio), used)) func(void) /*********** Macros for pointer arithmetic ********/ /** - * add a byte-value offset from a pointer + * add a byte-value offset to a pointer */ #define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) @@ -190,6 +221,22 @@ static void __attribute__((constructor(prio), used)) func(void) */ #define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align) +/** + * Macro to align a value to the multiple of given value. The resultant + * value will be of the same type as the first parameter and will be no lower + * than the first parameter. + */ +#define RTE_ALIGN_MUL_CEIL(v, mul) \ + (((v + (typeof(v))(mul) - 1) / ((typeof(v))(mul))) * (typeof(v))(mul)) + +/** + * Macro to align a value to the multiple of given value. The resultant + * value will be of the same type as the first parameter and will be no higher + * than the first parameter. + */ +#define RTE_ALIGN_MUL_FLOOR(v, mul) \ + ((v / ((typeof(v))(mul))) * (typeof(v))(mul)) + /** * Checks if a pointer is aligned to a given power-of-two value * @@ -223,8 +270,58 @@ extern int RTE_BUILD_BUG_ON_detected_error; } while(0) #endif +/** + * Combines 32b inputs most significant set bits into the least + * significant bits to construct a value with the same MSBs as x + * but all 1's under it. + * + * @param x + * The integer whose MSBs need to be combined with its LSBs + * @return + * The combined value. + */ +static inline uint32_t +rte_combine32ms1b(register uint32_t x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return x; +} + +/** + * Combines 64b inputs most significant set bits into the least + * significant bits to construct a value with the same MSBs as x + * but all 1's under it. + * + * @param v + * The integer whose MSBs need to be combined with its LSBs + * @return + * The combined value. 
+ */ +static inline uint64_t +rte_combine64ms1b(register uint64_t v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return v; +} + /*********** Macros to work with powers of 2 ********/ +/** + * Macro to return 1 if n is a power of 2, 0 otherwise + */ +#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n))) + /** * Returns true if n is a power of 2 * @param n @@ -250,15 +347,28 @@ static inline uint32_t rte_align32pow2(uint32_t x) { x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; + x = rte_combine32ms1b(x); return x + 1; } +/** + * Aligns input parameter to the previous power of 2 + * + * @param x + * The integer value to algin + * + * @return + * Input parameter aligned to the previous power of 2 + */ +static inline uint32_t +rte_align32prevpow2(uint32_t x) +{ + x = rte_combine32ms1b(x); + + return x - (x >> 1); +} + /** * Aligns 64b input parameter to the next power of 2 * @@ -272,16 +382,28 @@ static inline uint64_t rte_align64pow2(uint64_t v) { v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v |= v >> 32; + v = rte_combine64ms1b(v); return v + 1; } +/** + * Aligns 64b input parameter to the previous power of 2 + * + * @param v + * The 64b value to align + * + * @return + * Input parameter aligned to the previous power of 2 + */ +static inline uint64_t +rte_align64prevpow2(uint64_t v) +{ + v = rte_combine64ms1b(v); + + return v - (v >> 1); +} + /*********** Macros for calculating min and max **********/ /** @@ -320,7 +442,7 @@ rte_align64pow2(uint64_t v) static inline uint32_t rte_bsf32(uint32_t v) { - return __builtin_ctz(v); + return (uint32_t)__builtin_ctz(v); } /** diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index b688f1ef..b80a8059 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -24,6 +24,25 @@ extern "C" { #include #include +/** + * The device event type. + */ +enum rte_dev_event_type { + RTE_DEV_EVENT_ADD, /**< device being added */ + RTE_DEV_EVENT_REMOVE, /**< device being removed */ + RTE_DEV_EVENT_MAX /**< max value of this enum */ +}; + +struct rte_dev_event { + enum rte_dev_event_type type; /**< device event type */ + int subsystem; /**< subsystem id */ + char *devname; /**< device name */ +}; + +typedef void (*rte_dev_event_cb_fn)(char *device_name, + enum rte_dev_event_type event, + void *cb_arg); + __attribute__((format(printf, 2, 0))) static inline void rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) @@ -32,24 +51,25 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) va_start(ap, fmt); - char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; + { + char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; - va_end(ap); + va_end(ap); - va_start(ap, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, ap); - va_end(ap); + va_start(ap, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, ap); + va_end(ap); - rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", + func_name, buffer); + } } /* * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the * RTE_*_RET() macros defined below is compiled in debug mode. */ -#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \ - defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \ - defined(RTE_LIBRTE_EVENTDEV_DEBUG) +#if defined(RTE_LIBRTE_EVENTDEV_DEBUG) #define RTE_PMD_DEBUG_TRACE(...) 
\ rte_pmd_debug_trace(__func__, __VA_ARGS__) #else @@ -154,6 +174,7 @@ struct rte_device { * @return * 0 on success, negative on error. */ +__rte_deprecated int rte_eal_dev_attach(const char *name, const char *devargs); /** @@ -164,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs); * @return * 0 on success, negative on error. */ +__rte_deprecated int rte_eal_dev_detach(struct rte_device *dev); /** @@ -263,8 +285,179 @@ __attribute__((used)) = str static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ __attribute__((used)) = str +/** + * Iteration context. + * + * This context carries over the current iteration state. + */ +struct rte_dev_iterator { + const char *dev_str; /**< device string. */ + const char *bus_str; /**< bus-related part of device string. */ + const char *cls_str; /**< class-related part of device string. */ + struct rte_bus *bus; /**< bus handle. */ + struct rte_class *cls; /**< class handle. */ + struct rte_device *device; /**< current position. */ + void *class_device; /**< additional specialized context. */ +}; + +/** + * Device iteration function. + * + * Find the next device matching properties passed in parameters. + * The function takes an additional ``start`` parameter, that is + * used as starting context when relevant. + * + * The function returns the current element in the iteration. + * This return value will potentially be used as a start parameter + * in subsequent calls to the function. + * + * The additional iterator parameter is only there if a specific + * implementation needs additional context. It must not be modified by + * the iteration function itself. + * + * @param start + * Starting iteration context. + * + * @param devstr + * Device description string. + * + * @param it + * Device iterator. + * + * @return + * The address of the current element matching the device description + * string. + */ +typedef void *(*rte_dev_iterate_t)(const void *start, + const char *devstr, + const struct rte_dev_iterator *it); + +/** + * Initializes a device iterator. + * + * This iterator allows accessing a list of devices matching a criteria. + * The device matching is made among all buses and classes currently registered, + * filtered by the device description given as parameter. + * + * This function will not allocate any memory. It is safe to stop the + * iteration at any moment and let the iterator go out of context. + * + * @param it + * Device iterator handle. + * + * @param str + * Device description string. + * + * @return + * 0 on successful initialization. + * <0 on error. + */ +__rte_experimental +int +rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str); + +/** + * Iterates on a device iterator. + * + * Generates a new rte_device handle corresponding to the next element + * in the list described in comprehension by the iterator. + * + * The next object is returned, and the iterator is updated. + * + * @param it + * Device iterator handle. + * + * @return + * An rte_device handle if found. + * NULL if an error occurred (rte_errno is set). + * NULL if no device could be found (rte_errno is not set). 
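/*
 * Minimal sketch of walking devices with the iterator API declared above.
 * The helper name is an illustrative assumption, the device description
 * string follows the devargs syntax, and error handling is reduced to a
 * minimum.
 */
#include <stdio.h>
#include <rte_dev.h>

static void
print_matching_devices(const char *devstr)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	if (rte_dev_iterator_init(&it, devstr) < 0)
		return;
	for (dev = rte_dev_iterator_next(&it); dev != NULL;
			dev = rte_dev_iterator_next(&it))
		printf("matched device %s\n", dev->name);
}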
+ */ +__rte_experimental +struct rte_device * +rte_dev_iterator_next(struct rte_dev_iterator *it); + +#define RTE_DEV_FOREACH(dev, devstr, it) \ + for (rte_dev_iterator_init(it, devstr), \ + dev = rte_dev_iterator_next(it); \ + dev != NULL; \ + dev = rte_dev_iterator_next(it)) + #ifdef __cplusplus } #endif +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * It registers the callback for the specific device. + * Multiple callbacks can be registered at the same time. + * + * @param device_name + * The device name, that is the param name of the struct rte_device, + * null value means for all devices. + * @param cb_fn + * callback address. + * @param cb_arg + * address of parameter for callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int __rte_experimental +rte_dev_event_callback_register(const char *device_name, + rte_dev_event_cb_fn cb_fn, + void *cb_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * It unregisters the callback according to the specified device. + * + * @param device_name + * The device name, that is the param name of the struct rte_device, + * null value means for all devices and their callbacks. + * @param cb_fn + * callback address. + * @param cb_arg + * address of parameter for callback, (void *)-1 means to remove all + * registered callbacks which have the same callback address. + * + * @return + * - On success, return the number of callback entities removed. + * - On failure, a negative value. + */ +int __rte_experimental +rte_dev_event_callback_unregister(const char *device_name, + rte_dev_event_cb_fn cb_fn, + void *cb_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Start the device event monitoring. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int __rte_experimental +rte_dev_event_monitor_start(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Stop the device event monitoring. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int __rte_experimental +rte_dev_event_monitor_stop(void); + #endif /* _RTE_DEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index 84e5e23c..097a4ce7 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -51,21 +51,23 @@ struct rte_devargs { enum rte_devtype type; /** Device policy. */ enum rte_dev_policy policy; - /** Bus handle for the device. */ - struct rte_bus *bus; /** Name of the device. */ char name[RTE_DEV_NAME_MAX_LEN]; + RTE_STD_C11 + union { /** Arguments string as given by user or "" for no argument. */ - char *args; + char *args; + const char *drv_str; + }; + struct rte_bus *bus; /**< bus handle. */ + struct rte_class *cls; /**< class handle. */ + const char *bus_str; /**< bus-related part of device string. */ + const char *cls_str; /**< class-related part of device string. */ + const char *data; /**< Device string storage. */ }; -/** user device double-linked queue type definition */ -TAILQ_HEAD(rte_devargs_list, rte_devargs); - -/** Global list of user devices */ -extern struct rte_devargs_list devargs_list; - /** + * @deprecated + * Parse a devargs string.
* * For PCI devices, the format of arguments string is "PCI_ADDR" or @@ -90,6 +92,7 @@ extern struct rte_devargs_list devargs_list; * - 0 on success * - A negative value on error */ +__rte_deprecated int rte_eal_parse_devargs_str(const char *devargs_str, char **drvname, char **drvargs); @@ -100,18 +103,73 @@ int rte_eal_parse_devargs_str(const char *devargs_str, * in argument. Store which bus will handle the device, its name * and the eventual device parameters. * + * The syntax is: + * + * bus:device_identifier,arg1=val1,arg2=val2 + * + * where "bus:" is the bus name followed by any character separator. + * The bus name is optional. If no bus name is specified, each bus + * will attempt to recognize the device identifier. The first one + * to succeed will be used. + * + * Examples: + * + * pci:0000:05.00.0,arg=val + * 05.00.0,arg=val + * vdev:net_ring0 + * + * @param da + * The devargs structure holding the device information. + * * @param dev - * The device declaration string. + * String describing a device. + * + * @return + * - 0 on success. + * - Negative errno on error. + */ +__rte_experimental +int +rte_devargs_parse(struct rte_devargs *da, const char *dev); + +/** + * Parse a device string. + * + * Verify that a bus is capable of handling the device passed + * in argument. Store which bus will handle the device, its name + * and the eventual device parameters. + * + * The device string is built with a printf-like syntax. + * + * The syntax is: + * + * bus:device_identifier,arg1=val1,arg2=val2 + * + * where "bus:" is the bus name followed by any character separator. + * The bus name is optional. If no bus name is specified, each bus + * will attempt to recognize the device identifier. The first one + * to succeed will be used. + * + * Examples: + * + * pci:0000:05.00.0,arg=val + * 05.00.0,arg=val + * vdev:net_ring0 + * * @param da * The devargs structure holding the device information. + * @param format + * Format string describing a device. * * @return * - 0 on success. * - Negative errno on error. */ -int __rte_experimental -rte_eal_devargs_parse(const char *dev, - struct rte_devargs *da); +__rte_experimental +int +rte_devargs_parsef(struct rte_devargs *da, + const char *format, ...) +__attribute__((format(printf, 2, 0))); /** * Insert an rte_devargs in the global list. @@ -123,21 +181,30 @@ rte_eal_devargs_parse(const char *dev, * - 0 on success * - Negative on error. */ -int __rte_experimental -rte_eal_devargs_insert(struct rte_devargs *da); +__rte_experimental +int +rte_devargs_insert(struct rte_devargs *da); /** * Add a device to the user device list + * See rte_devargs_parse() for details. * - * For PCI devices, the format of arguments string is "PCI_ADDR" or - * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", - * "04:00.0,arg=val". + * @param devtype + * The type of the device. + * @param devargs_str + * The arguments as given by the user. * - * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", - * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the - * driver name is not checked by this function, it is done when probing - * the drivers. + * @return + * - 0 on success + * - A negative value on error + */ +__rte_experimental +int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str); + +/** + * @deprecated + * Add a device to the user device list + * See rte_devargs_parse() for details. * * @param devtype * The type of the device. 
@@ -148,6 +215,7 @@ rte_eal_devargs_insert(struct rte_devargs *da); * - 0 on success * - A negative value on error */ +__rte_deprecated int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); /** @@ -166,10 +234,25 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); * <0 on error. * >0 if the devargs was not within the user device list. */ -int __rte_experimental rte_eal_devargs_remove(const char *busname, - const char *devname); +__rte_experimental +int rte_devargs_remove(const char *busname, + const char *devname); + +/** + * Count the number of user devices of a specified type + * + * @param devtype + * The type of the devices to be counted. + * + * @return + * The number of devices. + */ +__rte_experimental +unsigned int +rte_devargs_type_count(enum rte_devtype devtype); /** + * @deprecated * Count the number of user devices of a specified type * * @param devtype @@ -178,6 +261,7 @@ int __rte_experimental rte_eal_devargs_remove(const char *busname, * @return * The number of devices. */ +__rte_deprecated unsigned int rte_eal_devargs_type_count(enum rte_devtype devtype); @@ -187,8 +271,47 @@ rte_eal_devargs_type_count(enum rte_devtype devtype); * @param f * A pointer to a file for output */ +__rte_experimental +void rte_devargs_dump(FILE *f); + +/** + * @deprecated + * This function dumps the list of user devices and their arguments. + * + * @param f + * A pointer to a file for output + */ +__rte_deprecated void rte_eal_devargs_dump(FILE *f); +/** + * Find next rte_devargs matching the provided bus name. + * + * @param busname + * Limit the iteration to devargs related to buses + * matching this name. + * Will return any next rte_devargs if NULL. + * + * @param start + * Starting iteration point. The iteration will start at + * the first rte_devargs if NULL. + * + * @return + * Next rte_devargs entry matching the requested bus, + * NULL if there is none. + */ +__rte_experimental +struct rte_devargs * +rte_devargs_next(const char *busname, const struct rte_devargs *start); + +/** + * Iterate over all rte_devargs for a specific bus. + */ +#define RTE_EAL_DEVARGS_FOREACH(busname, da) \ + for (da = rte_devargs_next(busname, NULL); \ + da != NULL; \ + da = rte_devargs_next(busname, da)) \ + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 044474e6..e114dcbd 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -57,6 +57,8 @@ enum rte_proc_type_t { struct rte_config { uint32_t master_lcore; /**< Id of the master lcore */ uint32_t lcore_count; /**< Number of available logical cores. */ + uint32_t numa_node_count; /**< Number of detected NUMA nodes. */ + uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */ uint32_t service_lcore_count;/**< Number of available service cores. */ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ @@ -229,6 +231,16 @@ struct rte_mp_reply { */ typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer); +/** + * Asynchronous reply function typedef used by other components. + * + * As we create socket channel for primary/secondary communication, use + * this function typedef to register an action for incoming responses to asynchronous + * requests.
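/*
 * Minimal sketch of iterating the per-bus devargs list with the
 * RTE_EAL_DEVARGS_FOREACH() helper defined above; "vdev" is just an example
 * bus name and the helper function name is an illustrative assumption.
 */
#include <stdio.h>
#include <rte_devargs.h>

static void
dump_vdev_devargs(void)
{
	struct rte_devargs *da;

	RTE_EAL_DEVARGS_FOREACH("vdev", da)
		printf("%s: %s\n", da->name, da->args);
}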
+ */ +typedef int (*rte_mp_async_reply_t)(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice @@ -314,9 +326,35 @@ rte_mp_sendmsg(struct rte_mp_msg *msg); * - On failure, return -1, and the reason will be stored in rte_errno. */ int __rte_experimental -rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply, +rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, const struct timespec *ts); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Send a request to the peer process and expect a reply in a separate callback. + * + * This function sends a request message to the peer process, and will not + * block. Instead, reply will be received in a separate callback. + * + * @param req + * The req argument contains the customized request message. + * + * @param ts + * The ts argument specifies how long we can wait for the peer(s) to reply. + * + * @param clb + * The callback to trigger when all responses for this request have arrived. + * + * @return + * - On success, return 0. + * - On failure, return -1, and the reason will be stored in rte_errno. + */ +int __rte_experimental +rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, + rte_mp_async_reply_t clb); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice @@ -452,25 +490,13 @@ static inline int rte_gettid(void) enum rte_iova_mode rte_eal_iova_mode(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get user provided pool ops name for mbuf * * @return * returns user provided pool ops name. */ -const char * __rte_experimental -rte_eal_mbuf_user_pool_ops(void); - -/** - * Get default pool ops name for mbuf - * - * @return - * returns default pool ops name. - */ const char * -rte_eal_mbuf_default_mempool_ops(void); +rte_eal_mbuf_user_pool_ops(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h index 3f792a97..6eb49327 100644 --- a/lib/librte_eal/common/include/rte_eal_interrupts.h +++ b/lib/librte_eal/common/include/rte_eal_interrupts.h @@ -34,6 +34,7 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_ALARM, /**< alarm handle */ RTE_INTR_HANDLE_EXT, /**< external handler */ RTE_INTR_HANDLE_VDEV, /**< virtual device */ + RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */ RTE_INTR_HANDLE_MAX /**< count of elements */ }; diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index 29fa0b60..aff0688d 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -12,11 +12,30 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { #endif +/** + * memseg list is a special case as we need to store a bunch of other data + * together with the array itself. + */ +struct rte_memseg_list { + RTE_STD_C11 + union { + void *base_va; + /**< Base virtual address for this memseg list. */ + uint64_t addr_64; + /**< Makes sure addr is always 64-bits */ + }; + int socket_id; /**< Socket ID for all memsegs in this list. */ + uint64_t page_sz; /**< Page size for all memsegs in this list. */ + volatile uint32_t version; /**< version number for multiprocess sync. */ + struct rte_fbarray memseg_arr; +}; + /** * the structure for the memory configuration for the RTE. * Used by the rte_config structure. 
It is separated out, as for multi-process @@ -40,11 +59,14 @@ struct rte_mem_config { rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ - uint32_t memzone_cnt; /**< Number of allocated memzones */ + rte_rwlock_t memory_hotplug_lock; + /**< indicates whether memory hotplug request is in progress. */ /* memory segments and zones */ - struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */ - struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + struct rte_fbarray memzones; /**< Memzone descriptors. */ + + struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS]; + /**< list of dynamic arrays holding memsegs */ struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h new file mode 100644 index 00000000..5d880551 --- /dev/null +++ b/lib/librte_eal/common/include/rte_fbarray.h @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef RTE_FBARRAY_H +#define RTE_FBARRAY_H + +/** + * @file + * + * File-backed shared indexed array for DPDK. + * + * Basic workflow is expected to be the following: + * 1) Allocate array either using ``rte_fbarray_init()`` or + * ``rte_fbarray_attach()`` (depending on whether it's shared between + * multiple DPDK processes) + * 2) find free spots using ``rte_fbarray_find_next_free()`` + * 3) get pointer to data in the free spot using ``rte_fbarray_get()``, and + * copy data into the pointer (element size is fixed) + * 4) mark entry as used using ``rte_fbarray_set_used()`` + * + * Calls to ``rte_fbarray_init()`` and ``rte_fbarray_destroy()`` will have + * consequences for all processes, while calls to ``rte_fbarray_attach()`` and + * ``rte_fbarray_detach()`` will only have consequences within a single process. + * Therefore, it is safe to call ``rte_fbarray_attach()`` or + * ``rte_fbarray_detach()`` while another process is using ``rte_fbarray``, + * provided no other thread within the same process will try to use + * ``rte_fbarray`` before attaching or after detaching. It is not safe to call + * ``rte_fbarray_init()`` or ``rte_fbarray_destroy()`` while another thread or + * another process is using ``rte_fbarray``. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include + +#define RTE_FBARRAY_NAME_LEN 64 + +struct rte_fbarray { + char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */ + unsigned int count; /**< number of entries stored */ + unsigned int len; /**< current length of the array */ + unsigned int elt_sz; /**< size of each element */ + void *data; /**< data pointer */ + rte_rwlock_t rwlock; /**< multiprocess lock */ +}; + +/** + * Set up ``rte_fbarray`` structure and allocate underlying resources. + * + * Call this function to correctly set up ``rte_fbarray`` and allocate + * underlying files that will be backing the data in the current process. Note + * that in order to use and share ``rte_fbarray`` between multiple processes, + * data pointed to by ``arr`` pointer must itself be allocated in shared memory. + * + * @param arr + * Valid pointer to allocated ``rte_fbarray`` structure. + * + * @param name + * Unique name to be assigned to this array. + * + * @param len + * Number of elements initially available in the array. + * + * @param elt_sz + * Size of each element. + * + * @return + * - 0 on success. 
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, + unsigned int elt_sz); + + +/** + * Attach to a file backing an already allocated and correctly set up + * ``rte_fbarray`` structure. + * + * Call this function to attach to file that will be backing the data in the + * current process. The structure must have been previously correctly set up + * with a call to ``rte_fbarray_init()``. Calls to ``rte_fbarray_attach()`` are + * usually meant to be performed in a multiprocessing scenario, with data + * pointed to by ``arr`` pointer allocated in shared memory. + * + * @param arr + * Valid pointer to allocated and correctly set up rte_fbarray structure. + * + * @return + * - 0 on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_attach(struct rte_fbarray *arr); + + +/** + * Deallocate resources for an already allocated and correctly set up + * ``rte_fbarray`` structure, and remove the underlying file. + * + * Call this function to deallocate all resources associated with an + * ``rte_fbarray`` structure within the current process. This will also + * zero-fill data pointed to by ``arr`` pointer and remove the underlying file + * backing the data, so it is expected that by the time this function is called, + * all other processes have detached from this ``rte_fbarray``. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @return + * - 0 on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_destroy(struct rte_fbarray *arr); + + +/** + * Deallocate resources for an already allocated and correctly set up + * ``rte_fbarray`` structure. + * + * Call this function to deallocate all resources associated with an + * ``rte_fbarray`` structure within current process. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @return + * - 0 on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_detach(struct rte_fbarray *arr); + + +/** + * Get pointer to element residing at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param idx + * Index of an element to get a pointer to. + * + * @return + * - non-NULL pointer on success. + * - NULL on failure, with ``rte_errno`` indicating reason for failure. + */ +void * __rte_experimental +rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx); + + +/** + * Find index of a specified element within the array. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param elt + * Pointer to element to find index to. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt); + + +/** + * Mark specified element as used. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param idx + * Element index to mark as used. + * + * @return + * - 0 on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. 
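A compact sketch of the workflow described in the file comment (init, find a free slot, fill it, mark it used); the array name and element type are illustrative assumptions:

#include <stdint.h>
#include <rte_fbarray.h>

struct my_elem { uint64_t value; };     /* fixed-size payload, illustrative */

/* Create an array of 256 elements backed by a file. */
static int
setup_array(struct rte_fbarray *arr)
{
        return rte_fbarray_init(arr, "example_fbarray", 256,
                        sizeof(struct my_elem));
}

/* Find a free slot, copy data into it, mark it used. */
static int
store_value(struct rte_fbarray *arr, uint64_t value)
{
        struct my_elem *elem;
        int idx;

        idx = rte_fbarray_find_next_free(arr, 0);
        if (idx < 0)
                return -1;              /* rte_errno holds the reason */
        elem = rte_fbarray_get(arr, idx);
        if (elem == NULL)
                return -1;
        elem->value = value;
        return rte_fbarray_set_used(arr, idx);
}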
+ */ +int __rte_experimental +rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx); + + +/** + * Mark specified element as free. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param idx + * Element index to mark as free. + * + * @return + * - 0 on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx); + + +/** + * Check whether element at specified index is marked as used. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param idx + * Element index to check as used. + * + * @return + * - 1 if element is used. + * - 0 if element is unused. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx); + + +/** + * Find index of next free element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find index of next used element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find index of next chunk of ``n`` free elements, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of free elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find index of next chunk of ``n`` used elements, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of used elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find how many more free entries there are, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. 
+ */ +int __rte_experimental +rte_fbarray_find_contig_free(struct rte_fbarray *arr, + unsigned int start); + + +/** + * Find how many more used entries there are, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start); + +/** + * Find index of previous free element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find index of previous used element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find lowest start index of chunk of ``n`` free elements, down from specified + * index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of free elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find lowest start index of chunk of ``n`` used elements, down from specified + * index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of used elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find how many more free entries there are before specified index (like + * ``rte_fbarray_find_contig_free`` but going in reverse). + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, + unsigned int start); + + +/** + * Find how many more used entries there are before specified index (like + * ``rte_fbarray_find_contig_used`` but going in reverse). + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. 
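Building on the same array, the chunk-oriented lookups can reserve a run of contiguous slots; a sketch assuming only the declarations above:

/* Reserve n contiguous free slots; returns the first index of the run,
 * or -1 with rte_errno set. */
static int
reserve_run(struct rte_fbarray *arr, unsigned int n)
{
        int start;
        unsigned int i;

        start = rte_fbarray_find_next_n_free(arr, 0, n);
        if (start < 0)
                return -1;
        for (i = 0; i < n; i++)
                if (rte_fbarray_set_used(arr, start + i) < 0)
                        return -1;
        return start;
}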
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start); + + +/** + * Dump ``rte_fbarray`` metadata. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param f + * File object to dump information into. + */ +void __rte_experimental +rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FBARRAY_H */ diff --git a/lib/librte_eal/common/include/rte_hypervisor.h b/lib/librte_eal/common/include/rte_hypervisor.h index 8d8aac74..5fe719c1 100644 --- a/lib/librte_eal/common/include/rte_hypervisor.h +++ b/lib/librte_eal/common/include/rte_hypervisor.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2017 Mellanox Technologies, Ltd. + * Copyright 2017 Mellanox Technologies, Ltd */ #ifndef RTE_HYPERVISOR_H diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index 04722203..6e09d918 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -119,7 +119,7 @@ rte_lcore_index(int lcore_id) if (lcore_id >= RTE_MAX_LCORE) return -1; if (lcore_id < 0) - lcore_id = rte_lcore_id(); + lcore_id = (int)rte_lcore_id(); return lcore_config[lcore_id].core_index; } @@ -131,6 +131,36 @@ rte_lcore_index(int lcore_id) */ unsigned rte_socket_id(void); +/** + * Return number of physical sockets detected on the system. + * + * Note that number of nodes may not be correspondent to their physical id's: + * for example, a system may report two socket id's, but the actual socket id's + * may be 0 and 8. + * + * @return + * the number of physical sockets as recognized by EAL + */ +unsigned int __rte_experimental +rte_socket_count(void); + +/** + * Return socket id with a particular index. + * + * This will return socket id at a particular position in list of all detected + * physical socket id's. For example, on a machine with sockets [0, 8], passing + * 1 as a parameter will return 8. + * + * @param idx + * index of physical socket id to return + * + * @return + * - physical socket id as recognized by EAL + * - -1 on error, with errno set to EINVAL + */ +int __rte_experimental +rte_socket_id_by_idx(unsigned int idx); + /** * Get the ID of the physical socket of the specified lcore * @@ -246,6 +276,32 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp); */ int rte_thread_setname(pthread_t id, const char *name); +/** + * Create a control thread. + * + * Wrapper to pthread_create(), pthread_setname_np() and + * pthread_setaffinity_np(). The dataplane and service lcores are + * excluded from the affinity of the new thread. + * + * @param thread + * Filled with the thread id of the new created thread. + * @param name + * The name of the control thread (max 16 characters including '\0'). + * @param attr + * Attributes for the new thread. + * @param start_routine + * Function to be executed by the new thread. + * @param arg + * Argument passed to start_routine. + * @return + * On success, returns 0; on error, it returns a negative value + * corresponding to the error number. 
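A small usage sketch of the new socket enumeration helpers; as the comment above notes, detected socket ids need not be contiguous:

#include <stdio.h>
#include <rte_lcore.h>

/* Print every physical socket id known to EAL (e.g. 0 and 8 on a
 * two-socket machine). */
static void
dump_sockets(void)
{
        unsigned int i;

        for (i = 0; i < rte_socket_count(); i++)
                printf("socket index %u -> id %d\n", i,
                                rte_socket_id_by_idx(i));
}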
+ */ +__rte_experimental int +rte_ctrl_thread_create(pthread_t *thread, const char *name, + const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); + /** * Test if the core supplied has a specific role * @@ -255,7 +311,7 @@ int rte_thread_setname(pthread_t id, const char *name); * @param role * The role to be checked against. * @return - * On success, return 0; otherwise return a negative value. + * Boolean value: positive if test is true; otherwise returns 0. */ int rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role); diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 9029c785..2f789cb9 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -20,6 +20,7 @@ extern "C" { #include #include #include +#include #include #include @@ -129,16 +130,28 @@ uint32_t rte_log_get_global_level(void); int rte_log_get_level(uint32_t logtype); /** - * Set the log level for a given type. + * Set the log level for a given type based on shell pattern. * * @param pattern - * The regexp identifying the log type. + * The match pattern identifying the log type. + * @param level + * The level to be set. + * @return + * 0 on success, a negative value if level is invalid. + */ +int rte_log_set_level_pattern(const char *pattern, uint32_t level); + +/** + * Set the log level for a given type based on regular expression. + * + * @param regex + * The regular expression identifying the log type. * @param level * The level to be set. * @return * 0 on success, a negative value if level is invalid. */ -int rte_log_set_level_regexp(const char *pattern, uint32_t level); +int rte_log_set_level_regexp(const char *regex, uint32_t level); /** * Set the log level for a given type. @@ -194,6 +207,27 @@ int rte_log_cur_msg_logtype(void); */ int rte_log_register(const char *name); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register a dynamic log type and try to pick its level from EAL options + * + * rte_log_register() is called inside. If successful, the function tries + * to search for matching regexp in the list of EAL log level options and + * pick the level from the last matching entry. If nothing can be applied + * from the list, the level will be set to the user-defined default value. + * + * @param name + * Name for the log type to be registered + * @param level_def + * Fallback level to be set if the global list has no matching options + * @return + * - >=0: the newly registered log type + * - <0: rte_log_register() error value + */ +int rte_log_register_type_and_pick_level(const char *name, uint32_t level_def); + /** * Dump log information. * diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index f02a8ba1..a9fb7e45 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -13,6 +13,7 @@ #include #include +#include #include #ifdef __cplusplus @@ -277,6 +278,15 @@ rte_malloc_get_socket_stats(int socket, void rte_malloc_dump_stats(FILE *f, const char *type); +/** + * Dump contents of all malloc heaps to a file. + * + * @param f + * A pointer to a file for output + */ +void __rte_experimental +rte_malloc_dump_heaps(FILE *f); + /** * Set the maximum amount of allocated memory for this type. 
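A minimal sketch of spawning a control thread with the new wrapper; the thread name here is an illustrative assumption and must fit in 16 bytes including the terminating '\0':

#include <pthread.h>
#include <rte_lcore.h>

static void *
monitor_fn(void *arg)
{
        (void)arg;
        /* periodic housekeeping, kept off the dataplane lcores */
        return NULL;
}

/* The new thread's affinity excludes dataplane and service lcores. */
static int
start_monitor(pthread_t *tid)
{
        return rte_ctrl_thread_create(tid, "ex-monitor", NULL,
                        monitor_fn, NULL);
}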
* diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h index ba99ed90..d43fa909 100644 --- a/lib/librte_eal/common/include/rte_malloc_heap.h +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -13,12 +13,18 @@ /* Number of free lists per heap, grouped by size. */ #define RTE_HEAP_NUM_FREELISTS 13 +/* dummy definition, for pointers */ +struct malloc_elem; + /** * Structure to hold malloc heap */ struct malloc_heap { rte_spinlock_t lock; LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; + struct malloc_elem *volatile first; + struct malloc_elem *volatile last; + unsigned alloc_count; size_t total_size; } __rte_cache_aligned; diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 302f865b..c4b7f4cf 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -20,8 +20,12 @@ extern "C" { #endif #include +#include #include +/* forward declaration for pointers */ +struct rte_memseg_list; + __extension__ enum rte_page_sizes { RTE_PGSIZE_4K = 1ULL << 12, @@ -79,6 +83,8 @@ typedef uint64_t rte_iova_t; /** * Physical memory segment descriptor. */ +#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0) +/**< Prevent this segment from being freed back to the OS. */ struct rte_memseg { RTE_STD_C11 union { @@ -95,6 +101,7 @@ struct rte_memseg { int32_t socket_id; /**< NUMA socket ID. */ uint32_t nchannel; /**< Number of channels. */ uint32_t nrank; /**< Number of ranks. */ + uint32_t flags; /**< Memseg-specific flags */ } __rte_packed; /** @@ -130,25 +137,192 @@ phys_addr_t rte_mem_virt2phy(const void *virt); rte_iova_t rte_mem_virt2iova(const void *virt); /** - * Get the layout of the available physical memory. + * Get virtual memory address corresponding to iova address. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param iova + * The iova address. + * @return + * Virtual address corresponding to iova address (or NULL if address does not + * exist within DPDK memory map). + */ +__rte_experimental void * +rte_mem_iova2virt(rte_iova_t iova); + +/** + * Get memseg to which a particular virtual address belongs. + * + * @param virt + * The virtual address. + * @param msl + * The memseg list in which to look up based on ``virt`` address + * (can be NULL). + * @return + * Memseg pointer on success, or NULL on error. + */ +__rte_experimental struct rte_memseg * +rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl); + +/** + * Get memseg list corresponding to virtual memory address. + * + * @param virt + * The virtual address. + * @return + * Memseg list to which this virtual address belongs to. + */ +__rte_experimental struct rte_memseg_list * +rte_mem_virt2memseg_list(const void *virt); + +/** + * Memseg walk function prototype. + * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, void *arg); + +/** + * Memseg contig walk function prototype. This will trigger a callback on every + * VA-contiguous are starting at memseg ``ms``, so total valid VA space at each + * callback call will be [``ms->addr``, ``ms->addr + len``). 
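A short sketch combining the address translation helpers; note they read-lock the memory hotplug subsystem and therefore cannot be used inside memory event callbacks:

#include <inttypes.h>
#include <stdio.h>
#include <rte_memory.h>

/* Resolve the IOVA and owning memseg of a DPDK-allocated buffer. */
static void
describe_addr(const void *addr)
{
        rte_iova_t iova = rte_mem_virt2iova(addr);
        struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);

        if (ms != NULL)
                printf("iova 0x%" PRIx64 ", socket %d\n",
                                (uint64_t)iova, ms->socket_id);
}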
+ * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, size_t len, void *arg); + +/** + * Memseg list walk function prototype. This will trigger a callback on every + * allocated memseg list. + * + * Returning 0 will continue walk + * Returning 1 will stop the walk + * Returning -1 will stop the walk and report error + */ +typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, + void *arg); + +/** + * Walk list of all memsegs. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_walk(rte_memseg_walk_t func, void *arg); + +/** + * Walk each VA-contiguous area. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); + +/** + * Walk each allocated memseg list. + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); + +/** + * Walk list of all memsegs without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg); + +/** + * Walk each VA-contiguous area without performing any locking. * - * It can be useful for an application to have the full physical - * memory layout to decide the size of a memory zone to reserve. This - * table is stored in rte_config (see rte_eal_get_configuration()). + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator * @return - * - On success, return a pointer to a read-only table of struct - * rte_physmem_desc elements, containing the layout of all - * addressable physical memory. The last element of the table - * contains a NULL address. - * - On error, return NULL. This should not happen since it is a fatal - * error that will probably cause the entire system to panic. 
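With rte_eal_get_physmem_layout() removed, enumeration is done through walk callbacks. A sketch that simply counts memsegs, assuming only the prototypes above:

#include <rte_memory.h>

/* Returning 0 from the callback keeps the walk going. */
static int
count_seg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg)
{
        unsigned int *cnt = arg;

        (void)msl;
        (void)ms;
        (*cnt)++;
        return 0;
}

static unsigned int
count_memsegs(void)
{
        unsigned int cnt = 0;

        /* returns 0 when the whole list was walked, -1 on callback error */
        if (rte_memseg_walk(count_seg, &cnt) < 0)
                return 0;
        return cnt;
}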
+ * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error */ -const struct rte_memseg *rte_eal_get_physmem_layout(void); +int __rte_experimental +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg); + +/** + * Walk each allocated memseg list without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg); /** * Dump the physical memory layout to a file. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param f * A pointer to a file for output */ @@ -157,6 +331,9 @@ void rte_dump_physmem_layout(FILE *f); /** * Get the total amount of available physical memory. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @return * The total amount of available physical memory in bytes. */ @@ -191,6 +368,137 @@ unsigned rte_memory_get_nrank(void); */ int rte_eal_using_phys_addrs(void); + +/** + * Enum indicating which kind of memory event has happened. Used by callbacks to + * distinguish between memory allocations and deallocations. + */ +enum rte_mem_event { + RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */ + RTE_MEM_EVENT_FREE, /**< Deallocation event. */ +}; +#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64 +/**< maximum length of callback name */ + +/** + * Function typedef used to register callbacks for memory events. + */ +typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type, + const void *addr, size_t len, void *arg); + +/** + * Function used to register callbacks for memory events. + * + * @note callbacks will happen while memory hotplug subsystem is write-locked, + * therefore some functions (e.g. `rte_memseg_walk()`) will cause a + * deadlock when called from within such callbacks. + * + * @note mem event callbacks not being supported is an expected error condition, + * so user code needs to handle this situation. In these cases, return + * value will be -1, and rte_errno will be set to ENOTSUP. + * + * @param name + * Name associated with specified callback to be added to the list. + * + * @param clb + * Callback function pointer. + * + * @param arg + * Argument to pass to the callback. + * + * @return + * 0 on successful callback register + * -1 on unsuccessful callback register, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb, + void *arg); + +/** + * Function used to unregister callbacks for memory events. + * + * @param name + * Name associated with specified callback to be removed from the list. + * + * @param arg + * Argument to look for among callbacks with specified callback name. + * + * @return + * 0 on successful callback unregister + * -1 on unsuccessful callback unregister, with rte_errno value indicating + * reason for failure. 
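A sketch of registering a memory event callback; per the notes above, callbacks run under the hotplug write lock and ENOTSUP is an expected, recoverable error:

#include <errno.h>
#include <stdio.h>
#include <rte_errno.h>
#include <rte_memory.h>

/* Log hotplug allocations and frees.  Walk functions must not be called
 * from here, since the hotplug subsystem is write-locked. */
static void
mem_event_cb(enum rte_mem_event event_type, const void *addr, size_t len,
                void *arg)
{
        (void)arg;
        printf("%s: %zu bytes at %p\n",
                        event_type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free",
                        len, addr);
}

static int
register_mem_events(void)
{
        int ret = rte_mem_event_callback_register("example-cb",
                        mem_event_cb, NULL);

        /* unsupported callbacks are an expected condition per the note above */
        if (ret < 0 && rte_errno == ENOTSUP)
                return 0;
        return ret;
}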
+ */ +int __rte_experimental +rte_mem_event_callback_unregister(const char *name, void *arg); + + +#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64 +/**< maximum length of alloc validator name */ +/** + * Function typedef used to register memory allocation validation callbacks. + * + * Returning 0 will allow allocation attempt to continue. Returning -1 will + * prevent allocation from succeeding. + */ +typedef int (*rte_mem_alloc_validator_t)(int socket_id, + size_t cur_limit, size_t new_len); + +/** + * @brief Register validator callback for memory allocations. + * + * Callbacks registered by this function will be called right before memory + * allocator is about to trigger allocation of more pages from the system if + * said allocation will bring total memory usage above specified limit on + * specified socket. User will be able to cancel pending allocation if callback + * returns -1. + * + * @note callbacks will happen while memory hotplug subsystem is write-locked, + * therefore some functions (e.g. `rte_memseg_walk()`) will cause a + * deadlock when called from within such callbacks. + * + * @note validator callbacks not being supported is an expected error condition, + * so user code needs to handle this situation. In these cases, return + * value will be -1, and rte_errno will be set to ENOTSUP. + * + * @param name + * Name associated with specified callback to be added to the list. + * + * @param clb + * Callback function pointer. + * + * @param socket_id + * Socket ID on which to watch for allocations. + * + * @param limit + * Limit above which to trigger callbacks. + * + * @return + * 0 on successful callback register + * -1 on unsuccessful callback register, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_alloc_validator_register(const char *name, + rte_mem_alloc_validator_t clb, int socket_id, size_t limit); + +/** + * @brief Unregister validator callback for memory allocations. + * + * @param name + * Name associated with specified callback to be removed from the list. + * + * @param socket_id + * Socket ID on which to watch for allocations. + * + * @return + * 0 on successful callback unregister + * -1 on unsuccessful callback unregister, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_alloc_validator_unregister(const char *name, int socket_id); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index 2bfb2731..f478fa9e 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -23,6 +23,7 @@ */ #include +#include #include #include @@ -39,6 +40,7 @@ extern "C" { #define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */ #define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */ #define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */ +#define RTE_MEMZONE_IOVA_CONTIG 0x00100000 /**< Ask for IOVA-contiguous memzone. */ /** * A structure describing a memzone, which is a contiguous portion of @@ -66,7 +68,6 @@ struct rte_memzone { int32_t socket_id; /**< NUMA socket ID. */ uint32_t flags; /**< Characteristics of this memzone. */ - uint32_t memseg_id; /**< Memseg it belongs. */ } __attribute__((__packed__)); /** @@ -76,6 +77,17 @@ struct rte_memzone { * correctly filled memzone descriptor. If the allocation cannot be * done, return NULL. 
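A sketch of an allocation validator; the exact meaning of cur_limit versus new_len is taken only from the typedef above, so the decision logic here is illustrative:

#include <rte_memory.h>

/* Returning -1 cancels the pending allocation. */
static int
limit_validator(int socket_id, size_t cur_limit, size_t new_len)
{
        (void)socket_id;
        return new_len > cur_limit ? -1 : 0;
}

static int
register_limit(void)
{
        /* 1 GB limit on socket 0; both values are illustrative */
        return rte_mem_alloc_validator_register("example-limit",
                        limit_validator, 0, (size_t)1 << 30);
}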
* + * @note Reserving memzones with len set to 0 will only attempt to allocate + * memzones from memory that is already available. It will not trigger any + * new allocations. + * + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. + * * @param name * The name of the memzone. If it already exists, the function will * fail and return NULL. @@ -102,6 +114,9 @@ struct rte_memzone { * If this flag is not set, the function * will return error on an unavailable size * request. + * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous. + * This option should be used when allocating + * memory intended for hardware rings etc. * @return * A pointer to a correctly-filled read-only memzone descriptor, or NULL * on error. @@ -126,6 +141,17 @@ const struct rte_memzone *rte_memzone_reserve(const char *name, * descriptor. If the allocation cannot be done or if the alignment * is not a power of 2, returns NULL. * + * @note Reserving memzones with len set to 0 will only attempt to allocate + * memzones from memory that is already available. It will not trigger any + * new allocations. + * + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. + * * @param name * The name of the memzone. If it already exists, the function will * fail and return NULL. @@ -152,6 +178,9 @@ const struct rte_memzone *rte_memzone_reserve(const char *name, * If this flag is not set, the function * will return error on an unavailable size * request. + * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous. + * This option should be used when allocating + * memory intended for hardware rings etc. * @param align * Alignment for resulting memzone. Must be a power of 2. * @return @@ -181,6 +210,17 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, * boundary. That implies that requested length should be less or equal * then boundary. * + * @note Reserving memzones with len set to 0 will only attempt to allocate + * memzones from memory that is already available. It will not trigger any + * new allocations. + * + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. + * * @param name * The name of the memzone. If it already exists, the function will * fail and return NULL. @@ -207,6 +247,9 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, * If this flag is not set, the function * will return error on an unavailable size * request. 
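A one-line sketch of the new flag in practice: hardware-ring memory reserved IOVA-contiguously (name and size are illustrative):

#include <rte_memzone.h>

static const struct rte_memzone *
reserve_hw_ring(int socket_id)
{
        /* RTE_MEMZONE_IOVA_CONTIG guarantees the zone is IOVA-contiguous */
        return rte_memzone_reserve("example_hw_ring", 16 * 1024,
                        socket_id, RTE_MEMZONE_IOVA_CONTIG);
}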
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous. + * This option should be used when allocating + * memory intended for hardware rings etc. * @param align * Alignment for resulting memzone. Must be a power of 2. * @param bound diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h index 08222510..e12c2208 100644 --- a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h +++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h @@ -1,59 +1,5 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) + * Copyright(c) 2010-2014 Intel Corporation */ #ifndef _RTE_PCI_DEV_DEFS_H_ diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h index 67b986a6..6104123d 100644 --- a/lib/librte_eal/common/include/rte_pci_dev_features.h +++ b/lib/librte_eal/common/include/rte_pci_dev_features.h @@ -1,59 +1,5 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) + * Copyright(c) 2010-2014 Intel Corporation */ #ifndef _RTE_PCI_DEV_FEATURES_H diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h index 63bb2808..b2ca1c20 100644 --- a/lib/librte_eal/common/include/rte_random.h +++ b/lib/librte_eal/common/include/rte_random.h @@ -31,7 +31,7 @@ extern "C" { static inline void rte_srand(uint64_t seedval) { - srand48((long unsigned int)seedval); + srand48((long)seedval); } /** @@ -48,9 +48,9 @@ static inline uint64_t rte_rand(void) { uint64_t val; - val = lrand48(); + val = (uint64_t)lrand48(); val <<= 32; - val += lrand48(); + val += (uint64_t)lrand48(); return val; } diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index 211eb376..34b41aff 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -47,9 +47,6 @@ extern "C" { #define RTE_SERVICE_CAP_MT_SAFE (1 << 0) /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Return the number of services registered. * * The number of services registered can be passed to *rte_service_get_by_id*, @@ -57,12 +54,9 @@ extern "C" { * * @return The number of services registered. */ -uint32_t __rte_experimental rte_service_get_count(void); +uint32_t rte_service_get_count(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Return the id of a service by name. * * This function provides the id of the service using the service name as @@ -84,24 +78,17 @@ uint32_t __rte_experimental rte_service_get_count(void); * @retval -EINVAL Null *service_id* pointer provided * @retval -ENODEV No such service registered */ -int32_t __rte_experimental rte_service_get_by_name(const char *name, - uint32_t *service_id); +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Return the name of the service. * * @return A pointer to the name of the service. The returned pointer remains * in ownership of the service, and the application must not free it. */ -const char __rte_experimental *rte_service_get_name(uint32_t id); +const char *rte_service_get_name(uint32_t id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Check if a service has a specific capability. * * This function returns if *service* has implements *capability*. @@ -109,13 +96,9 @@ const char __rte_experimental *rte_service_get_name(uint32_t id); * @retval 1 Capability supported by this service instance * @retval 0 Capability not supported by this service instance */ -int32_t __rte_experimental rte_service_probe_capability(uint32_t id, - uint32_t capability); +int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Map or unmap a lcore to a service. * * Each core can be added or removed from running a specific service. This @@ -134,13 +117,10 @@ int32_t __rte_experimental rte_service_probe_capability(uint32_t id, * @retval 0 lcore map updated successfully * @retval -EINVAL An invalid service or lcore was provided. 
*/ -int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id, - uint32_t lcore, uint32_t enable); +int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore, + uint32_t enable); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Retrieve the mapping of an lcore to a service. * * @param service_id the service to apply the lcore to @@ -150,13 +130,9 @@ int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id, * @retval 0 lcore is not mapped to service * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id, - uint32_t lcore); +int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Set the runstate of the service. * * Each service is either running or stopped. Setting a non-zero runstate @@ -168,12 +144,9 @@ int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id, * @retval 0 The service was successfully started * @retval -EINVAL Invalid service id */ -int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runstate); +int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get the runstate for the service with *id*. See *rte_service_runstate_set* * for details of runstates. A service can call this function to ensure that * the application has indicated that it will receive CPU cycles. Either a @@ -186,12 +159,29 @@ int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runsta * @retval 0 Service is stopped * @retval -EINVAL Invalid service id */ -int32_t __rte_experimental rte_service_runstate_get(uint32_t id); +int32_t rte_service_runstate_get(uint32_t id); /** * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * This function returns whether the service may be currently executing on + * at least one lcore, or definitely is not. This function can be used to + * determine if, after setting the service runstate to stopped, the service + * is still executing a service lcore. * + * Care must be taken if calling this function when the service runstate is + * running, since the result of this function may be incorrect by the time the + * function returns due to service cores running in parallel. + * + * @retval 1 Service may be running on one or more lcores + * @retval 0 Service is not running on any lcore + * @retval -EINVAL Invalid service id + */ +int32_t __rte_experimental +rte_service_may_be_active(uint32_t id); + +/** * Enable or disable the check for a service-core being mapped to the service. * An application can disable the check when takes the responsibility to run a * service itself using *rte_service_run_iter_on_app_lcore*. @@ -202,13 +192,9 @@ int32_t __rte_experimental rte_service_runstate_get(uint32_t id); * @retval 0 Success * @retval -EINVAL Invalid service ID */ -int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id, - int32_t enable); +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * This function runs a service callback from a non-service lcore. 
* * This function is designed to enable gradual porting to service cores, and @@ -241,13 +227,10 @@ int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id, * @retval -ENOEXEC Service is not in a run-able state * @retval -EINVAL Invalid service id */ -int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id, +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_multithread_unsafe); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Start a service core. * * Starting a core makes the core begin polling. Any services assigned to it @@ -259,12 +242,9 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id, * @retval -EINVAL Failed to start core. The *lcore_id* passed in is not * currently assigned to be a service core. */ -int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id); +int32_t rte_service_lcore_start(uint32_t lcore_id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Stop a service core. * * Stopping a core makes the core become idle, but remains assigned as a @@ -278,12 +258,9 @@ int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id); * The application must stop the service first, and then stop the * lcore. */ -int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id); +int32_t rte_service_lcore_stop(uint32_t lcore_id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Adds lcore to the list of service cores. * * This functions can be used at runtime in order to modify the service core @@ -294,12 +271,9 @@ int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id); * @retval -EALREADY lcore is already added to the service core list * @retval -EINVAL Invalid lcore provided */ -int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore); +int32_t rte_service_lcore_add(uint32_t lcore); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Removes lcore from the list of service cores. * * This can fail if the core is not stopped, see *rte_service_core_stop*. @@ -308,12 +282,9 @@ int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore); * @retval -EBUSY Lcore is not stopped, stop service core before removing. * @retval -EINVAL failed to add lcore to service core mask. */ -int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore); +int32_t rte_service_lcore_del(uint32_t lcore); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Retrieve the number of service cores currently available. * * This function returns the integer count of service cores available. The @@ -325,24 +296,18 @@ int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore); * * @return The number of service cores currently configured. */ -int32_t __rte_experimental rte_service_lcore_count(void); +int32_t rte_service_lcore_count(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Resets all service core mappings. This does not remove the service cores * from duty, just unmaps all services / cores, and stops() the service cores. * The runstate of services is not modified. * * @retval 0 Success */ -int32_t __rte_experimental rte_service_lcore_reset_all(void); +int32_t rte_service_lcore_reset_all(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Enable or disable statistics collection for *service*. 
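A typical bring-up sequence with the now-stable service API, assuming only the declarations above; error checks are collapsed for brevity:

#include <stdint.h>
#include <rte_service.h>

/* Dedicate an lcore to a service, map it, and start both. */
static int
enable_service_on_lcore(uint32_t service_id, uint32_t lcore)
{
        if (rte_service_lcore_add(lcore) < 0)
                return -1;
        if (rte_service_map_lcore_set(service_id, lcore, 1) < 0)
                return -1;
        if (rte_service_runstate_set(service_id, 1) < 0)
                return -1;
        return rte_service_lcore_start(lcore);
}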
* * This function enables per core, per-service cycle count collection. @@ -351,13 +316,9 @@ int32_t __rte_experimental rte_service_lcore_reset_all(void); * @retval 0 Success * @retval -EINVAL Invalid service pointer passed */ -int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id, - int32_t enable); +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Retrieve the list of currently enabled service cores. * * This function fills in an application supplied array, with each element @@ -373,12 +334,9 @@ int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id, * service core list. No items have been populated, call this function * with a size of at least *rte_service_core_count* items. */ -int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n); +int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get the numer of services running on the supplied lcore. * * @param lcore Id of the service core. @@ -386,19 +344,16 @@ int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n); * @retval -EINVAL Invalid lcore provided * @retval -ENOTSUP The provided lcore is not a service core. */ -int32_t __rte_experimental rte_service_lcore_count_services(uint32_t lcore); +int32_t rte_service_lcore_count_services(uint32_t lcore); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Dumps any information available about the service. When id is UINT32_MAX, * this function dumps info for all services. * * @retval 0 Statistics have been successfully dumped * @retval -EINVAL Invalid service id provided */ -int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id); +int32_t rte_service_dump(FILE *f, uint32_t id); /** * Returns the number of cycles that this service has consumed @@ -411,28 +366,58 @@ int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id); #define RTE_SERVICE_ATTR_CALL_COUNT 1 /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get an attribute from a service. * * @retval 0 Success, the attribute value has been written to *attr_value*. * -EINVAL Invalid id, attr_id or attr_value was NULL. */ -int32_t __rte_experimental rte_service_attr_get(uint32_t id, uint32_t attr_id, +int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value); +/** + * Reset all attribute values of a service. + * + * @param id The service to reset all statistics of + * @retval 0 Successfully reset attributes + * -EINVAL Invalid service id provided + */ +int32_t rte_service_attr_reset_all(uint32_t id); + +/** + * Returns the number of times the service runner has looped. + */ +#define RTE_SERVICE_LCORE_ATTR_LOOPS 0 + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Reset all attribute values of a service. + * Get an attribute from a service core. * - * @param id The service to reset all statistics of + * @param lcore Id of the service core. + * @param attr_id Id of the attribute to be retrieved. + * @param [out] attr_value Pointer to storage in which to write retrieved value. + * @retval 0 Success, the attribute value has been written to *attr_value*. + * -EINVAL Invalid lcore, attr_id or attr_value was NULL. + * -ENOTSUP lcore is not a service core. 
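A small sketch of reading the new per-lcore attribute; RTE_SERVICE_LCORE_ATTR_LOOPS gives a coarse liveness counter for a service core:

#include <stdint.h>
#include <rte_service.h>

/* Returns 0 on success; -EINVAL or -ENOTSUP per the documentation above. */
static int
get_lcore_loops(uint32_t lcore, uint64_t *loops)
{
        return rte_service_lcore_attr_get(lcore,
                        RTE_SERVICE_LCORE_ATTR_LOOPS, loops);
}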
+ */ +int32_t __rte_experimental +rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + uint64_t *attr_value); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset all attribute values of a service core. + * + * @param lcore The service core to reset all the statistics of + * @retval 0 Successfully reset attributes + * -EINVAL Invalid service id provided + * -ENOTSUP lcore is not a service core. */ -int32_t __rte_experimental rte_service_attr_reset_all(uint32_t id); +int32_t __rte_experimental +rte_service_lcore_attr_reset_all(uint32_t lcore); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h index 9ba4aa29..c12adbc2 100644 --- a/lib/librte_eal/common/include/rte_service_component.h +++ b/lib/librte_eal/common/include/rte_service_component.h @@ -13,17 +13,11 @@ #include /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Signature of callback function to run a service. */ typedef int32_t (*rte_service_func)(void *args); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * The specification of a service. * * This struct contains metadata about the service itself, the callback @@ -47,9 +41,6 @@ struct rte_service_spec { }; /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Register a new service. * * A service represents a component that requires CPU time periodically to @@ -73,14 +64,10 @@ struct rte_service_spec { * -EINVAL Attempted to register an invalid service (eg, no callback * set) */ -int32_t __rte_experimental -rte_service_component_register(const struct rte_service_spec *spec, - uint32_t *service_id); +int32_t rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *service_id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Unregister a service component. * * The service being removed must be stopped before calling this function. @@ -89,12 +76,9 @@ rte_service_component_register(const struct rte_service_spec *spec, * @retval -EBUSY The service is currently running, stop the service before * calling unregister. No action has been taken. */ -int32_t __rte_experimental rte_service_component_unregister(uint32_t id); +int32_t rte_service_component_unregister(uint32_t id); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Private function to allow EAL to initialize default mappings. * * This function iterates all the services, and maps them to the available @@ -107,12 +91,9 @@ int32_t __rte_experimental rte_service_component_unregister(uint32_t id); * @retval -ENODEV Error in enabling service lcore on a service * @retval -ENOEXEC Error when starting services */ -int32_t __rte_experimental rte_service_start_with_defaults(void); +int32_t rte_service_start_with_defaults(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Set the backend runstate of a component.
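A registration sketch for the component API above, assuming the usual rte_service_spec fields (name, callback, callback_userdata); error handling is trimmed and the service body is a stand-in:

#include <rte_service.h>
#include <rte_service_component.h>

/* Illustrative service body: do one iteration of work, return 0 on success. */
static int32_t my_service_run(void *args)
{
	(void)args;
	/* ... poll a queue, process events, etc. ... */
	return 0;
}

/* Sketch only: register a component and mark its backend runstate runnable.
 * The spec field names are assumptions based on common usage of this API.
 */
static int register_my_service(void)
{
	struct rte_service_spec spec = {
		.name = "my_service",
		.callback = my_service_run,
		.callback_userdata = NULL,
	};
	uint32_t id;

	if (rte_service_component_register(&spec, &id) != 0)
		return -1;

	/* component is ready to run; the application still controls the service runstate */
	return rte_service_component_runstate_set(id, 1);
}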
* * This function allows services to be registered at startup, but not yet @@ -124,13 +105,9 @@ int32_t __rte_experimental rte_service_start_with_defaults(void); * * @retval 0 Success */ -int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id, - uint32_t runstate); +int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Initialize the service library. * * In order to use the service library, it must be initialized. EAL initializes @@ -142,14 +119,11 @@ int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id, int32_t rte_service_init(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * @internal Free up the memory that has been initialized. * This routine is to be invoked prior to process termination. * * @retval None */ -void __rte_experimental rte_service_finalize(void); +void rte_service_finalize(void); #endif /* _RTE_SERVICE_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h index e97047a4..97597a14 100644 --- a/lib/librte_eal/common/include/rte_string_fns.h +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -15,6 +15,8 @@ extern "C" { #endif +#include + /** * Takes string "string" parameter and splits it at character "delim" * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like @@ -45,6 +47,35 @@ int rte_strsplit(char *string, int stringlen, char **tokens, int maxtokens, char delim); +/** + * @internal + * DPDK-specific version of strlcpy for systems without + * libc or libbsd copies of the function + */ +static inline size_t +rte_strlcpy(char *dst, const char *src, size_t size) +{ + return (size_t)snprintf(dst, size, "%s", src); +} + +/* pull in a strlcpy function */ +#ifdef RTE_EXEC_ENV_BSDAPP +#include +#ifndef __BSD_VISIBLE /* non-standard functions are hidden */ +#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) +#endif + + +#else /* non-BSD platforms */ +#ifdef RTE_USE_LIBBSD +#include + +#else /* no BSD header files, create own */ +#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) + +#endif /* RTE_USE_LIBBSD */ +#endif /* BSDAPP */ + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h index 8dccaefc..9b01abb2 100644 --- a/lib/librte_eal/common/include/rte_tailq.h +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); int rte_eal_tailq_register(struct rte_tailq_elem *t); #define EAL_REGISTER_TAILQ(t) \ -RTE_INIT(tailqinitfn_ ##t); \ -static void tailqinitfn_ ##t(void) \ +RTE_INIT(tailqinitfn_ ##t) \ { \ if (rte_eal_tailq_register(&t) < 0) \ rte_panic("Cannot initialize tailq: %s\n", t.name); \ diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h new file mode 100644 index 00000000..2c846b5f --- /dev/null +++ b/lib/librte_eal/common/include/rte_uuid.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 1996, 1997, 1998 Theodore Ts'o. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ +/** + * @file + * + * UUID related functions originally from libuuid + */ + +#ifndef _RTE_UUID_H_ +#define _RTE_UUID_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Struct describing a Universal Unique Identifier + */ +typedef unsigned char rte_uuid_t[16]; + +/** + * Helper for defining UUID values for id tables. + */ +#define RTE_UUID_INIT(a, b, c, d, e) { \ + ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \ + ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + ((d) >> 8) & 0xff, (d) & 0xff, \ + ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \ + ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \ + ((e) >> 8) & 0xff, (e) & 0xff \ +} + +/** + * Test if UUID is all zeros. + * + * @param uu + * The uuid to check. + * @return + * true if uuid is NULL value, false otherwise + */ +bool rte_uuid_is_null(const rte_uuid_t uu); + +/** + * Copy uuid. + * + * @param dst + * Destination uuid + * @param src + * Source uuid + */ +static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src) +{ + memcpy(dst, src, sizeof(rte_uuid_t)); +} + +/** + * Compare two UUIDs + * + * @param a + * A UUID to compare + * @param b + * A UUID to compare + * @return + * returns an integer less than, equal to, or greater than zero if UUID a + * is less than, equal, or greater than UUID b. + */ +int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b); + +/** + * Extract UUID from string + * + * @param in + * Pointer to string of characters to convert + * @param uu + * Destination UUID + * @return + * Returns 0 on success, and -1 if string is not a valid UUID. + */ +int rte_uuid_parse(const char *in, rte_uuid_t uu); + +/** + * Convert UUID to string + * + * @param uu + * UUID to format + * @param out + * Resulting string buffer + * @param len + * Size of the available string buffer + */ +#define RTE_UUID_STRLEN (36 + 1) +void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_UUID_H */ diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 8173802b..7c6714a2 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -32,7 +32,7 @@ extern "C" { /** * Minor version/month number i.e.
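A brief usage sketch for the new UUID helpers introduced above; the UUID string is arbitrary, chosen only for illustration:

#include <stdio.h>
#include <rte_uuid.h>

/* Illustrative only: parse, copy, compare and print UUIDs with the new helpers. */
static void uuid_example(void)
{
	rte_uuid_t a, b;
	char buf[RTE_UUID_STRLEN];

	if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", a) != 0)
		return;		/* not a valid UUID string */

	rte_uuid_copy(b, a);

	if (!rte_uuid_is_null(a) && rte_uuid_compare(a, b) == 0) {
		rte_uuid_unparse(a, buf, sizeof(buf));
		printf("uuid: %s\n", buf);
	}
}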
the mm in yy.mm.z */ -#define RTE_VER_MONTH 02 +#define RTE_VER_MONTH 8 /** * Patch level number i.e. the z in yy.mm.z diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h index e981a622..5ca13fcc 100644 --- a/lib/librte_eal/common/include/rte_vfio.h +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -5,6 +5,15 @@ #ifndef _RTE_VFIO_H_ #define _RTE_VFIO_H_ +/** + * @file + * RTE VFIO. This library provides various VFIO related utility functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + /* * determine if VFIO is present on the system */ @@ -28,6 +37,20 @@ #define VFIO_NOIOMMU_MODE \ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" +/* NOIOMMU is defined from kernel version 4.5 onwards */ +#ifdef VFIO_NOIOMMU_IOMMU +#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU +#else +#define RTE_VFIO_NOIOMMU 8 +#endif + +#else /* not VFIO_PRESENT */ + +/* we don't need an actual definition, only pointer is used */ +struct vfio_device_info; + +#endif /* VFIO_PRESENT */ + /** * Setup vfio_cfg for the device identified by its address. * It discovers the configured I/O MMU groups or sets a new one for the device. @@ -119,10 +142,226 @@ int rte_vfio_is_enabled(const char *modname); */ int rte_vfio_noiommu_is_enabled(void); -/* remove group fd from internal VFIO group fd array */ +/** + * Remove group fd from internal VFIO group fd array/ + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param vfio_group_fd + * VFIO Grouup FD. + * + * @return + * 0 on success. + * <0 on failure. + */ int rte_vfio_clear_group(int vfio_group_fd); -#endif /* VFIO_PRESENT */ +/** + * Map memory region for use with VFIO. + * + * @note Require at least one device to be attached at the time of + * mapping. DMA maps done via this API will only apply to default + * container and will not apply to any of the containers created + * via rte_vfio_container_create(). + * + * @param vaddr + * Starting virtual address of memory to be mapped. + * + * @param iova + * Starting IOVA address of memory to be mapped. + * + * @param len + * Length of memory segment being mapped. + * + * @return + * 0 if success. + * -1 on error. + */ +int +rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len); + + +/** + * Unmap memory region from VFIO. + * + * @param vaddr + * Starting virtual address of memory to be unmapped. + * + * @param iova + * Starting IOVA address of memory to be unmapped. + * + * @param len + * Length of memory segment being unmapped. + * + * @return + * 0 if success. + * -1 on error. + */ + +int +rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len); +/** + * Parse IOMMU group number for a device + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param iommu_group_num + * iommu group number + * + * @return + * >0 on success + * 0 for non-existent group or VFIO + * <0 for errors + */ +int +rte_vfio_get_group_num(const char *sysfs_base, + const char *dev_addr, int *iommu_group_num); + +/** + * Open VFIO container fd or get an existing one + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @return + * > 0 container fd + * < 0 for errors + */ +int +rte_vfio_get_container_fd(void); + +/** + * Open VFIO group fd or get an existing one + * + * This function is only relevant to linux and will return + * an error on BSD. 
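A minimal sketch of mapping a region for DMA through the default VFIO container using the calls documented above; vaddr, iova and len are placeholders supplied by the caller, and at least one device must already be attached as the note requires:

#include <stdint.h>
#include <rte_vfio.h>

/* Illustrative only: map a region into the default container, then unmap it. */
static int map_region_for_dma(void *vaddr, uint64_t iova, uint64_t len)
{
	uint64_t va = (uint64_t)(uintptr_t)vaddr;

	if (rte_vfio_dma_map(va, iova, len) < 0)
		return -1;

	/* ... device DMA to/from this region happens here ... */

	return rte_vfio_dma_unmap(va, iova, len);
}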
+ * + * @param iommu_group_num + * iommu group number + * + * @return + * > 0 group fd + * < 0 for errors + */ +int +rte_vfio_get_group_fd(int iommu_group_num); + +/** + * Create a new container for device binding. + * + * @note Any newly allocated DPDK memory will not be mapped into these + * containers by default, user needs to manage DMA mappings for + * any container created by this API. + * + * @return + * the container fd if successful + * <0 if failed + */ +int +rte_vfio_container_create(void); + +/** + * Destroy the container, unbind all vfio groups within it. + * + * @param container_fd + * the container fd to destroy + * + * @return + * 0 if successful + * <0 if failed + */ +int +rte_vfio_container_destroy(int container_fd); + +/** + * Bind a IOMMU group to a container. + * + * @param container_fd + * the container's fd + * + * @param iommu_group_num + * the iommu group number to bind to container + * + * @return + * group fd if successful + * <0 if failed + */ +int +rte_vfio_container_group_bind(int container_fd, int iommu_group_num); + +/** + * Unbind a IOMMU group from a container. + * + * @param container_fd + * the container fd of container + * + * @param iommu_group_num + * the iommu group number to delete from container + * + * @return + * 0 if successful + * <0 if failed + */ +int +rte_vfio_container_group_unbind(int container_fd, int iommu_group_num); + +/** + * Perform DMA mapping for devices in a container. + * + * @param container_fd + * the specified container fd + * + * @param vaddr + * Starting virtual address of memory to be mapped. + * + * @param iova + * Starting IOVA address of memory to be mapped. + * + * @param len + * Length of memory segment being mapped. + * + * @return + * 0 if successful + * <0 if failed + */ +int +rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, + uint64_t iova, uint64_t len); + +/** + * Perform DMA unmapping for devices in a container. + * + * @param container_fd + * the specified container fd + * + * @param vaddr + * Starting virtual address of memory to be unmapped. + * + * @param iova + * Starting IOVA address of memory to be unmapped. + * + * @param len + * Length of memory segment being unmapped. 
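A sketch of the container API flow described above (create a container, bind an IOMMU group, map memory); the group number, addresses and length are caller-supplied placeholders:

#include <stdint.h>
#include <rte_vfio.h>

/* Illustrative only: set up a private container with one group and one DMA map. */
static int container_dma_setup(int iommu_group_num, uint64_t vaddr,
		uint64_t iova, uint64_t len)
{
	int container_fd, group_fd;

	container_fd = rte_vfio_container_create();
	if (container_fd < 0)
		return -1;

	group_fd = rte_vfio_container_group_bind(container_fd, iommu_group_num);
	if (group_fd < 0)
		goto fail;

	/* new containers get no default maps, so memory must be mapped explicitly */
	if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0)
		goto fail;

	return container_fd;

fail:
	rte_vfio_container_destroy(container_fd);
	return -1;
}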
+ * + * @return + * 0 if successful + * <0 if failed + */ +int +rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, + uint64_t iova, uint64_t len); + +#ifdef __cplusplus +} +#endif #endif /* _RTE_VFIO_H_ */ diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 0cadc8af..e0a8ed15 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -1,10 +1,12 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2014 Intel Corporation */ +#include #include #include #include #include +#include #include #include @@ -16,21 +18,100 @@ #include #include +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" #include "malloc_elem.h" #include "malloc_heap.h" -#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) +{ + void *cur_page, *contig_seg_start, *page_end, *cur_seg_end; + void *data_start, *data_end; + rte_iova_t expected_iova; + struct rte_memseg *ms; + size_t page_sz, cur, max; + + page_sz = (size_t)elem->msl->page_sz; + data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); + data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN); + /* segment must start after header and with specified alignment */ + contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align); + + /* if we're in IOVA as VA mode, or if we're in legacy mode with + * hugepages, all elements are IOVA-contiguous. + */ + if (rte_eal_iova_mode() == RTE_IOVA_VA || + (internal_config.legacy_mem && rte_eal_has_hugepages())) + return RTE_PTR_DIFF(data_end, contig_seg_start); + + cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz); + ms = rte_mem_virt2memseg(cur_page, elem->msl); + + /* do first iteration outside the loop */ + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) - + MALLOC_ELEM_TRAILER_LEN; + max = cur; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + + cur_page = RTE_PTR_ADD(cur_page, page_sz); + + while (cur_page < data_end) { + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + + /* reset start of contiguous segment if unexpected iova */ + if (ms->iova != expected_iova) { + /* next contiguous segment must start at specified + * alignment. + */ + contig_seg_start = RTE_PTR_ALIGN(cur_page, align); + /* new segment start may be on a different page, so find + * the page and skip to next iteration to make sure + * we're not blowing past data end. + */ + ms = rte_mem_virt2memseg(contig_seg_start, elem->msl); + cur_page = ms->addr; + /* don't trigger another recalculation */ + expected_iova = ms->iova; + continue; + } + /* cur_seg_end ends on a page boundary or on data end. if we're + * looking at data end, then malloc trailer is already included + * in the calculations. if we're looking at page end, then we + * know there's more data past this page and thus there's space + * for malloc element trailer, so don't count it here. 
+ */ + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start); + /* update max if cur value is bigger */ + if (cur > max) + max = cur; + + /* move to next page */ + cur_page = page_end; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + } + + return max; +} /* * Initialize a general malloc_elem header structure */ void -malloc_elem_init(struct malloc_elem *elem, - struct malloc_heap *heap, const struct rte_memseg *ms, size_t size) +malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, + struct rte_memseg_list *msl, size_t size) { elem->heap = heap; - elem->ms = ms; + elem->msl = msl; elem->prev = NULL; + elem->next = NULL; memset(&elem->free_list, 0, sizeof(elem->free_list)); elem->state = ELEM_FREE; elem->size = size; @@ -39,15 +120,74 @@ malloc_elem_init(struct malloc_elem *elem, set_trailer(elem); } +void +malloc_elem_insert(struct malloc_elem *elem) +{ + struct malloc_elem *prev_elem, *next_elem; + struct malloc_heap *heap = elem->heap; + + /* first and last elements must be both NULL or both non-NULL */ + if ((heap->first == NULL) != (heap->last == NULL)) { + RTE_LOG(ERR, EAL, "Heap is probably corrupt\n"); + return; + } + + if (heap->first == NULL && heap->last == NULL) { + /* if empty heap */ + heap->first = elem; + heap->last = elem; + prev_elem = NULL; + next_elem = NULL; + } else if (elem < heap->first) { + /* if lower than start */ + prev_elem = NULL; + next_elem = heap->first; + heap->first = elem; + } else if (elem > heap->last) { + /* if higher than end */ + prev_elem = heap->last; + next_elem = NULL; + heap->last = elem; + } else { + /* the new memory is somewhere inbetween start and end */ + uint64_t dist_from_start, dist_from_end; + + dist_from_end = RTE_PTR_DIFF(heap->last, elem); + dist_from_start = RTE_PTR_DIFF(elem, heap->first); + + /* check which is closer, and find closest list entries */ + if (dist_from_start < dist_from_end) { + prev_elem = heap->first; + while (prev_elem->next < elem) + prev_elem = prev_elem->next; + next_elem = prev_elem->next; + } else { + next_elem = heap->last; + while (next_elem->prev > elem) + next_elem = next_elem->prev; + prev_elem = next_elem->prev; + } + } + + /* insert new element */ + elem->prev = prev_elem; + elem->next = next_elem; + if (prev_elem) + prev_elem->next = elem; + if (next_elem) + next_elem->prev = elem; +} + /* - * Initialize a dummy malloc_elem header for the end-of-memseg marker + * Attempt to find enough physically contiguous memory in this block to store + * our data. Assume that element has at least enough space to fit in the data, + * so we just check the page addresses. 
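The page-walk above can be pictured with a small standalone sketch: given one IOVA per page, find the longest physically contiguous run. This mirrors the idea of malloc_elem_find_max_iova_contig(), not its exact code; the page array and size are hypothetical inputs:

#include <stddef.h>
#include <stdint.h>

/* Simplified illustration: page_iova[i] is the IOVA of the i-th page of size
 * pg_sz. Return the length (in bytes) of the longest IOVA-contiguous run.
 */
static size_t
max_iova_contig_run(const uint64_t *page_iova, size_t n_pages, size_t pg_sz)
{
	size_t i, cur = 0, max = 0;
	uint64_t expected = 0;

	for (i = 0; i < n_pages; i++) {
		if (cur == 0 || page_iova[i] == expected) {
			cur += pg_sz;		/* run continues */
		} else {
			if (cur > max)
				max = cur;
			cur = pg_sz;		/* restart run at this page */
		}
		expected = page_iova[i] + pg_sz;
	}
	return cur > max ? cur : max;
}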
*/ -void -malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) +static bool +elem_check_phys_contig(const struct rte_memseg_list *msl, + void *start, size_t size) { - malloc_elem_init(elem, prev->heap, prev->ms, 0); - elem->prev = prev; - elem->state = ELEM_BUSY; /* mark busy so its never merged */ + return eal_memalloc_is_contig(msl, start, size); } /* @@ -57,27 +197,59 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) */ static void * elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - const size_t bmask = ~(bound - 1); - uintptr_t end_pt = (uintptr_t)elem + - elem->size - MALLOC_ELEM_TRAILER_LEN; - uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - uintptr_t new_elem_start; - - /* check boundary */ - if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { - end_pt = RTE_ALIGN_FLOOR(end_pt, bound); - new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - end_pt = new_data_start + size; - if (((end_pt - 1) & bmask) != (new_data_start & bmask)) - return NULL; - } + size_t elem_size = elem->size; + + /* + * we're allocating from the end, so adjust the size of element by + * alignment size. + */ + while (elem_size >= size) { + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem_size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + end_pt = new_data_start + size; + + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; - new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + /* if the new start point is before the exist start, + * it won't fit + */ + if (new_elem_start < (uintptr_t)elem) + return NULL; - /* if the new start point is before the exist start, it won't fit */ - return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; + if (contig) { + size_t new_data_size = end_pt - new_data_start; + + /* + * if physical contiguousness was requested and we + * couldn't fit all data into one physically contiguous + * block, try again with lower addresses. 
+ */ + if (!elem_check_phys_contig(elem->msl, + (void *)new_data_start, + new_data_size)) { + elem_size -= align; + continue; + } + } + return (void *)new_elem_start; + } + return NULL; } /* @@ -86,9 +258,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - return elem_start_pt(elem, size, align, bound) != NULL; + return elem_start_pt(elem, size, align, bound, contig) != NULL; } /* @@ -98,17 +270,57 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, static void split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) { - struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size); + struct malloc_elem *next_elem = elem->next; const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; const size_t new_elem_size = elem->size - old_elem_size; - malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size); + malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size); split_pt->prev = elem; - next_elem->prev = split_pt; + split_pt->next = next_elem; + if (next_elem) + next_elem->prev = split_pt; + else + elem->heap->last = split_pt; + elem->next = split_pt; elem->size = old_elem_size; set_trailer(elem); } +/* + * our malloc heap is a doubly linked list, so doubly remove our element. + */ +static void __rte_unused +remove_elem(struct malloc_elem *elem) +{ + struct malloc_elem *next, *prev; + next = elem->next; + prev = elem->prev; + + if (next) + next->prev = prev; + else + elem->heap->last = prev; + if (prev) + prev->next = next; + else + elem->heap->first = next; + + elem->prev = NULL; + elem->next = NULL; +} + +static int +next_elem_is_adjacent(struct malloc_elem *elem) +{ + return elem->next == RTE_PTR_ADD(elem, elem->size); +} + +static int +prev_elem_is_adjacent(struct malloc_elem *elem) +{ + return elem == RTE_PTR_ADD(elem->prev, elem->prev->size); +} + /* * Given an element size, compute its freelist index. * We free an element into the freelist containing similarly-sized elements. @@ -162,8 +374,8 @@ malloc_elem_free_list_insert(struct malloc_elem *elem) /* * Remove the specified element from its heap's free list. 
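The freelist-index comment above groups elements into similarly-sized classes; a generic power-of-two binning sketch of that idea follows (the bin count and minimum size are arbitrary and do not reflect DPDK's actual constants):

#include <stddef.h>

#define EXAMPLE_NUM_BINS 13
#define EXAMPLE_MIN_SIZE 128	/* everything at or below this shares bin 0 */

/* Illustrative only: map an element size to a size-class index. */
static size_t example_free_list_index(size_t size)
{
	size_t idx = 0;
	size_t bound = EXAMPLE_MIN_SIZE;

	while (size > bound && idx < EXAMPLE_NUM_BINS - 1) {
		bound <<= 1;	/* next power-of-two size class */
		idx++;
	}
	return idx;
}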
*/ -static void -elem_free_list_remove(struct malloc_elem *elem) +void +malloc_elem_free_list_remove(struct malloc_elem *elem) { LIST_REMOVE(elem, free_list); } @@ -176,14 +388,15 @@ elem_free_list_remove(struct malloc_elem *elem) */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound, + contig); const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; const size_t trailer_size = elem->size - old_elem_size - size - MALLOC_ELEM_OVERHEAD; - elem_free_list_remove(elem); + malloc_elem_free_list_remove(elem); if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { /* split it, too much free space after elem */ @@ -192,6 +405,9 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, split_elem(elem, new_free_elem); malloc_elem_free_list_insert(new_free_elem); + + if (elem == elem->heap->last) + elem->heap->last = new_free_elem; } if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { @@ -230,9 +446,66 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, static inline void join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) { - struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size); + struct malloc_elem *next = elem2->next; elem1->size += elem2->size; - next->prev = elem1; + if (next) + next->prev = elem1; + else + elem1->heap->last = elem1; + elem1->next = next; +} + +struct malloc_elem * +malloc_elem_join_adjacent_free(struct malloc_elem *elem) +{ + /* + * check if next element exists, is adjacent and is free, if so join + * with it, need to remove from free list. + */ + if (elem->next != NULL && elem->next->state == ELEM_FREE && + next_elem_is_adjacent(elem)) { + void *erase; + size_t erase_len; + + /* we will want to erase the trailer and header */ + erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad; + + /* remove from free list, join to this one */ + malloc_elem_free_list_remove(elem->next); + join_elem(elem, elem->next); + + /* erase header, trailer and pad */ + memset(erase, 0, erase_len); + } + + /* + * check if prev element exists, is adjacent and is free, if so join + * with it, need to remove from free list. + */ + if (elem->prev != NULL && elem->prev->state == ELEM_FREE && + prev_elem_is_adjacent(elem)) { + struct malloc_elem *new_elem; + void *erase; + size_t erase_len; + + /* we will want to erase trailer and header */ + erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->pad; + + /* remove from free list, join to this one */ + malloc_elem_free_list_remove(elem->prev); + + new_elem = elem->prev; + join_elem(new_elem, elem); + + /* erase header, trailer and pad */ + memset(erase, 0, erase_len); + + elem = new_elem; + } + + return elem; } /* @@ -240,43 +513,74 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) * blocks either immediately before or immediately after newly freed block * are also free, the blocks are merged together. 
*/ -int +struct malloc_elem * malloc_elem_free(struct malloc_elem *elem) { - if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) - return -1; + void *ptr; + size_t data_len; - rte_spinlock_lock(&(elem->heap->lock)); - size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN; - uint8_t *ptr = (uint8_t *)&elem[1]; - struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); - if (next->state == ELEM_FREE){ - /* remove from free list, join to this one */ - elem_free_list_remove(next); - join_elem(elem, next); - sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); - } + ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); + data_len = elem->size - MALLOC_ELEM_OVERHEAD; + + elem = malloc_elem_join_adjacent_free(elem); - /* check if previous element is free, if so join with it and return, - * need to re-insert in free list, as that element's size is changing - */ - if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { - elem_free_list_remove(elem->prev); - join_elem(elem->prev, elem); - sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); - ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); - elem = elem->prev; - } malloc_elem_free_list_insert(elem); + elem->pad = 0; + /* decrease heap's count of allocated elements */ elem->heap->alloc_count--; - memset(ptr, 0, sz); + memset(ptr, 0, data_len); - rte_spinlock_unlock(&(elem->heap->lock)); + return elem; +} - return 0; +/* assume all checks were already done */ +void +malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len) +{ + struct malloc_elem *hide_start, *hide_end, *prev, *next; + size_t len_before, len_after; + + hide_start = start; + hide_end = RTE_PTR_ADD(start, len); + + prev = elem->prev; + next = elem->next; + + /* we cannot do anything with non-adjacent elements */ + if (next && next_elem_is_adjacent(elem)) { + len_after = RTE_PTR_DIFF(next, hide_end); + if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split after */ + split_elem(elem, hide_end); + + malloc_elem_free_list_insert(hide_end); + } else if (len_after > 0) { + RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n"); + return; + } + } + + /* we cannot do anything with non-adjacent elements */ + if (prev && prev_elem_is_adjacent(elem)) { + len_before = RTE_PTR_DIFF(hide_start, elem); + if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split before */ + split_elem(elem, hide_start); + + prev = elem; + elem = hide_start; + + malloc_elem_free_list_insert(prev); + } else if (len_before > 0) { + RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n"); + return; + } + } + + remove_elem(elem); } /* @@ -287,22 +591,23 @@ int malloc_elem_resize(struct malloc_elem *elem, size_t size) { const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD; + /* if we request a smaller size, then always return ok */ if (elem->size >= new_size) return 0; - struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); - rte_spinlock_lock(&elem->heap->lock); - if (next ->state != ELEM_FREE) - goto err_return; - if (elem->size + next->size < new_size) - goto err_return; + /* check if there is a next element, it's free and adjacent */ + if (!elem->next || elem->next->state != ELEM_FREE || + !next_elem_is_adjacent(elem)) + return -1; + if (elem->size + elem->next->size < new_size) + return -1; /* we now know the element fits, so remove from free list, * join the two */ - elem_free_list_remove(next); - join_elem(elem, next); + malloc_elem_free_list_remove(elem->next); + join_elem(elem, elem->next); if (elem->size - new_size >= 
MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) { /* now we have a big block together. Lets cut it down a bit, by splitting */ @@ -311,10 +616,28 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size) split_elem(elem, split_pt); malloc_elem_free_list_insert(split_pt); } - rte_spinlock_unlock(&elem->heap->lock); return 0; +} -err_return: - rte_spinlock_unlock(&elem->heap->lock); - return -1; +static inline const char * +elem_state_to_str(enum elem_state state) +{ + switch (state) { + case ELEM_PAD: + return "PAD"; + case ELEM_BUSY: + return "BUSY"; + case ELEM_FREE: + return "FREE"; + } + return "ERROR"; +} + +void +malloc_elem_dump(const struct malloc_elem *elem, FILE *f) +{ + fprintf(f, "Malloc element at %p (%s)\n", elem, + elem_state_to_str(elem->state)); + fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad); + fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next); } diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index f4c1c7a9..e2bda4c0 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -5,7 +5,11 @@ #ifndef MALLOC_ELEM_H_ #define MALLOC_ELEM_H_ -#include +#include + +#include + +#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) /* dummy definition of struct so we can use pointers to it in malloc_elem struct */ struct malloc_heap; @@ -18,9 +22,13 @@ enum elem_state { struct malloc_elem { struct malloc_heap *heap; - struct malloc_elem *volatile prev; /* points to prev elem in memseg */ - LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ - const struct rte_memseg *ms; + struct malloc_elem *volatile prev; + /**< points to prev elem in memseg */ + struct malloc_elem *volatile next; + /**< points to next elem in memseg */ + LIST_ENTRY(malloc_elem) free_list; + /**< list of free elements in heap */ + struct rte_memseg_list *msl; volatile enum elem_state state; uint32_t pad; size_t size; @@ -107,15 +115,11 @@ malloc_elem_from_data(const void *data) void malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, - const struct rte_memseg *ms, + struct rte_memseg_list *msl, size_t size); -/* - * initialise a dummy malloc_elem header for the end-of-memseg marker - */ void -malloc_elem_mkend(struct malloc_elem *elem, - struct malloc_elem *prev_free); +malloc_elem_insert(struct malloc_elem *elem); /* * return true if the current malloc_elem can hold a block of data @@ -123,7 +127,7 @@ malloc_elem_mkend(struct malloc_elem *elem, */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned int align, size_t bound, bool contig); /* * reserve a block of data in an existing malloc_elem. If the malloc_elem @@ -131,16 +135,19 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned int align, size_t bound, bool contig); /* * free a malloc_elem block by adding it to the free list. If the * blocks either immediately before or immediately after newly freed block * are also free, the blocks are merged together. */ -int +struct malloc_elem * malloc_elem_free(struct malloc_elem *elem); +struct malloc_elem * +malloc_elem_join_adjacent_free(struct malloc_elem *elem); + /* * attempt to resize a malloc_elem by expanding into any free space * immediately after it in memory. 
@@ -148,6 +155,18 @@ malloc_elem_free(struct malloc_elem *elem); int malloc_elem_resize(struct malloc_elem *elem, size_t size); +void +malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len); + +void +malloc_elem_free_list_remove(struct malloc_elem *elem); + +/* + * dump contents of malloc elem to a file. + */ +void +malloc_elem_dump(const struct malloc_elem *elem, FILE *f); + /* * Given an element size, compute its freelist index. */ @@ -160,4 +179,10 @@ malloc_elem_free_list_index(size_t size); void malloc_elem_free_list_insert(struct malloc_elem *elem); +/* + * Find biggest IOVA-contiguous zone within an element with specified alignment. + */ +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align); + #endif /* MALLOC_ELEM_H_ */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 7aafc880..12aaf2d7 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -20,9 +21,13 @@ #include #include #include +#include +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" #include "malloc_elem.h" #include "malloc_heap.h" +#include "malloc_mp.h" static unsigned check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) @@ -62,26 +67,51 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) } /* - * Expand the heap with a memseg. - * This reserves the zone and sets a dummy malloc_elem header at the end - * to prevent overflow. The rest of the zone is added to free list as a single - * large free block + * Expand the heap with a memory area. */ -static void -malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) +static struct malloc_elem * +malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, + void *start, size_t len) +{ + struct malloc_elem *elem = start; + + malloc_elem_init(elem, heap, msl, len); + + malloc_elem_insert(elem); + + elem = malloc_elem_join_adjacent_free(elem); + + malloc_elem_free_list_insert(elem); + + return elem; +} + +static int +malloc_add_seg(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, size_t len, void *arg __rte_unused) { - /* allocate the memory block headers, one at end, one at start */ - struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr; - struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr, - ms->len - MALLOC_ELEM_OVERHEAD); - end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE); - const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *found_msl; + struct malloc_heap *heap; + int msl_idx; + + heap = &mcfg->malloc_heaps[msl->socket_id]; + + /* msl is const, so find it */ + msl_idx = msl - mcfg->memsegs; + + if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) + return -1; - malloc_elem_init(start_elem, heap, ms, elem_size); - malloc_elem_mkend(end_elem, start_elem); - malloc_elem_free_list_insert(start_elem); + found_msl = &mcfg->memsegs[msl_idx]; - heap->total_size += elem_size; + malloc_heap_add_memory(heap, found_msl, ms->addr, len); + + heap->total_size += len; + + RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20, + msl->socket_id); + return 0; } /* @@ -92,7 +122,7 @@ malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) */ static struct malloc_elem * find_suitable_element(struct malloc_heap *heap, size_t size, - unsigned flags, size_t align, 
size_t bound) + unsigned int flags, size_t align, size_t bound, bool contig) { size_t idx; struct malloc_elem *elem, *alt_elem = NULL; @@ -101,8 +131,10 @@ find_suitable_element(struct malloc_heap *heap, size_t size, idx < RTE_HEAP_NUM_FREELISTS; idx++) { for (elem = LIST_FIRST(&heap->free_head[idx]); !!elem; elem = LIST_NEXT(elem, free_list)) { - if (malloc_elem_can_hold(elem, size, align, bound)) { - if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) + if (malloc_elem_can_hold(elem, size, align, bound, + contig)) { + if (check_hugepage_sz(flags, + elem->msl->page_sz)) return elem; if (alt_elem == NULL) alt_elem = elem; @@ -116,35 +148,771 @@ find_suitable_element(struct malloc_heap *heap, size_t size, return NULL; } +/* + * Iterates through the freelist for a heap to find a free element with the + * biggest size and requested alignment. Will also set size to whatever element + * size that was found. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_biggest_element(struct malloc_heap *heap, size_t *size, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem, *max_elem = NULL; + size_t idx, max_size = 0; + + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + size_t cur_size; + if (!check_hugepage_sz(flags, elem->msl->page_sz)) + continue; + if (contig) { + cur_size = + malloc_elem_find_max_iova_contig(elem, + align); + } else { + void *data_start = RTE_PTR_ADD(elem, + MALLOC_ELEM_HEADER_LEN); + void *data_end = RTE_PTR_ADD(elem, elem->size - + MALLOC_ELEM_TRAILER_LEN); + void *aligned = RTE_PTR_ALIGN_CEIL(data_start, + align); + /* check if aligned data start is beyond end */ + if (aligned >= data_end) + continue; + cur_size = RTE_PTR_DIFF(data_end, aligned); + } + if (cur_size > max_size) { + max_size = cur_size; + max_elem = elem; + } + } + } + + *size = max_size; + return max_elem; +} + /* * Main function to allocate a block of memory from the heap. * It locks the free list, scans it, and adds a new memseg if the * scan fails. Once the new memseg is added, it re-scans and should return * the new element after releasing the lock. */ -void * -malloc_heap_alloc(struct malloc_heap *heap, - const char *type __attribute__((unused)), size_t size, unsigned flags, - size_t align, size_t bound) +static void * +heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, + unsigned int flags, size_t align, size_t bound, bool contig) { struct malloc_elem *elem; size = RTE_CACHE_LINE_ROUNDUP(size); align = RTE_CACHE_LINE_ROUNDUP(align); - rte_spinlock_lock(&heap->lock); + elem = find_suitable_element(heap, size, flags, align, bound, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound, contig); + + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } - elem = find_suitable_element(heap, size, flags, align, bound); + return elem == NULL ? 
NULL : (void *)(&elem[1]); +} + +static void * +heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem; + size_t size; + + align = RTE_CACHE_LINE_ROUNDUP(align); + + elem = find_biggest_element(heap, &size, flags, align, contig); if (elem != NULL) { - elem = malloc_elem_alloc(elem, size, align, bound); + elem = malloc_elem_alloc(elem, size, align, 0, contig); + /* increase heap's count of allocated elements */ heap->alloc_count++; } - rte_spinlock_unlock(&heap->lock); return elem == NULL ? NULL : (void *)(&elem[1]); } +/* this function is exposed in malloc_mp.h */ +void +rollback_expand_heap(struct rte_memseg **ms, int n_segs, + struct malloc_elem *elem, void *map_addr, size_t map_len) +{ + if (elem != NULL) { + malloc_elem_free_list_remove(elem); + malloc_elem_hide_region(elem, map_addr, map_len); + } + + eal_memalloc_free_seg_bulk(ms, n_segs); +} + +/* this function is exposed in malloc_mp.h */ +struct malloc_elem * +alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, + int socket, unsigned int flags, size_t align, size_t bound, + bool contig, struct rte_memseg **ms, int n_segs) +{ + struct rte_memseg_list *msl; + struct malloc_elem *elem = NULL; + size_t alloc_sz; + int allocd_pages; + void *ret, *map_addr; + + alloc_sz = (size_t)pg_sz * n_segs; + + /* first, check if we're allowed to allocate this memory */ + if (eal_memalloc_mem_alloc_validate(socket, + heap->total_size + alloc_sz) < 0) { + RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n"); + return NULL; + } + + allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz, + socket, true); + + /* make sure we've allocated our pages... */ + if (allocd_pages < 0) + return NULL; + + map_addr = ms[0]->addr; + msl = rte_mem_virt2memseg_list(map_addr); + + /* check if we wanted contiguous memory but didn't get it */ + if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n", + __func__); + goto fail; + } + + /* add newly minted memsegs to malloc heap */ + elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); + + /* try once more, as now we have allocated new memory */ + ret = find_suitable_element(heap, elt_size, flags, align, bound, + contig); + + if (ret == NULL) + goto fail; + + return elem; + +fail: + rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); + return NULL; +} + +static int +try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, + size_t elt_size, int socket, unsigned int flags, size_t align, + size_t bound, bool contig) +{ + struct malloc_elem *elem; + struct rte_memseg **ms; + void *map_addr; + size_t alloc_sz; + int n_segs; + bool callback_triggered = false; + + alloc_sz = RTE_ALIGN_CEIL(align + elt_size + + MALLOC_ELEM_TRAILER_LEN, pg_sz); + n_segs = alloc_sz / pg_sz; + + /* we can't know in advance how many pages we'll need, so we malloc */ + ms = malloc(sizeof(*ms) * n_segs); + + memset(ms, 0, sizeof(*ms) * n_segs); + + if (ms == NULL) + return -1; + + elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align, + bound, contig, ms, n_segs); + + if (elem == NULL) + goto free_ms; + + map_addr = ms[0]->addr; + + /* notify user about changes in memory map */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); + + /* notify other processes that this has happened */ + if (request_sync()) { + /* we couldn't ensure all processes have mapped memory, + * 
so free it back and notify everyone that it's been + * freed back. + * + * technically, we could've avoided adding memory addresses to + * the map, but that would've led to inconsistent behavior + * between primary and secondary processes, as those get + * callbacks during sync. therefore, force primary process to + * do alloc-and-rollback syncs as well. + */ + callback_triggered = true; + goto free_elem; + } + heap->total_size += alloc_sz; + + RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n", + socket, alloc_sz >> 20ULL); + + free(ms); + + return 0; + +free_elem: + if (callback_triggered) + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + map_addr, alloc_sz); + + rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); + + request_sync(); +free_ms: + free(ms); + + return -1; +} + +static int +try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz, + size_t elt_size, int socket, unsigned int flags, size_t align, + size_t bound, bool contig) +{ + struct malloc_mp_req req; + int req_result; + + memset(&req, 0, sizeof(req)); + + req.t = REQ_TYPE_ALLOC; + req.alloc_req.align = align; + req.alloc_req.bound = bound; + req.alloc_req.contig = contig; + req.alloc_req.flags = flags; + req.alloc_req.elt_size = elt_size; + req.alloc_req.page_sz = pg_sz; + req.alloc_req.socket = socket; + req.alloc_req.heap = heap; /* it's in shared memory */ + + req_result = request_to_primary(&req); + + if (req_result != 0) + return -1; + + if (req.result != REQ_RESULT_SUCCESS) + return -1; + + return 0; +} + +static int +try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, + int socket, unsigned int flags, size_t align, size_t bound, + bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret; + + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket, + flags, align, bound, contig); + } else { + ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket, + flags, align, bound, contig); + } + + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + return ret; +} + +static int +compare_pagesz(const void *a, const void *b) +{ + const struct rte_memseg_list * const*mpa = a; + const struct rte_memseg_list * const*mpb = b; + const struct rte_memseg_list *msla = *mpa; + const struct rte_memseg_list *mslb = *mpb; + uint64_t pg_sz_a = msla->page_sz; + uint64_t pg_sz_b = mslb->page_sz; + + if (pg_sz_a < pg_sz_b) + return -1; + if (pg_sz_a > pg_sz_b) + return 1; + return 0; +} + +static int +alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS]; + struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS]; + uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS]; + uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS]; + uint64_t prev_pg_sz; + int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz; + bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0; + unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + void *ret; + + memset(requested_msls, 0, sizeof(requested_msls)); + memset(other_msls, 0, sizeof(other_msls)); + memset(requested_pg_sz, 0, sizeof(requested_pg_sz)); + memset(other_pg_sz, 0, sizeof(other_pg_sz)); + + /* + * go through memseg list and take note of all the page sizes 
available, + * and if any of them were specifically requested by the user. + */ + n_requested_msls = 0; + n_other_msls = 0; + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + + if (msl->socket_id != socket) + continue; + + if (msl->base_va == NULL) + continue; + + /* if pages of specific size were requested */ + if (size_flags != 0 && check_hugepage_sz(size_flags, + msl->page_sz)) + requested_msls[n_requested_msls++] = msl; + else if (size_flags == 0 || size_hint) + other_msls[n_other_msls++] = msl; + } + + /* sort the lists, smallest first */ + qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]), + compare_pagesz); + qsort(other_msls, n_other_msls, sizeof(other_msls[0]), + compare_pagesz); + + /* now, extract page sizes we are supposed to try */ + prev_pg_sz = 0; + n_requested_pg_sz = 0; + for (i = 0; i < n_requested_msls; i++) { + uint64_t pg_sz = requested_msls[i]->page_sz; + + if (prev_pg_sz != pg_sz) { + requested_pg_sz[n_requested_pg_sz++] = pg_sz; + prev_pg_sz = pg_sz; + } + } + prev_pg_sz = 0; + n_other_pg_sz = 0; + for (i = 0; i < n_other_msls; i++) { + uint64_t pg_sz = other_msls[i]->page_sz; + + if (prev_pg_sz != pg_sz) { + other_pg_sz[n_other_pg_sz++] = pg_sz; + prev_pg_sz = pg_sz; + } + } + + /* finally, try allocating memory of specified page sizes, starting from + * the smallest sizes + */ + for (i = 0; i < n_requested_pg_sz; i++) { + uint64_t pg_sz = requested_pg_sz[i]; + + /* + * do not pass the size hint here, as user expects other page + * sizes first, before resorting to best effort allocation. + */ + if (!try_expand_heap(heap, pg_sz, size, socket, size_flags, + align, bound, contig)) + return 0; + } + if (n_other_pg_sz == 0) + return -1; + + /* now, check if we can reserve anything with size hint */ + ret = find_suitable_element(heap, size, flags, align, bound, contig); + if (ret != NULL) + return 0; + + /* + * we still couldn't reserve memory, so try expanding heap with other + * page sizes, if there are any + */ + for (i = 0; i < n_other_pg_sz; i++) { + uint64_t pg_sz = other_pg_sz[i]; + + if (!try_expand_heap(heap, pg_sz, size, socket, flags, + align, bound, contig)) + return 0; + } + return -1; +} + +/* this will try lower page sizes first */ +static void * +heap_alloc_on_socket(const char *type, size_t size, int socket, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 1 : align; + + /* for legacy mode, try once and with all flags */ + if (internal_config.legacy_mem) { + ret = heap_alloc(heap, type, size, flags, align, bound, contig); + goto alloc_unlock; + } + + /* + * we do not pass the size hint here, because even if allocation fails, + * we may still be able to allocate memory from appropriate page sizes, + * we just need to request more memory first. 
+ */ + ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); + if (ret != NULL) + goto alloc_unlock; + + if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound, + contig)) { + ret = heap_alloc(heap, type, size, flags, align, bound, contig); + + /* this should have succeeded */ + if (ret == NULL) + RTE_LOG(ERR, EAL, "Error allocating from heap\n"); + } +alloc_unlock: + rte_spinlock_unlock(&(heap->lock)); + return ret; +} + +void * +malloc_heap_alloc(const char *type, size_t size, int socket_arg, + unsigned int flags, size_t align, size_t bound, bool contig) +{ + int socket, i, cur_socket; + void *ret; + + /* return NULL if size is 0 or alignment is not power-of-2 */ + if (size == 0 || (align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = heap_alloc_on_socket(type, size, socket, flags, align, bound, + contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < (int) rte_socket_count(); i++) { + cur_socket = rte_socket_id_by_idx(i); + if (cur_socket == socket) + continue; + ret = heap_alloc_on_socket(type, size, cur_socket, flags, + align, bound, contig); + if (ret != NULL) + return ret; + } + return NULL; +} + +static void * +heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags, + size_t align, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 
1 : align; + + ret = heap_alloc_biggest(heap, type, flags, align, contig); + + rte_spinlock_unlock(&(heap->lock)); + + return ret; +} + +void * +malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, + size_t align, bool contig) +{ + int socket, i, cur_socket; + void *ret; + + /* return NULL if align is not power-of-2 */ + if ((align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = heap_alloc_biggest_on_socket(type, socket, flags, align, + contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < (int) rte_socket_count(); i++) { + cur_socket = rte_socket_id_by_idx(i); + if (cur_socket == socket) + continue; + ret = heap_alloc_biggest_on_socket(type, cur_socket, flags, + align, contig); + if (ret != NULL) + return ret; + } + return NULL; +} + +/* this function is exposed in malloc_mp.h */ +int +malloc_heap_free_pages(void *aligned_start, size_t aligned_len) +{ + int n_segs, seg_idx, max_seg_idx; + struct rte_memseg_list *msl; + size_t page_sz; + + msl = rte_mem_virt2memseg_list(aligned_start); + if (msl == NULL) + return -1; + + page_sz = (size_t)msl->page_sz; + n_segs = aligned_len / page_sz; + seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz; + max_seg_idx = seg_idx + n_segs; + + for (; seg_idx < max_seg_idx; seg_idx++) { + struct rte_memseg *ms; + + ms = rte_fbarray_get(&msl->memseg_arr, seg_idx); + eal_memalloc_free_seg(ms); + } + return 0; +} + +int +malloc_heap_free(struct malloc_elem *elem) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap; + void *start, *aligned_start, *end, *aligned_end; + size_t len, aligned_len, page_sz; + struct rte_memseg_list *msl; + unsigned int i, n_segs, before_space, after_space; + int ret; + + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + /* elem may be merged with previous element, so keep heap address */ + heap = elem->heap; + msl = elem->msl; + page_sz = (size_t)msl->page_sz; + + rte_spinlock_lock(&(heap->lock)); + + /* mark element as free */ + elem->state = ELEM_FREE; + + elem = malloc_elem_free(elem); + + /* anything after this is a bonus */ + ret = 0; + + /* ...of which we can't avail if we are in legacy mode */ + if (internal_config.legacy_mem) + goto free_unlock; + + /* check if we can free any memory back to the system */ + if (elem->size < page_sz) + goto free_unlock; + + /* probably, but let's make sure, as we may not be using up full page */ + start = elem; + len = elem->size; + aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz); + end = RTE_PTR_ADD(elem, len); + aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz); + + aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); + + /* can't free anything */ + if (aligned_len < page_sz) + goto free_unlock; + + /* we can free something. 
however, some of these pages may be marked as + * unfreeable, so also check that as well + */ + n_segs = aligned_len / page_sz; + for (i = 0; i < n_segs; i++) { + const struct rte_memseg *tmp = + rte_mem_virt2memseg(aligned_start, msl); + + if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) { + /* this is an unfreeable segment, so move start */ + aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len); + } + } + + /* recalculate length and number of segments */ + aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); + n_segs = aligned_len / page_sz; + + /* check if we can still free some pages */ + if (n_segs == 0) + goto free_unlock; + + /* We're not done yet. We also have to check if by freeing space we will + * be leaving free elements that are too small to store new elements. + * Check if we have enough space in the beginning and at the end, or if + * start/end are exactly page aligned. + */ + before_space = RTE_PTR_DIFF(aligned_start, elem); + after_space = RTE_PTR_DIFF(end, aligned_end); + if (before_space != 0 && + before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* There is not enough space before start, but we may be able to + * move the start forward by one page. + */ + if (n_segs == 1) + goto free_unlock; + + /* move start */ + aligned_start = RTE_PTR_ADD(aligned_start, page_sz); + aligned_len -= page_sz; + n_segs--; + } + if (after_space != 0 && after_space < + MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* There is not enough space after end, but we may be able to + * move the end backwards by one page. + */ + if (n_segs == 1) + goto free_unlock; + + /* move end */ + aligned_end = RTE_PTR_SUB(aligned_end, page_sz); + aligned_len -= page_sz; + n_segs--; + } + + /* now we can finally free us some pages */ + + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* + * we allow secondary processes to clear the heap of this allocated + * memory because it is safe to do so, as even if notifications about + * unmapped pages don't make it to other processes, heap is shared + * across all processes, and will become empty of this memory anyway, + * and nothing can allocate it back unless primary process will be able + * to deliver allocation message to every single running process. + */ + + malloc_elem_free_list_remove(elem); + + malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len); + + heap->total_size -= aligned_len; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* notify user about changes in memory map */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + aligned_start, aligned_len); + + /* don't care if any of this fails */ + malloc_heap_free_pages(aligned_start, aligned_len); + + request_sync(); + } else { + struct malloc_mp_req req; + + memset(&req, 0, sizeof(req)); + + req.t = REQ_TYPE_FREE; + req.free_req.addr = aligned_start; + req.free_req.len = aligned_len; + + /* + * we request primary to deallocate pages, but we don't do it + * in this thread. instead, we notify primary that we would like + * to deallocate pages, and this process will receive another + * request (in parallel) that will do it for us on another + * thread. + * + * we also don't really care if this succeeds - the data is + * already removed from the heap, so it is, for all intents and + * purposes, hidden from the rest of DPDK even if some other + * process (including this one) may have these pages mapped. + * + * notifications about deallocated memory happen during sync. 
+ */ + request_to_primary(&req); + } + + RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n", + msl->socket_id, aligned_len >> 20ULL); + + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); +free_unlock: + rte_spinlock_unlock(&(heap->lock)); + return ret; +} + +int +malloc_heap_resize(struct malloc_elem *elem, size_t size) +{ + int ret; + + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + rte_spinlock_lock(&(elem->heap->lock)); + + ret = malloc_elem_resize(elem, size); + + rte_spinlock_unlock(&(elem->heap->lock)); + + return ret; +} + /* * Function to retrieve data for heap on given socket */ @@ -183,21 +951,49 @@ malloc_heap_get_stats(struct malloc_heap *heap, return 0; } +/* + * Function to retrieve data for heap on given socket + */ +void +malloc_heap_dump(struct malloc_heap *heap, FILE *f) +{ + struct malloc_elem *elem; + + rte_spinlock_lock(&heap->lock); + + fprintf(f, "Heap size: 0x%zx\n", heap->total_size); + fprintf(f, "Heap alloc count: %u\n", heap->alloc_count); + + elem = heap->first; + while (elem) { + malloc_elem_dump(elem, f); + elem = elem->next; + } + + rte_spinlock_unlock(&heap->lock); +} + int rte_eal_malloc_heap_init(void) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - unsigned ms_cnt; - struct rte_memseg *ms; - if (mcfg == NULL) + if (register_mp_requests()) { + RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n"); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return -1; - - for (ms = &mcfg->memseg[0], ms_cnt = 0; - (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); - ms_cnt++, ms++) { - malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); } - return 0; + /* unlock mem hotplug here. it's safe for primary as no requests can + * even come before primary itself is fully initialized, and secondaries + * do not need to initialize the heap. 
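+ *
+ * For reference (sketch only, mirroring the rte_eal_init() changes in
+ * eal.c later in this patch), the primary initializes memory in this
+ * order:
+ *
+ *   rte_eal_memzone_init();
+ *   rte_eal_memory_init();
+ *   rte_eal_malloc_heap_init();   <- this function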
+ */ + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + /* secondary process does not need to initialize anything */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + /* add all IOVA-contiguous areas to the heap */ + return rte_memseg_contig_walk(malloc_add_seg, NULL); } diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index e0defa70..f52cb555 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -5,6 +5,8 @@ #ifndef MALLOC_HEAP_H_ #define MALLOC_HEAP_H_ +#include + #include #include @@ -24,13 +26,26 @@ malloc_get_numa_socket(void) } void * -malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, - unsigned flags, size_t align, size_t bound); +malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, + size_t align, size_t bound, bool contig); + +void * +malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags, + size_t align, bool contig); + +int +malloc_heap_free(struct malloc_elem *elem); + +int +malloc_heap_resize(struct malloc_elem *elem, size_t size); int malloc_heap_get_stats(struct malloc_heap *heap, struct rte_malloc_socket_stats *socket_stats); +void +malloc_heap_dump(struct malloc_heap *heap, FILE *f); + int rte_eal_malloc_heap_init(void); diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c new file mode 100644 index 00000000..931c14bc --- /dev/null +++ b/lib/librte_eal/common/malloc_mp.c @@ -0,0 +1,743 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include + +#include +#include +#include + +#include "eal_memalloc.h" + +#include "malloc_elem.h" +#include "malloc_mp.h" + +#define MP_ACTION_SYNC "mp_malloc_sync" +/**< request sent by primary process to notify of changes in memory map */ +#define MP_ACTION_ROLLBACK "mp_malloc_rollback" +/**< request sent by primary process to notify of changes in memory map. this is + * essentially a regular sync request, but we cannot send sync requests while + * another one is in progress, and we might have to - therefore, we do this as + * a separate callback. 
+ */ +#define MP_ACTION_REQUEST "mp_malloc_request" +/**< request sent by secondary process to ask for allocation/deallocation */ +#define MP_ACTION_RESPONSE "mp_malloc_response" +/**< response sent to secondary process to indicate result of request */ + +/* forward declarations */ +static int +handle_sync_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply); +static int +handle_rollback_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply); + +#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */ + +/* when we're allocating, we need to store some state to ensure that we can + * roll back later + */ +struct primary_alloc_req_state { + struct malloc_heap *heap; + struct rte_memseg **ms; + int ms_len; + struct malloc_elem *elem; + void *map_addr; + size_t map_len; +}; + +enum req_state { + REQ_STATE_INACTIVE = 0, + REQ_STATE_ACTIVE, + REQ_STATE_COMPLETE +}; + +struct mp_request { + TAILQ_ENTRY(mp_request) next; + struct malloc_mp_req user_req; /**< contents of request */ + pthread_cond_t cond; /**< variable we use to time out on this request */ + enum req_state state; /**< indicate status of this request */ + struct primary_alloc_req_state alloc_state; +}; + +/* + * We could've used just a single request, but it may be possible for + * secondaries to timeout earlier than the primary, and send a new request while + * primary is still expecting replies to the old one. Therefore, each new + * request will get assigned a new ID, which is how we will distinguish between + * expected and unexpected messages. + */ +TAILQ_HEAD(mp_request_list, mp_request); +static struct { + struct mp_request_list list; + pthread_mutex_t lock; +} mp_request_list = { + .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list), + .lock = PTHREAD_MUTEX_INITIALIZER +}; + +/** + * General workflow is the following: + * + * Allocation: + * S: send request to primary + * P: attempt to allocate memory + * if failed, sendmsg failure + * if success, send sync request + * S: if received msg of failure, quit + * if received sync request, synchronize memory map and reply with result + * P: if received sync request result + * if success, sendmsg success + * if failure, roll back allocation and send a rollback request + * S: if received msg of success, quit + * if received rollback request, synchronize memory map and reply with result + * P: if received sync request result + * sendmsg sync request result + * S: if received msg, quit + * + * Aside from timeouts, there are three points where we can quit: + * - if allocation failed straight away + * - if allocation and sync request succeeded + * - if allocation succeeded, sync request failed, allocation rolled back and + * rollback request received (irrespective of whether it succeeded or failed) + * + * Deallocation: + * S: send request to primary + * P: attempt to deallocate memory + * if failed, sendmsg failure + * if success, send sync request + * S: if received msg of failure, quit + * if received sync request, synchronize memory map and reply with result + * P: if received sync request result + * sendmsg sync request result + * S: if received msg, quit + * + * There is no "rollback" from deallocation, as it's safe to have some memory + * mapped in some processes - it's absent from the heap, so it won't get used. 
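+ *
+ * As a rough illustration only (the local variable names below are made
+ * up; the field layout is the one defined in malloc_mp.h), a secondary
+ * asks the primary to grow a heap with something like:
+ *
+ *   struct malloc_mp_req req;
+ *
+ *   memset(&req, 0, sizeof(req));
+ *   req.t = REQ_TYPE_ALLOC;
+ *   req.alloc_req.heap = heap;
+ *   req.alloc_req.page_sz = pg_sz;
+ *   req.alloc_req.elt_size = elt_size;
+ *   req.alloc_req.socket = socket;
+ *   req.alloc_req.flags = flags;
+ *   req.alloc_req.align = align;
+ *   req.alloc_req.bound = bound;
+ *   req.alloc_req.contig = contig;
+ *   request_to_primary(&req);
+ *
+ * and then checks req.result once request_to_primary() returns.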
+ */ + +static struct mp_request * +find_request_by_id(uint64_t id) +{ + struct mp_request *req; + TAILQ_FOREACH(req, &mp_request_list.list, next) { + if (req->user_req.id == id) + break; + } + return req; +} + +/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */ +static uint64_t +get_unique_id(void) +{ + uint64_t id; + do { + id = rte_rand(); + } while (find_request_by_id(id) != NULL); + return id; +} + +/* secondary will respond to sync requests thusly */ +static int +handle_sync(const struct rte_mp_msg *msg, const void *peer) +{ + struct rte_mp_msg reply; + const struct malloc_mp_req *req = + (const struct malloc_mp_req *)msg->param; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply.param; + int ret; + + if (req->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected request from primary\n"); + return -1; + } + + memset(&reply, 0, sizeof(reply)); + + reply.num_fds = 0; + strlcpy(reply.name, msg->name, sizeof(reply.name)); + reply.len_param = sizeof(*resp); + + ret = eal_memalloc_sync_with_primary(); + + resp->t = REQ_TYPE_SYNC; + resp->id = req->id; + resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL; + + rte_mp_reply(&reply, peer); + + return 0; +} + +static int +handle_alloc_request(const struct malloc_mp_req *m, + struct mp_request *req) +{ + const struct malloc_req_alloc *ar = &m->alloc_req; + struct malloc_heap *heap; + struct malloc_elem *elem; + struct rte_memseg **ms; + size_t alloc_sz; + int n_segs; + void *map_addr; + + alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size + + MALLOC_ELEM_TRAILER_LEN, ar->page_sz); + n_segs = alloc_sz / ar->page_sz; + + heap = ar->heap; + + /* we can't know in advance how many pages we'll need, so we malloc */ + ms = malloc(sizeof(*ms) * n_segs); + + memset(ms, 0, sizeof(*ms) * n_segs); + + if (ms == NULL) { + RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n"); + goto fail; + } + + elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket, + ar->flags, ar->align, ar->bound, ar->contig, ms, + n_segs); + + if (elem == NULL) + goto fail; + + map_addr = ms[0]->addr; + + /* we have succeeded in allocating memory, but we still need to sync + * with other processes. however, since DPDK IPC is single-threaded, we + * send an asynchronous request and exit this callback. 
+ */ + + req->alloc_state.ms = ms; + req->alloc_state.ms_len = n_segs; + req->alloc_state.map_addr = map_addr; + req->alloc_state.map_len = alloc_sz; + req->alloc_state.elem = elem; + req->alloc_state.heap = heap; + + return 0; +fail: + free(ms); + return -1; +} + +/* first stage of primary handling requests from secondary */ +static int +handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused) +{ + const struct malloc_mp_req *m = + (const struct malloc_mp_req *)msg->param; + struct mp_request *entry; + int ret; + + /* lock access to request */ + pthread_mutex_lock(&mp_request_list.lock); + + /* make sure it's not a dupe */ + entry = find_request_by_id(m->id); + if (entry != NULL) { + RTE_LOG(ERR, EAL, "Duplicate request id\n"); + goto fail; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n"); + goto fail; + } + + /* erase all data */ + memset(entry, 0, sizeof(*entry)); + + if (m->t == REQ_TYPE_ALLOC) { + ret = handle_alloc_request(m, entry); + } else if (m->t == REQ_TYPE_FREE) { + ret = malloc_heap_free_pages(m->free_req.addr, + m->free_req.len); + } else { + RTE_LOG(ERR, EAL, "Unexpected request from secondary\n"); + goto fail; + } + + if (ret != 0) { + struct rte_mp_msg resp_msg; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)resp_msg.param; + + /* send failure message straight away */ + resp_msg.num_fds = 0; + resp_msg.len_param = sizeof(*resp); + strlcpy(resp_msg.name, MP_ACTION_RESPONSE, + sizeof(resp_msg.name)); + + resp->t = m->t; + resp->result = REQ_RESULT_FAIL; + resp->id = m->id; + + if (rte_mp_sendmsg(&resp_msg)) { + RTE_LOG(ERR, EAL, "Couldn't send response\n"); + goto fail; + } + /* we did not modify the request */ + free(entry); + } else { + struct rte_mp_msg sr_msg; + struct malloc_mp_req *sr = + (struct malloc_mp_req *)sr_msg.param; + struct timespec ts; + + memset(&sr_msg, 0, sizeof(sr_msg)); + + /* we can do something, so send sync request asynchronously */ + sr_msg.num_fds = 0; + sr_msg.len_param = sizeof(*sr); + strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name)); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* sync requests carry no data */ + sr->t = REQ_TYPE_SYNC; + sr->id = m->id; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_async(&sr_msg, &ts, + handle_sync_response); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Couldn't send sync request\n"); + if (m->t == REQ_TYPE_ALLOC) + free(entry->alloc_state.ms); + goto fail; + } + + /* mark request as in progress */ + memcpy(&entry->user_req, m, sizeof(*m)); + entry->state = REQ_STATE_ACTIVE; + + TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next); + } + pthread_mutex_unlock(&mp_request_list.lock); + return 0; +fail: + pthread_mutex_unlock(&mp_request_list.lock); + free(entry); + return -1; +} + +/* callback for asynchronous sync requests for primary. this will either do a + * sendmsg with results, or trigger rollback request. 
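+ *
+ * Sketch of the outcomes handled below (summary only, see the code):
+ *
+ *   REQ_TYPE_FREE                  -> reply to secondary with the result
+ *   REQ_TYPE_ALLOC, sync succeeded -> grow heap->total_size, reply success
+ *   REQ_TYPE_ALLOC, sync failed    -> rollback_expand_heap(), then send an
+ *                                     MP_ACTION_ROLLBACK sync to secondaries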
+ */ +static int +handle_sync_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply) +{ + enum malloc_req_result result; + struct mp_request *entry; + const struct malloc_mp_req *mpreq = + (const struct malloc_mp_req *)request->param; + int i; + + /* lock the request */ + pthread_mutex_lock(&mp_request_list.lock); + + entry = find_request_by_id(mpreq->id); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Wrong request ID\n"); + goto fail; + } + + result = REQ_RESULT_SUCCESS; + + if (reply->nb_received != reply->nb_sent) + result = REQ_RESULT_FAIL; + + for (i = 0; i < reply->nb_received; i++) { + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply->msgs[i].param; + + if (resp->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected response to sync request\n"); + result = REQ_RESULT_FAIL; + break; + } + if (resp->id != entry->user_req.id) { + RTE_LOG(ERR, EAL, "Response to wrong sync request\n"); + result = REQ_RESULT_FAIL; + break; + } + if (resp->result == REQ_RESULT_FAIL) { + result = REQ_RESULT_FAIL; + break; + } + } + + if (entry->user_req.t == REQ_TYPE_FREE) { + struct rte_mp_msg msg; + struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param; + + memset(&msg, 0, sizeof(msg)); + + /* this is a free request, just sendmsg result */ + resp->t = REQ_TYPE_FREE; + resp->result = result; + resp->id = entry->user_req.id; + msg.num_fds = 0; + msg.len_param = sizeof(*resp); + strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name)); + + if (rte_mp_sendmsg(&msg)) + RTE_LOG(ERR, EAL, "Could not send message to secondary process\n"); + + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(entry); + } else if (entry->user_req.t == REQ_TYPE_ALLOC && + result == REQ_RESULT_SUCCESS) { + struct malloc_heap *heap = entry->alloc_state.heap; + struct rte_mp_msg msg; + struct malloc_mp_req *resp = + (struct malloc_mp_req *)msg.param; + + memset(&msg, 0, sizeof(msg)); + + heap->total_size += entry->alloc_state.map_len; + + /* result is success, so just notify secondary about this */ + resp->t = REQ_TYPE_ALLOC; + resp->result = result; + resp->id = entry->user_req.id; + msg.num_fds = 0; + msg.len_param = sizeof(*resp); + strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name)); + + if (rte_mp_sendmsg(&msg)) + RTE_LOG(ERR, EAL, "Could not send message to secondary process\n"); + + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(entry->alloc_state.ms); + free(entry); + } else if (entry->user_req.t == REQ_TYPE_ALLOC && + result == REQ_RESULT_FAIL) { + struct rte_mp_msg rb_msg; + struct malloc_mp_req *rb = + (struct malloc_mp_req *)rb_msg.param; + struct timespec ts; + struct primary_alloc_req_state *state = + &entry->alloc_state; + int ret; + + memset(&rb_msg, 0, sizeof(rb_msg)); + + /* we've failed to sync, so do a rollback */ + rollback_expand_heap(state->ms, state->ms_len, state->elem, + state->map_addr, state->map_len); + + /* send rollback request */ + rb_msg.num_fds = 0; + rb_msg.len_param = sizeof(*rb); + strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name)); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* sync requests carry no data */ + rb->t = REQ_TYPE_SYNC; + rb->id = entry->user_req.id; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_async(&rb_msg, &ts, + handle_rollback_response); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n"); + + /* we couldn't send rollback request, but that's OK - + * secondary will time out, 
and memory has been removed + * from heap anyway. + */ + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(state->ms); + free(entry); + goto fail; + } + } else { + RTE_LOG(ERR, EAL, " to sync request of unknown type\n"); + goto fail; + } + + pthread_mutex_unlock(&mp_request_list.lock); + return 0; +fail: + pthread_mutex_unlock(&mp_request_list.lock); + return -1; +} + +static int +handle_rollback_response(const struct rte_mp_msg *request, + const struct rte_mp_reply *reply __rte_unused) +{ + struct rte_mp_msg msg; + struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param; + const struct malloc_mp_req *mpreq = + (const struct malloc_mp_req *)request->param; + struct mp_request *entry; + + /* lock the request */ + pthread_mutex_lock(&mp_request_list.lock); + + memset(&msg, 0, sizeof(0)); + + entry = find_request_by_id(mpreq->id); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Wrong request ID\n"); + goto fail; + } + + if (entry->user_req.t != REQ_TYPE_ALLOC) { + RTE_LOG(ERR, EAL, "Unexpected active request\n"); + goto fail; + } + + /* we don't care if rollback succeeded, request still failed */ + resp->t = REQ_TYPE_ALLOC; + resp->result = REQ_RESULT_FAIL; + resp->id = mpreq->id; + msg.num_fds = 0; + msg.len_param = sizeof(*resp); + strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name)); + + if (rte_mp_sendmsg(&msg)) + RTE_LOG(ERR, EAL, "Could not send message to secondary process\n"); + + /* clean up */ + TAILQ_REMOVE(&mp_request_list.list, entry, next); + free(entry->alloc_state.ms); + free(entry); + + pthread_mutex_unlock(&mp_request_list.lock); + return 0; +fail: + pthread_mutex_unlock(&mp_request_list.lock); + return -1; +} + +/* final stage of the request from secondary */ +static int +handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused) +{ + const struct malloc_mp_req *m = + (const struct malloc_mp_req *)msg->param; + struct mp_request *entry; + + pthread_mutex_lock(&mp_request_list.lock); + + entry = find_request_by_id(m->id); + if (entry != NULL) { + /* update request status */ + entry->user_req.result = m->result; + + entry->state = REQ_STATE_COMPLETE; + + /* trigger thread wakeup */ + pthread_cond_signal(&entry->cond); + } + + pthread_mutex_unlock(&mp_request_list.lock); + + return 0; +} + +/* synchronously request memory map sync, this is only called whenever primary + * process initiates the allocation. 
+ */ +int +request_sync(void) +{ + struct rte_mp_msg msg; + struct rte_mp_reply reply; + struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param; + struct timespec ts; + int i, ret; + + memset(&msg, 0, sizeof(msg)); + memset(&reply, 0, sizeof(reply)); + + /* no need to create tailq entries as this is entirely synchronous */ + + msg.num_fds = 0; + msg.len_param = sizeof(*req); + strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name)); + + /* sync request carries no data */ + req->t = REQ_TYPE_SYNC; + req->id = get_unique_id(); + + ts.tv_nsec = 0; + ts.tv_sec = MP_TIMEOUT_S; + + /* there may be stray timeout still waiting */ + do { + ret = rte_mp_request_sync(&msg, &reply, &ts); + } while (ret != 0 && rte_errno == EEXIST); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n"); + ret = -1; + goto out; + } + + if (reply.nb_received != reply.nb_sent) { + RTE_LOG(ERR, EAL, "Not all secondaries have responded\n"); + ret = -1; + goto out; + } + + for (i = 0; i < reply.nb_received; i++) { + struct malloc_mp_req *resp = + (struct malloc_mp_req *)reply.msgs[i].param; + if (resp->t != REQ_TYPE_SYNC) { + RTE_LOG(ERR, EAL, "Unexpected response from secondary\n"); + ret = -1; + goto out; + } + if (resp->id != req->id) { + RTE_LOG(ERR, EAL, "Wrong request ID\n"); + ret = -1; + goto out; + } + if (resp->result != REQ_RESULT_SUCCESS) { + RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n"); + ret = -1; + goto out; + } + } + + ret = 0; +out: + free(reply.msgs); + return ret; +} + +/* this is a synchronous wrapper around a bunch of asynchronous requests to + * primary process. this will initiate a request and wait until responses come. + */ +int +request_to_primary(struct malloc_mp_req *user_req) +{ + struct rte_mp_msg msg; + struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param; + struct mp_request *entry; + struct timespec ts; + struct timeval now; + int ret; + + memset(&msg, 0, sizeof(msg)); + memset(&ts, 0, sizeof(ts)); + + pthread_mutex_lock(&mp_request_list.lock); + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n"); + goto fail; + } + + memset(entry, 0, sizeof(*entry)); + + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto fail; + } + + ts.tv_nsec = (now.tv_usec * 1000) % 1000000000; + ts.tv_sec = now.tv_sec + MP_TIMEOUT_S + + (now.tv_usec * 1000) / 1000000000; + + /* initialize the request */ + pthread_cond_init(&entry->cond, NULL); + + msg.num_fds = 0; + msg.len_param = sizeof(*msg_req); + strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name)); + + /* (attempt to) get a unique id */ + user_req->id = get_unique_id(); + + /* copy contents of user request into the message */ + memcpy(msg_req, user_req, sizeof(*msg_req)); + + if (rte_mp_sendmsg(&msg)) { + RTE_LOG(ERR, EAL, "Cannot send message to primary\n"); + goto fail; + } + + /* copy contents of user request into active request */ + memcpy(&entry->user_req, user_req, sizeof(*user_req)); + + /* mark request as in progress */ + entry->state = REQ_STATE_ACTIVE; + + TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next); + + /* finally, wait on timeout */ + do { + ret = pthread_cond_timedwait(&entry->cond, + &mp_request_list.lock, &ts); + } while (ret != 0 && ret != ETIMEDOUT); + + if (entry->state != REQ_STATE_COMPLETE) { + RTE_LOG(ERR, EAL, "Request timed out\n"); + ret = -1; + } else { + ret = 0; + user_req->result = entry->user_req.result; + } + TAILQ_REMOVE(&mp_request_list.list, 
entry, next); + free(entry); + + pthread_mutex_unlock(&mp_request_list.lock); + return ret; +fail: + pthread_mutex_unlock(&mp_request_list.lock); + free(entry); + return -1; +} + +int +register_mp_requests(void) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request)) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + MP_ACTION_REQUEST); + return -1; + } + } else { + if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + MP_ACTION_SYNC); + return -1; + } + if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + MP_ACTION_SYNC); + return -1; + } + if (rte_mp_action_register(MP_ACTION_RESPONSE, + handle_response)) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + MP_ACTION_RESPONSE); + return -1; + } + } + return 0; +} diff --git a/lib/librte_eal/common/malloc_mp.h b/lib/librte_eal/common/malloc_mp.h new file mode 100644 index 00000000..2b86b76f --- /dev/null +++ b/lib/librte_eal/common/malloc_mp.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef MALLOC_MP_H +#define MALLOC_MP_H + +#include +#include + +#include +#include +#include +#include + +/* forward declarations */ +struct malloc_heap; +struct rte_memseg; + +/* multiprocess synchronization structures for malloc */ +enum malloc_req_type { + REQ_TYPE_ALLOC, /**< ask primary to allocate */ + REQ_TYPE_FREE, /**< ask primary to free */ + REQ_TYPE_SYNC /**< ask secondary to synchronize its memory map */ +}; + +enum malloc_req_result { + REQ_RESULT_SUCCESS, + REQ_RESULT_FAIL +}; + +struct malloc_req_alloc { + struct malloc_heap *heap; + uint64_t page_sz; + size_t elt_size; + int socket; + unsigned int flags; + size_t align; + size_t bound; + bool contig; +}; + +struct malloc_req_free { + RTE_STD_C11 + union { + void *addr; + uint64_t addr_64; + }; + uint64_t len; +}; + +struct malloc_mp_req { + enum malloc_req_type t; + RTE_STD_C11 + union { + struct malloc_req_alloc alloc_req; + struct malloc_req_free free_req; + }; + uint64_t id; /**< not to be populated by caller */ + enum malloc_req_result result; +}; + +int +register_mp_requests(void); + +int +request_to_primary(struct malloc_mp_req *req); + +/* synchronous memory map sync request */ +int +request_sync(void); + +/* functions from malloc_heap exposed here */ +int +malloc_heap_free_pages(void *aligned_start, size_t aligned_len); + +struct malloc_elem * +alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, + int socket, unsigned int flags, size_t align, size_t bound, + bool contig, struct rte_memseg **ms, int n_segs); + +void +rollback_expand_heap(struct rte_memseg **ms, int n_segs, + struct malloc_elem *elem, void *map_addr, size_t map_len); + +#endif /* MALLOC_MP_H */ diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build index 82b8910f..56005bea 100644 --- a/lib/librte_eal/common/meson.build +++ b/lib/librte_eal/common/meson.build @@ -8,13 +8,16 @@ common_objs = [] common_sources = files( 'eal_common_bus.c', 'eal_common_cpuflags.c', + 'eal_common_class.c', 'eal_common_devargs.c', 'eal_common_dev.c', 'eal_common_errno.c', + 'eal_common_fbarray.c', 'eal_common_hexdump.c', 'eal_common_launch.c', 'eal_common_lcore.c', 'eal_common_log.c', + 'eal_common_memalloc.c', 'eal_common_memory.c', 'eal_common_memzone.c', 'eal_common_options.c', @@ -23,8 +26,10 @@ common_sources = 
files( 'eal_common_tailqs.c', 'eal_common_thread.c', 'eal_common_timer.c', + 'eal_common_uuid.c', 'malloc_elem.c', 'malloc_heap.c', + 'malloc_mp.c', 'rte_keepalive.c', 'rte_malloc.c', 'rte_reciprocal.c', @@ -43,6 +48,7 @@ common_headers = files( 'include/rte_branch_prediction.h', 'include/rte_bus.h', 'include/rte_bitmap.h', + 'include/rte_class.h', 'include/rte_common.h', 'include/rte_debug.h', 'include/rte_devargs.h', @@ -51,6 +57,7 @@ common_headers = files( 'include/rte_eal_memconfig.h', 'include/rte_eal_interrupts.h', 'include/rte_errno.h', + 'include/rte_fbarray.h', 'include/rte_hexdump.h', 'include/rte_interrupts.h', 'include/rte_keepalive.h', @@ -71,6 +78,7 @@ common_headers = files( 'include/rte_string_fns.h', 'include/rte_tailq.h', 'include/rte_time.h', + 'include/rte_uuid.h', 'include/rte_version.h') # special case install the generic headers, since they go in a subdir diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index e0e0d0b3..b51a6d11 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -29,20 +29,17 @@ void rte_free(void *addr) { if (addr == NULL) return; - if (malloc_elem_free(malloc_elem_from_data(addr)) < 0) - rte_panic("Fatal error: Invalid memory\n"); + if (malloc_heap_free(malloc_elem_from_data(addr)) < 0) + RTE_LOG(ERR, EAL, "Error: Invalid memory\n"); } /* * Allocate memory on specified heap. */ void * -rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) +rte_malloc_socket(const char *type, size_t size, unsigned int align, + int socket_arg) { - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int socket, i; - void *ret; - /* return NULL if size is 0 or alignment is not power-of-2 */ if (size == 0 || (align && !rte_is_power_of_2(align))) return NULL; @@ -50,33 +47,12 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) if (!rte_eal_has_hugepages()) socket_arg = SOCKET_ID_ANY; - if (socket_arg == SOCKET_ID_ANY) - socket = malloc_get_numa_socket(); - else - socket = socket_arg; - /* Check socket parameter */ - if (socket >= RTE_MAX_NUMA_NODES) + if (socket_arg >= RTE_MAX_NUMA_NODES) return NULL; - ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, - size, 0, align == 0 ? 1 : align, 0); - if (ret != NULL || socket_arg != SOCKET_ID_ANY) - return ret; - - /* try other heaps */ - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { - /* we already tried this one */ - if (i == socket) - continue; - - ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, - size, 0, align == 0 ? 1 : align, 0); - if (ret != NULL) - return ret; - } - - return NULL; + return malloc_heap_alloc(type, size, socket_arg, 0, + align == 0 ? 
1 : align, 0, false); } /* @@ -134,13 +110,15 @@ rte_realloc(void *ptr, size_t size, unsigned align) return rte_malloc(NULL, size, align); struct malloc_elem *elem = malloc_elem_from_data(ptr); - if (elem == NULL) - rte_panic("Fatal error: memory corruption detected\n"); + if (elem == NULL) { + RTE_LOG(ERR, EAL, "Error: memory corruption detected\n"); + return NULL; + } size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align); /* check alignment matches first, and if ok, see if we can resize block */ if (RTE_PTR_ALIGN(ptr,align) == ptr && - malloc_elem_resize(elem, size) == 0) + malloc_heap_resize(elem, size) == 0) return ptr; /* either alignment is off, or we have no room to expand, @@ -181,6 +159,23 @@ rte_malloc_get_socket_stats(int socket, return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats); } +/* + * Function to dump contents of all heaps + */ +void __rte_experimental +rte_malloc_dump_heaps(FILE *f) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int idx; + + for (idx = 0; idx < rte_socket_count(); idx++) { + unsigned int socket = rte_socket_id_by_idx(idx); + fprintf(f, "Heap on socket %i:\n", socket); + malloc_heap_dump(&mcfg->malloc_heaps[socket], f); + } + +} + /* * Print stats on memory type. If type is NULL, info on all types is printed */ @@ -222,17 +217,21 @@ rte_malloc_set_limit(__rte_unused const char *type, rte_iova_t rte_malloc_virt2iova(const void *addr) { - rte_iova_t iova; - const struct malloc_elem *elem = malloc_elem_from_data(addr); + const struct rte_memseg *ms; + struct malloc_elem *elem = malloc_elem_from_data(addr); + if (elem == NULL) return RTE_BAD_IOVA; - if (elem->ms->iova == RTE_BAD_IOVA) - return RTE_BAD_IOVA; if (rte_eal_iova_mode() == RTE_IOVA_VA) - iova = (uintptr_t)addr; - else - iova = elem->ms->iova + - RTE_PTR_DIFF(addr, elem->ms->addr); - return iova; + return (uintptr_t) addr; + + ms = rte_mem_virt2memseg(addr, elem->msl); + if (ms == NULL) + return RTE_BAD_IOVA; + + if (ms->iova == RTE_BAD_IOVA) + return RTE_BAD_IOVA; + + return ms->iova + RTE_PTR_DIFF(addr, ms->addr); } diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index be9b5e6d..8767c722 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -52,6 +52,7 @@ struct rte_service_spec_impl { rte_atomic32_t num_mapped_cores; uint64_t calls; uint64_t cycles_spent; + uint8_t active_on_lcore[RTE_MAX_LCORE]; } __rte_cache_aligned; /* the internal values of a service core */ @@ -61,7 +62,7 @@ struct core_state { uint8_t runstate; /* running or stopped */ uint8_t is_service_core; /* set if core is currently a service core */ - /* extreme statistics */ + uint64_t loops; uint64_t calls_per_service[RTE_SERVICE_NUM_MAX]; } __rte_cache_aligned; @@ -115,7 +116,7 @@ fail_mem: return -ENOMEM; } -void __rte_experimental +void rte_service_finalize(void) { if (!rte_service_library_initialized) @@ -161,7 +162,7 @@ service_mt_safe(struct rte_service_spec_impl *s) return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE); } -int32_t __rte_experimental +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enabled) { struct rte_service_spec_impl *s; @@ -175,7 +176,7 @@ rte_service_set_stats_enable(uint32_t id, int32_t enabled) return 0; } -int32_t __rte_experimental +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled) { struct rte_service_spec_impl *s; @@ -189,13 +190,13 @@ rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled) 
return 0; } -uint32_t __rte_experimental +uint32_t rte_service_get_count(void) { return rte_service_count; } -int32_t __rte_experimental +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id) { if (!service_id) @@ -213,7 +214,7 @@ rte_service_get_by_name(const char *name, uint32_t *service_id) return -ENODEV; } -const char * __rte_experimental +const char * rte_service_get_name(uint32_t id) { struct rte_service_spec_impl *s; @@ -221,7 +222,7 @@ rte_service_get_name(uint32_t id) return s->spec.name; } -int32_t __rte_experimental +int32_t rte_service_probe_capability(uint32_t id, uint32_t capability) { struct rte_service_spec_impl *s; @@ -229,7 +230,7 @@ rte_service_probe_capability(uint32_t id, uint32_t capability) return !!(s->spec.capabilities & capability); } -int32_t __rte_experimental +int32_t rte_service_component_register(const struct rte_service_spec *spec, uint32_t *id_ptr) { @@ -262,7 +263,7 @@ rte_service_component_register(const struct rte_service_spec *spec, return 0; } -int32_t __rte_experimental +int32_t rte_service_component_unregister(uint32_t id) { uint32_t i; @@ -283,7 +284,7 @@ rte_service_component_unregister(uint32_t id) return 0; } -int32_t __rte_experimental +int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate) { struct rte_service_spec_impl *s; @@ -298,7 +299,7 @@ rte_service_component_runstate_set(uint32_t id, uint32_t runstate) return 0; } -int32_t __rte_experimental +int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate) { struct rte_service_spec_impl *s; @@ -313,7 +314,7 @@ rte_service_runstate_set(uint32_t id, uint32_t runstate) return 0; } -int32_t __rte_experimental +int32_t rte_service_runstate_get(uint32_t id) { struct rte_service_spec_impl *s; @@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s, static inline int32_t -service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) +service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask) { if (!service_valid(i)) return -EINVAL; struct rte_service_spec_impl *s = &rte_services[i]; if (s->comp_runstate != RUNSTATE_RUNNING || s->app_runstate != RUNSTATE_RUNNING || - !(service_mask & (UINT64_C(1) << i))) + !(service_mask & (UINT64_C(1) << i))) { + s->active_on_lcore[lcore] = 0; return -ENOEXEC; + } + + s->active_on_lcore[lcore] = 1; /* check do we need cmpset, if MT safe or <= 1 core * mapped, atomic ops are not required. 
@@ -374,7 +379,26 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) return 0; } -int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id, +int32_t __rte_experimental +rte_service_may_be_active(uint32_t id) +{ + uint32_t ids[RTE_MAX_LCORE] = {0}; + struct rte_service_spec_impl *s = &rte_services[id]; + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + int i; + + if (!service_valid(id)) + return -EINVAL; + + for (i = 0; i < lcore_count; i++) { + if (s->active_on_lcore[ids[i]]) + return 1; + } + + return 0; +} + +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) { /* run service on calling core, using all-ones as the service mask */ @@ -398,7 +422,7 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id, return -EBUSY; } - int ret = service_run(id, cs, UINT64_MAX); + int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX); if (serialize_mt_unsafe) rte_atomic32_dec(&s->num_mapped_cores); @@ -419,9 +443,11 @@ rte_service_runner_func(void *arg) for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { /* return value ignored as no change to code flow */ - service_run(i, cs, service_mask); + service_run(i, lcore, cs, service_mask); } + cs->loops++; + rte_smp_rmb(); } @@ -430,7 +456,7 @@ rte_service_runner_func(void *arg) return 0; } -int32_t __rte_experimental +int32_t rte_service_lcore_count(void) { int32_t count = 0; @@ -440,7 +466,7 @@ rte_service_lcore_count(void) return count; } -int32_t __rte_experimental +int32_t rte_service_lcore_list(uint32_t array[], uint32_t n) { uint32_t count = rte_service_lcore_count(); @@ -463,7 +489,7 @@ rte_service_lcore_list(uint32_t array[], uint32_t n) return count; } -int32_t __rte_experimental +int32_t rte_service_lcore_count_services(uint32_t lcore) { if (lcore >= RTE_MAX_LCORE) @@ -476,7 +502,7 @@ rte_service_lcore_count_services(uint32_t lcore) return __builtin_popcountll(cs->service_mask); } -int32_t __rte_experimental +int32_t rte_service_start_with_defaults(void) { /* create a default mapping from cores to services, then start the @@ -562,7 +588,7 @@ service_update(struct rte_service_spec *service, uint32_t lcore, return 0; } -int32_t __rte_experimental +int32_t rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled) { struct rte_service_spec_impl *s; @@ -571,7 +597,7 @@ rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled) return service_update(&s->spec, lcore, &on, 0); } -int32_t __rte_experimental +int32_t rte_service_map_lcore_get(uint32_t id, uint32_t lcore) { struct rte_service_spec_impl *s; @@ -597,7 +623,7 @@ set_lcore_state(uint32_t lcore, int32_t state) lcore_states[lcore].is_service_core = (state == ROLE_SERVICE); } -int32_t __rte_experimental +int32_t rte_service_lcore_reset_all(void) { /* loop over cores, reset all to mask 0 */ @@ -617,7 +643,7 @@ rte_service_lcore_reset_all(void) return 0; } -int32_t __rte_experimental +int32_t rte_service_lcore_add(uint32_t lcore) { if (lcore >= RTE_MAX_LCORE) @@ -636,7 +662,7 @@ rte_service_lcore_add(uint32_t lcore) return rte_eal_wait_lcore(lcore); } -int32_t __rte_experimental +int32_t rte_service_lcore_del(uint32_t lcore) { if (lcore >= RTE_MAX_LCORE) @@ -655,7 +681,7 @@ rte_service_lcore_del(uint32_t lcore) return 0; } -int32_t __rte_experimental +int32_t rte_service_lcore_start(uint32_t lcore) { if (lcore >= RTE_MAX_LCORE) @@ -678,7 +704,7 @@ rte_service_lcore_start(uint32_t lcore) return ret; } -int32_t __rte_experimental +int32_t rte_service_lcore_stop(uint32_t 
lcore) { if (lcore >= RTE_MAX_LCORE) @@ -708,7 +734,7 @@ rte_service_lcore_stop(uint32_t lcore) return 0; } -int32_t __rte_experimental +int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value) { struct rte_service_spec_impl *s; @@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value) } } +int32_t __rte_experimental +rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + uint64_t *attr_value) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE || !attr_value) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + switch (attr_id) { + case RTE_SERVICE_LCORE_ATTR_LOOPS: + *attr_value = cs->loops; + return 0; + default: + return -EINVAL; + } +} + static void rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint64_t all_cycles, uint32_t reset) @@ -753,7 +801,7 @@ rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, s->cycles_spent, s->cycles_spent / calls); } -int32_t __rte_experimental +int32_t rte_service_attr_reset_all(uint32_t id) { struct rte_service_spec_impl *s; @@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id) return 0; } +int32_t __rte_experimental +rte_service_lcore_attr_reset_all(uint32_t lcore) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + cs->loops = 0; + + return 0; +} + static void service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) { @@ -781,7 +846,8 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) fprintf(f, "\n"); } -int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id) +int32_t +rte_service_dump(FILE *f, uint32_t id) { uint32_t i; int print_one = (id != UINT32_MAX); diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index aa52a01e..a0fffa98 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -4,8 +4,6 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal -DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio -DIRS-$(CONFIG_RTE_KNI_KMOD) += kni DEPDIRS-kni := eal CFLAGS += -DALLOW_EXPERIMENTAL_API diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 7e5bbe88..fd92c75c 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := ../../rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 6 +LIBABIVER := 8 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -24,23 +24,27 @@ LDLIBS += -ldl LDLIBS += -lpthread LDLIBS += -lgcc_s LDLIBS += -lrt +LDLIBS += -lrte_kvargs ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y) LDLIBS += -lnuma endif # specific to linuxapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c 
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_dev.c # from common dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c @@ -48,6 +52,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c @@ -56,14 +61,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hypervisor.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_class.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c @@ -81,6 +90,7 @@ CFLAGS_eal_interrupts.o := -D_GNU_SOURCE CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE CFLAGS_eal_timer.o := -D_GNU_SOURCE CFLAGS_eal_lcore.o := -D_GNU_SOURCE +CFLAGS_eal_memalloc.o := -D_GNU_SOURCE CFLAGS_eal_thread.o := -D_GNU_SOURCE CFLAGS_eal_log.o := -D_GNU_SOURCE CFLAGS_eal_common_log.o := -D_GNU_SOURCE diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 38306bf5..e59ac657 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -74,8 +74,8 @@ static int mem_cfg_fd = -1; static struct flock wr_lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = offsetof(struct rte_mem_config, memseg), - .l_len = sizeof(early_mem_config.memseg), + .l_start = offsetof(struct rte_mem_config, memsegs), + .l_len = sizeof(early_mem_config.memsegs), }; /* Address of global and public configuration */ @@ -92,20 +92,72 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; -/* Return user provided mbuf pool ops name */ -const char * __rte_experimental -rte_eal_mbuf_user_pool_ops(void) +/* platform-specific runtime dir */ +static char runtime_dir[PATH_MAX]; + +static const char *default_runtime_dir = "/var/run"; + +int +eal_create_runtime_dir(void) { - return internal_config.user_mbuf_pool_ops_name; + const char *directory = default_runtime_dir; + const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR"); + const char *fallback = "/tmp"; + char tmp[PATH_MAX]; + int ret; + + if (getuid() != 0) { + /* try XDG path first, fall back to /tmp */ + if (xdg_runtime_dir != NULL) + directory = xdg_runtime_dir; + else + directory = fallback; + } + /* create DPDK 
subdirectory under runtime dir */ + ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory); + if (ret < 0 || ret == sizeof(tmp)) { + RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n"); + return -1; + } + + /* create prefix-specific subdirectory under DPDK runtime dir */ + ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", + tmp, internal_config.hugefile_prefix); + if (ret < 0 || ret == sizeof(runtime_dir)) { + RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); + return -1; + } + + /* create the path if it doesn't exist. no "mkdir -p" here, so do it + * step by step. + */ + ret = mkdir(tmp, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + tmp, strerror(errno)); + return -1; + } + + ret = mkdir(runtime_dir, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + runtime_dir, strerror(errno)); + return -1; + } + + return 0; } -/* Return mbuf pool ops name */ const char * -rte_eal_mbuf_default_mempool_ops(void) +eal_get_runtime_dir(void) { - if (internal_config.user_mbuf_pool_ops_name == NULL) - return RTE_MBUF_DEFAULT_MEMPOOL_OPS; + return runtime_dir; +} +/* Return user provided mbuf pool ops name */ +const char * +rte_eal_mbuf_user_pool_ops(void) +{ return internal_config.user_mbuf_pool_ops_name; } @@ -282,12 +334,17 @@ eal_proc_type_detect(void) enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; const char *pathname = eal_runtime_config_path(); - /* if we can open the file but not get a write-lock we are a secondary - * process. NOTE: if we get a file handle back, we keep that open - * and don't close it to prevent a race condition between multiple opens */ - if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && - (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) - ptype = RTE_PROC_SECONDARY; + /* if there no shared config, there can be no secondary processes */ + if (!internal_config.no_shconf) { + /* if we can open the file but not get a write-lock we are a + * secondary process. NOTE: if we get a file handle back, we + * keep that open and don't close it to prevent a race condition + * between multiple opens. + */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + } RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", ptype == RTE_PROC_PRIMARY ? 
"PRIMARY" : "SECONDARY"); @@ -343,11 +400,14 @@ eal_usage(const char *prgname) eal_common_usage(); printf("EAL Linux options:\n" " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" + " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" " --"OPT_BASE_VIRTADDR" Base virtual address\n" " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" + " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" + " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { @@ -370,46 +430,45 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func ) } static int -eal_parse_socket_mem(char *socket_mem) +eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) { char * arg[RTE_MAX_NUMA_NODES]; char *end; int arg_num, i, len; uint64_t total_mem = 0; - len = strnlen(socket_mem, SOCKET_MEM_STRLEN); + len = strnlen(strval, SOCKET_MEM_STRLEN); if (len == SOCKET_MEM_STRLEN) { RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); return -1; } /* all other error cases will be caught later */ - if (!isdigit(socket_mem[len-1])) + if (!isdigit(strval[len-1])) return -1; /* split the optarg into separate socket values */ - arg_num = rte_strsplit(socket_mem, len, + arg_num = rte_strsplit(strval, len, arg, RTE_MAX_NUMA_NODES, ','); /* if split failed, or 0 arguments */ if (arg_num <= 0) return -1; - internal_config.force_sockets = 1; - /* parse each defined socket option */ errno = 0; for (i = 0; i < arg_num; i++) { + uint64_t val; end = NULL; - internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); + val = strtoull(arg[i], &end, 10); /* check for invalid input */ if ((errno != 0) || (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) return -1; - internal_config.socket_mem[i] *= 1024ULL; - internal_config.socket_mem[i] *= 1024ULL; - total_mem += internal_config.socket_mem[i]; + val <<= 20; + total_mem += val; + socket_arg[i] = val; } /* check if we have a positive amount of total memory */ @@ -557,13 +616,27 @@ eal_parse_args(int argc, char **argv) break; case OPT_SOCKET_MEM_NUM: - if (eal_parse_socket_mem(optarg) < 0) { + if (eal_parse_socket_arg(optarg, + internal_config.socket_mem) < 0) { RTE_LOG(ERR, EAL, "invalid parameters for --" OPT_SOCKET_MEM "\n"); eal_usage(prgname); ret = -1; goto out; } + internal_config.force_sockets = 1; + break; + + case OPT_SOCKET_LIMIT_NUM: + if (eal_parse_socket_arg(optarg, + internal_config.socket_limit) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_LIMIT "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + internal_config.force_socket_limits = 1; break; case OPT_BASE_VIRTADDR_NUM: @@ -591,7 +664,8 @@ eal_parse_args(int argc, char **argv) break; case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = optarg; + internal_config.user_mbuf_pool_ops_name = + strdup(optarg); break; default: @@ -613,6 +687,14 @@ eal_parse_args(int argc, char **argv) } } + /* create runtime data directory */ + if (internal_config.no_shconf == 0 && + eal_create_runtime_dir() < 0) { + RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); + ret = -1; + goto out; + } + if (eal_adjust_config(&internal_config) != 0) { ret = -1; goto out; 
@@ -638,23 +720,23 @@ out: return ret; } +static int +check_socket(const struct rte_memseg_list *msl, void *arg) +{ + int *socket_id = arg; + + return *socket_id == msl->socket_id; +} + static void eal_check_mem_on_local_socket(void) { - const struct rte_memseg *ms; - int i, socket_id; + int socket_id; socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); - ms = rte_eal_get_physmem_layout(); - - for (i = 0; i < RTE_MAX_MEMSEG; i++) - if (ms[i].socket_id == socket_id && - ms[i].len > 0) - return; - - RTE_LOG(WARNING, EAL, "WARNING: Master core has no " - "memory on local socket!\n"); + if (rte_memseg_list_walk(check_socket, &socket_id) == 0) + RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n"); } static int @@ -669,6 +751,8 @@ rte_eal_mcfg_complete(void) /* ALL shared mem_config related INIT DONE */ if (rte_config.process_type == RTE_PROC_PRIMARY) rte_config.mem_config->magic = RTE_MAGIC; + + internal_config.init_complete = 1; } /* @@ -689,24 +773,8 @@ rte_eal_iopl_init(void) #ifdef VFIO_PRESENT static int rte_eal_vfio_setup(void) { - int vfio_enabled = 0; - if (rte_vfio_enable("vfio")) return -1; - vfio_enabled = rte_vfio_is_enabled("vfio"); - - if (vfio_enabled) { - - /* if we are primary process, create a thread to communicate with - * secondary processes. the thread will use a socket to wait for - * requests from secondary process to send open file descriptors, - * because VFIO does not allow multiple open descriptors on a group or - * VFIO container. - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_mp_sync_setup() < 0) - return -1; - } return 0; } @@ -779,6 +847,24 @@ rte_eal_init(int argc, char **argv) return -1; } + rte_config_init(); + + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } + + /* Put mp channel init before bus scan so that we can init the vdev + * bus through mp channel in the secondary process before the bus scan. + */ + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -798,13 +884,17 @@ rte_eal_init(int argc, char **argv) "KNI module inserted\n"); } - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? 
+ eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { @@ -825,8 +915,6 @@ rte_eal_init(int argc, char **argv) rte_srand(rte_rdtsc()); - rte_config_init(); - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { rte_eal_init_alert("Cannot init logging."); rte_errno = ENOMEM; @@ -834,14 +922,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_errno = EFAULT; - return -1; - } - } - #ifdef VFIO_PRESENT if (rte_eal_vfio_setup() < 0) { rte_eal_init_alert("Cannot init VFIO\n"); @@ -850,6 +930,15 @@ rte_eal_init(int argc, char **argv) return -1; } #endif + /* in secondary processes, memory init may allocate additional fbarrays + * not present in primary processes, so to avoid any potential issues, + * initialize memzones first. + */ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } if (rte_eal_memory_init() < 0) { rte_eal_init_alert("Cannot init memory\n"); @@ -860,8 +949,8 @@ rte_eal_init(int argc, char **argv) /* the directories are locked during eal_hugepage_info_init */ eal_hugedirs_unlock(); - if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); rte_errno = ENODEV; return -1; } @@ -888,17 +977,12 @@ rte_eal_init(int argc, char **argv) eal_thread_init_master(rte_config.master_lcore); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_intr_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -919,7 +1003,7 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot create thread\n"); /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + snprintf(thread_name, sizeof(thread_name), "lcore-slave-%d", i); ret = rte_thread_setname(lcore_config[i].thread_id, thread_name); @@ -950,6 +1034,12 @@ rte_eal_init(int argc, char **argv) return -1; } +#ifdef VFIO_PRESENT + /* Register mp action after probe() so that we got enough info */ + if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0) + return -1; +#endif + /* initialize default service/lcore mappings and start running. Ignore * -ENOTSUP, as it indicates no service coremask passed to EAL. */ @@ -964,9 +1054,26 @@ rte_eal_init(int argc, char **argv) return fctret; } +static int +mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg __rte_unused) +{ + /* ms is const, so find this memseg */ + struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl); + + found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; + + return 0; +} + int __rte_experimental rte_eal_cleanup(void) { + /* if we're in a primary process, we need to mark hugepages as freeable + * so that finalization can release them back to the system. 
+ */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index c115e823..391d2a65 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -91,7 +90,7 @@ eal_alarm_callback(void *arg __rte_unused) rte_spinlock_lock(&alarm_list_lk); LIST_REMOVE(ap, next); - rte_free(ap); + free(ap); } if (!LIST_EMPTY(&alarm_list)) { @@ -122,7 +121,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) return -EINVAL; - new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0); + new_alarm = calloc(1, sizeof(*new_alarm)); if (new_alarm == NULL) return -ENOMEM; @@ -196,7 +195,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) if (ap->executing == 0) { LIST_REMOVE(ap, next); - rte_free(ap); + free(ap); count++; } else { /* If calling from other context, mark that alarm is executing @@ -220,7 +219,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) if (ap->executing == 0) { LIST_REMOVE(ap, next); - rte_free(ap); + free(ap); count++; ap = ap_prev; } else if (pthread_equal(ap->executing_id, pthread_self()) == 0) diff --git a/lib/librte_eal/linuxapp/eal/eal_cpuflags.c b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c new file mode 100644 index 00000000..d38296e1 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include + +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 16) +#include +#define HAS_AUXV 1 +#endif +#endif + +#include + +#ifndef HAS_AUXV +static unsigned long +getauxval(unsigned long type __rte_unused) +{ + errno = ENOTSUP; + return 0; +} +#endif + +#ifdef RTE_ARCH_64 +typedef Elf64_auxv_t Internal_Elfx_auxv_t; +#else +typedef Elf32_auxv_t Internal_Elfx_auxv_t; +#endif + +/** + * Provides a method for retrieving values from the auxiliary vector and + * possibly running a string comparison. + * + * @return Always returns a result. When the result is 0, check errno + * to see if an error occurred during processing. 
+ */ +static unsigned long +_rte_cpu_getauxval(unsigned long type, const char *str) +{ + unsigned long val; + + errno = 0; + val = getauxval(type); + + if (!val && (errno == ENOTSUP || errno == ENOENT)) { + int auxv_fd = open("/proc/self/auxv", O_RDONLY); + Internal_Elfx_auxv_t auxv; + + if (auxv_fd == -1) + return 0; + + errno = ENOENT; + while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { + if (auxv.a_type == type) { + errno = 0; + val = auxv.a_un.a_val; + if (str) + val = strcmp((const char *)val, str); + break; + } + } + close(auxv_fd); + } + + return val; +} + +unsigned long +rte_cpu_getauxval(unsigned long type) +{ + return _rte_cpu_getauxval(type, NULL); +} + +int +rte_cpu_strcmp_auxval(unsigned long type, const char *str) +{ + return _rte_cpu_getauxval(type, str); +} diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c new file mode 100644 index 00000000..1cf6aebf --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static bool monitor_started; + +#define EAL_UEV_MSG_LEN 4096 +#define EAL_UEV_MSG_ELEM_LEN 128 + +static void dev_uev_handler(__rte_unused void *param); + +/* identify the system layer which reports this event. */ +enum eal_dev_event_subsystem { + EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */ + EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */ + EAL_DEV_EVENT_SUBSYSTEM_VFIO, /* VFIO driver device event */ + EAL_DEV_EVENT_SUBSYSTEM_MAX +}; + +static int +dev_uev_socket_fd_create(void) +{ + struct sockaddr_nl addr; + int ret; + + intr_handle.fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC | + SOCK_NONBLOCK, + NETLINK_KOBJECT_UEVENT); + if (intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "create uevent fd failed.\n"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + addr.nl_pid = 0; + addr.nl_groups = 0xffffffff; + + ret = bind(intr_handle.fd, (struct sockaddr *) &addr, sizeof(addr)); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n"); + goto err; + } + + return 0; +err: + close(intr_handle.fd); + intr_handle.fd = -1; + return ret; +} + +static int +dev_uev_parse(const char *buf, struct rte_dev_event *event, int length) +{ + char action[EAL_UEV_MSG_ELEM_LEN]; + char subsystem[EAL_UEV_MSG_ELEM_LEN]; + char pci_slot_name[EAL_UEV_MSG_ELEM_LEN]; + int i = 0; + + memset(action, 0, EAL_UEV_MSG_ELEM_LEN); + memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN); + memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN); + + while (i < length) { + for (; i < length; i++) { + if (*buf) + break; + buf++; + } + /** + * check device uevent from kernel side, no need to check + * uevent from udev. 
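/*
 * Sketch of what a kernel uevent looks like to user space once it is read
 * from a NETLINK_KOBJECT_UEVENT socket such as the one created by
 * dev_uev_socket_fd_create() above: the datagram is a block of
 * NUL-separated "KEY=value" strings, which is what dev_uev_parse() walks.
 * Illustrative only; print_uevent_keys() is a hypothetical helper, not a
 * DPDK API.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

static void
print_uevent_keys(int uev_fd)
{
	char buf[4096];
	ssize_t len, off;

	len = recv(uev_fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return;
	buf[len] = '\0';
	/* first string is "ACTION@devpath", the rest are KEY=value pairs */
	for (off = 0; off < len; off += strlen(buf + off) + 1)
		printf("%s\n", buf + off);
}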
+ */ + if (!strncmp(buf, "libudev", 7)) { + buf += 7; + i += 7; + return -1; + } + if (!strncmp(buf, "ACTION=", 7)) { + buf += 7; + i += 7; + strlcpy(action, buf, sizeof(action)); + } else if (!strncmp(buf, "SUBSYSTEM=", 10)) { + buf += 10; + i += 10; + strlcpy(subsystem, buf, sizeof(subsystem)); + } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) { + buf += 14; + i += 14; + strlcpy(pci_slot_name, buf, sizeof(subsystem)); + event->devname = strdup(pci_slot_name); + } + for (; i < length; i++) { + if (*buf == '\0') + break; + buf++; + } + } + + /* parse the subsystem layer */ + if (!strncmp(subsystem, "uio", 3)) + event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO; + else if (!strncmp(subsystem, "pci", 3)) + event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI; + else if (!strncmp(subsystem, "vfio", 4)) + event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO; + else + return -1; + + /* parse the action type */ + if (!strncmp(action, "add", 3)) + event->type = RTE_DEV_EVENT_ADD; + else if (!strncmp(action, "remove", 6)) + event->type = RTE_DEV_EVENT_REMOVE; + else + return -1; + return 0; +} + +static void +dev_delayed_unregister(void *param) +{ + rte_intr_callback_unregister(&intr_handle, dev_uev_handler, param); + close(intr_handle.fd); + intr_handle.fd = -1; +} + +static void +dev_uev_handler(__rte_unused void *param) +{ + struct rte_dev_event uevent; + int ret; + char buf[EAL_UEV_MSG_LEN]; + + memset(&uevent, 0, sizeof(struct rte_dev_event)); + memset(buf, 0, EAL_UEV_MSG_LEN); + + ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT); + if (ret < 0 && errno == EAGAIN) + return; + else if (ret <= 0) { + /* connection is closed or broken, can not up again. */ + RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n"); + rte_eal_alarm_set(1, dev_delayed_unregister, NULL); + return; + } + + ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "It is not an valid event " + "that need to be handle.\n"); + return; + } + + RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n", + uevent.devname, uevent.type, uevent.subsystem); + + if (uevent.devname) + dev_callback_process(uevent.devname, uevent.type); +} + +int __rte_experimental +rte_dev_event_monitor_start(void) +{ + int ret; + + if (monitor_started) + return 0; + + ret = dev_uev_socket_fd_create(); + if (ret) { + RTE_LOG(ERR, EAL, "error create device event fd.\n"); + return -1; + } + + intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT; + ret = rte_intr_callback_register(&intr_handle, dev_uev_handler, NULL); + + if (ret) { + RTE_LOG(ERR, EAL, "fail to register uevent callback.\n"); + return -1; + } + + monitor_started = true; + + return 0; +} + +int __rte_experimental +rte_dev_event_monitor_stop(void) +{ + int ret; + + if (!monitor_started) + return 0; + + ret = rte_intr_callback_unregister(&intr_handle, dev_uev_handler, + (void *)-1); + if (ret < 0) { + RTE_LOG(ERR, EAL, "fail to unregister uevent callback.\n"); + return ret; + } + + close(intr_handle.fd); + intr_handle.fd = -1; + monitor_started = false; + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 8bbf771a..3a7d4b22 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -14,7 +14,11 @@ #include #include #include +#include #include +#include + +#include /* for hugetlb-related flags */ #include #include @@ -30,6 +34,40 @@ #include "eal_filesystem.h" static const char sys_dir_path[] = 
"/sys/kernel/mm/hugepages"; +static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node"; + +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +map_shared_memory(const char *filename, const size_t mem_size, int flags) +{ + void *retval; + int fd = open(filename, flags, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +static void * +open_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR); +} + +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); +} /* this function is only called from eal_hugepage_info_init which itself * is only called from a primary process */ @@ -70,6 +108,45 @@ get_num_hugepages(const char *subdir) return num_pages; } +static uint32_t +get_num_hugepages_on_node(const char *subdir, unsigned int socket) +{ + char path[PATH_MAX], socketpath[PATH_MAX]; + DIR *socketdir; + unsigned long num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + + snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages", + sys_pages_numa_dir_path, socket); + + socketdir = opendir(socketpath); + if (socketdir) { + /* Keep calm and carry on */ + closedir(socketdir); + } else { + /* Can't find socket dir, so ignore it */ + return 0; + } + + snprintf(path, sizeof(path), "%s/%s/%s", + socketpath, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* + * we want to return a uint32_t and more than this looks suspicious + * anyway ... 
+ */ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + static uint64_t get_default_hp_size(void) { @@ -94,8 +171,8 @@ get_default_hp_size(void) return size; } -static const char * -get_hugepage_dir(uint64_t hugepage_sz) +static int +get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int len) { enum proc_mount_fieldnames { DEVICE = 0, @@ -113,7 +190,7 @@ get_hugepage_dir(uint64_t hugepage_sz) const char split_tok = ' '; char *splitstr[_FIELDNAME_MAX]; char buf[BUFSIZ]; - char *retval = NULL; + int retval = -1; FILE *fd = fopen(proc_mounts, "r"); if (fd == NULL) @@ -140,7 +217,8 @@ get_hugepage_dir(uint64_t hugepage_sz) /* if no explicit page size, the default page size is compared */ if (pagesz_str == NULL){ if (hugepage_sz == default_size){ - retval = strdup(splitstr[MOUNTPT]); + strlcpy(hugedir, splitstr[MOUNTPT], len); + retval = 0; break; } } @@ -148,7 +226,8 @@ get_hugepage_dir(uint64_t hugepage_sz) else { uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); if (pagesz == hugepage_sz) { - retval = strdup(splitstr[MOUNTPT]); + strlcpy(hugedir, splitstr[MOUNTPT], len); + retval = 0; break; } } @@ -207,11 +286,9 @@ clear_hugedir(const char * hugedir) /* non-blocking lock */ lck_result = flock(fd, LOCK_EX | LOCK_NB); - /* if lock succeeds, unlock and remove the file */ - if (lck_result != -1) { - flock(fd, LOCK_UN); + /* if lock succeeds, remove the file */ + if (lck_result != -1) unlinkat(dir_fd, dirent->d_name, 0); - } close (fd); dirent = readdir(dir); } @@ -238,17 +315,49 @@ compare_hpi(const void *a, const void *b) return hpi_b->hugepage_sz - hpi_a->hugepage_sz; } -/* - * when we initialize the hugepage info, everything goes - * to socket 0 by default. it will later get sorted by memory - * initialization procedure. 
- */ -int -eal_hugepage_info_init(void) +static void +calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent) { - const char dirent_start_text[] = "hugepages-"; + uint64_t total_pages = 0; + unsigned int i; + + /* + * first, try to put all hugepages into relevant sockets, but + * if first attempts fails, fall back to collecting all pages + * in one socket and sorting them later + */ + total_pages = 0; + /* we also don't want to do this for legacy init */ + if (!internal_config.legacy_mem) + for (i = 0; i < rte_socket_count(); i++) { + int socket = rte_socket_id_by_idx(i); + unsigned int num_pages = + get_num_hugepages_on_node( + dirent->d_name, socket); + hpi->num_pages[socket] = num_pages; + total_pages += num_pages; + } + /* + * we failed to sort memory from the get go, so fall + * back to old way + */ + if (total_pages == 0) { + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + +#ifndef RTE_ARCH_64 + /* for 32-bit systems, limit number of hugepages to + * 1GB per page size */ + hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], + RTE_PGSIZE_1G / hpi->hugepage_sz); +#endif + } +} + +static int +hugepage_info_init(void) +{ const char dirent_start_text[] = "hugepages-"; const size_t dirent_start_len = sizeof(dirent_start_text) - 1; - unsigned i, num_sizes = 0; + unsigned int i, num_sizes = 0; DIR *dir; struct dirent *dirent; @@ -273,10 +382,10 @@ eal_hugepage_info_init(void) hpi = &internal_config.hugepage_info[num_sizes]; hpi->hugepage_sz = rte_str_to_size(&dirent->d_name[dirent_start_len]); - hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); /* first, check if we have a mountpoint */ - if (hpi->hugedir == NULL) { + if (get_hugepage_dir(hpi->hugepage_sz, + hpi->hugedir, sizeof(hpi->hugedir)) < 0) { uint32_t num_pages; num_pages = get_num_hugepages(dirent->d_name); @@ -286,6 +395,22 @@ eal_hugepage_info_init(void) "%" PRIu64 " reserved, but no mounted " "hugetlbfs found for that size\n", num_pages, hpi->hugepage_sz); + /* if we have kernel support for reserving hugepages + * through mmap, and we're in in-memory mode, treat this + * page size as valid. we cannot be in legacy mode at + * this point because we've checked this earlier in the + * init process. 
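/*
 * Sketch of the anonymous hugepage mapping referred to in the comment
 * above: with MAP_HUGE_SHIFT the page size is encoded directly in the
 * mmap() flags, so no hugetlbfs mount is required. map_anon_hugepage()
 * is a hypothetical helper, not part of this patch.
 */
#include <stddef.h>
#include <sys/mman.h>
#include <linux/mman.h>	/* MAP_HUGE_SHIFT on older glibc */

static void *
map_anon_hugepage(size_t len, unsigned int log2_page_sz)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
			(log2_page_sz << MAP_HUGE_SHIFT);
	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE, flags, -1, 0);

	return va == MAP_FAILED ? NULL : va;
}
/* e.g. map_anon_hugepage(2UL << 20, 21) asks the kernel for one 2 MB page */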
+ */ +#ifdef MAP_HUGE_SHIFT + if (internal_config.in_memory) { + RTE_LOG(DEBUG, EAL, "In-memory mode enabled, " + "hugepages of size %" PRIu64 " bytes " + "will be allocated anonymously\n", + hpi->hugepage_sz); + calc_num_pages(hpi, dirent); + num_sizes++; + } +#endif continue; } @@ -302,16 +427,7 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; - /* for now, put all pages into socket 0, - * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, limit number of hugepages to - * 1GB per page size */ - hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], - RTE_PGSIZE_1G / hpi->hugepage_sz); -#endif + calc_num_pages(hpi, dirent); num_sizes++; } @@ -328,11 +444,82 @@ eal_hugepage_info_init(void) sizeof(internal_config.hugepage_info[0]), compare_hpi); /* now we have all info, check we have at least one valid size */ - for (i = 0; i < num_sizes; i++) - if (internal_config.hugepage_info[i].hugedir != NULL && - internal_config.hugepage_info[i].num_pages[0] > 0) + for (i = 0; i < num_sizes; i++) { + /* pages may no longer all be on socket 0, so check all */ + unsigned int j, num_pages = 0; + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) + num_pages += hpi->num_pages[j]; + if (num_pages > 0) return 0; + } /* no valid hugepage mounts available, return error */ return -1; } + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. + */ +int +eal_hugepage_info_init(void) +{ + struct hugepage_info *hpi, *tmp_hpi; + unsigned int i; + + if (hugepage_info_init() < 0) + return -1; + + /* for no shared files mode, we're done */ + if (internal_config.no_shconf) + return 0; + + hpi = &internal_config.hugepage_info[0]; + + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + return -1; + } + + memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info)); + + /* we've copied file descriptors along with everything else, but they + * will be invalid in secondary process, so overwrite them + */ + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + struct hugepage_info *tmp = &tmp_hpi[i]; + tmp->lock_descriptor = -1; + } + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} + +int eal_hugepage_info_read(void) +{ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + tmp_hpi = open_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to open shared memory!\n"); + return -1; + } + + memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info)); + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index f86f22f7..4076c6d6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -405,8 +404,7 @@ rte_intr_callback_register(const struct rte_intr_handle 
*intr_handle, } /* allocate a new interrupt callback entity */ - callback = rte_zmalloc("interrupt callback list", - sizeof(*callback), 0); + callback = calloc(1, sizeof(*callback)); if (callback == NULL) { RTE_LOG(ERR, EAL, "Can not allocate memory\n"); return -ENOMEM; @@ -420,7 +418,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle, TAILQ_FOREACH(src, &intr_sources, next) { if (src->intr_handle.fd == intr_handle->fd) { /* we had no interrupts for this */ - if TAILQ_EMPTY(&src->callbacks) + if (TAILQ_EMPTY(&src->callbacks)) wake_thread = 1; TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); @@ -431,10 +429,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle, /* no existing callbacks for this - add new source */ if (src == NULL) { - if ((src = rte_zmalloc("interrupt source list", - sizeof(*src), 0)) == NULL) { + src = calloc(1, sizeof(*src)); + if (src == NULL) { RTE_LOG(ERR, EAL, "Can not allocate memory\n"); - rte_free(callback); + free(callback); ret = -ENOMEM; } else { src->intr_handle = *intr_handle; @@ -501,7 +499,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || cb->cb_arg == cb_arg)) { TAILQ_REMOVE(&src->callbacks, cb, next); - rte_free(cb); + free(cb); ret++; } } @@ -509,7 +507,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, /* all callbacks for that source are removed. */ if (TAILQ_EMPTY(&src->callbacks)) { TAILQ_REMOVE(&intr_sources, src, next); - rte_free(src); + free(src); } } @@ -559,6 +557,9 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; #endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; /* unknown handle type */ default: RTE_LOG(ERR, EAL, @@ -606,6 +607,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; #endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; /* unknown handle type */ default: RTE_LOG(ERR, EAL, @@ -674,7 +678,10 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bytes_read = 0; call = true; break; - + case RTE_INTR_HANDLE_DEV_EVENT: + bytes_read = 0; + call = true; + break; default: bytes_read = 1; break; @@ -844,8 +851,7 @@ eal_intr_thread_main(__rte_unused void *arg) int rte_eal_intr_init(void) { - int ret = 0, ret_1 = 0; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; + int ret = 0; /* init the global interrupt source head */ TAILQ_INIT(&intr_sources); @@ -860,23 +866,15 @@ rte_eal_intr_init(void) } /* create the host thread to wait/handle the interrupt */ - ret = pthread_create(&intr_thread, NULL, + ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL, eal_intr_thread_main, NULL); if (ret != 0) { - rte_errno = ret; + rte_errno = -ret; RTE_LOG(ERR, EAL, "Failed to create thread for interrupt handling\n"); - } else { - /* Set thread_name for aid in debugging. 
*/ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "eal-intr-thread"); - ret_1 = rte_thread_setname(intr_thread, thread_name); - if (ret_1 != 0) - RTE_LOG(DEBUG, EAL, - "Failed to set thread name for interrupt handling\n"); } - return -ret; + return ret; } static void diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index ff145884..9d02dddb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -25,25 +25,14 @@ static ssize_t console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) { - char copybuf[BUFSIZ + 1]; ssize_t ret; - uint32_t loglevel; /* write on stdout */ ret = fwrite(buf, 1, size, stdout); fflush(stdout); - /* truncate message if too big (should not happen) */ - if (size > BUFSIZ) - size = BUFSIZ; - /* Syslog error levels are from 0 to 7, so subtract 1 to convert */ - loglevel = rte_log_cur_msg_loglevel() - 1; - memcpy(copybuf, buf, size); - copybuf[size] = '\0'; - - /* write on syslog too */ - syslog(loglevel, "%s", copybuf); + syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf); return ret; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c new file mode 100644 index 00000000..aa95551a --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c @@ -0,0 +1,1363 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +#include +#include +#endif +#include +#include /* for hugetlb-related mmap flags */ + +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_internal_cfg.h" +#include "eal_memalloc.h" +#include "eal_private.h" + +const int anonymous_hugepages_supported = +#ifdef MAP_HUGE_SHIFT + 1; +#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT +#else + 0; +#define RTE_MAP_HUGE_SHIFT 26 +#endif + +/* + * not all kernel version support fallocate on hugetlbfs, so fall back to + * ftruncate and disallow deallocation if fallocate is not supported. + */ +static int fallocate_supported = -1; /* unknown */ + +/* for single-file segments, we need some kind of mechanism to keep track of + * which hugepages can be freed back to the system, and which cannot. we cannot + * use flock() because they don't allow locking parts of a file, and we cannot + * use fcntl() due to issues with their semantics, so we will have to rely on a + * bunch of lockfiles for each page. + * + * we cannot know how many pages a system will have in advance, but we do know + * that they come in lists, and we know lengths of these lists. so, simply store + * a malloc'd array of fd's indexed by list and segment index. + * + * they will be initialized at startup, and filled as we allocate/deallocate + * segments. also, use this to track memseg list proper fd. 
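/*
 * Standalone sketch of the lock-file protocol described in the comment
 * above: every page gets its own lock file, users of the page hold a
 * shared flock() on it, and a process that can upgrade to an exclusive
 * lock knows it is the last user and may reclaim the page. The helpers
 * below are hypothetical, not the DPDK implementation.
 */
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

/* mark the page as used by this process; returns the lock fd or -1 */
static int
page_lock_acquire(const char *lockpath)
{
	int fd = open(lockpath, O_CREAT | O_RDWR, 0660);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_SH | LOCK_NB) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

/* returns 1 if we were the last user (lock file removed), 0 otherwise */
static int
page_lock_release(int fd, const char *lockpath)
{
	int last = flock(fd, LOCK_EX | LOCK_NB) == 0;

	if (last)
		unlink(lockpath);
	close(fd);	/* closing the fd drops whatever lock we still hold */
	return last;
}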
+ */ +static struct { + int *fds; /**< dynamically allocated array of segment lock fd's */ + int memseg_list_fd; /**< memseg list fd */ + int len; /**< total length of the array */ + int count; /**< entries used in an array */ +} lock_fds[RTE_MAX_MEMSEG_LISTS]; + +/** local copy of a memory map, used to synchronize memory hotplug in MP */ +static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS]; + +static sigjmp_buf huge_jmpenv; + +static void __rte_unused huge_sigbus_handler(int signo __rte_unused) +{ + siglongjmp(huge_jmpenv, 1); +} + +/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile, + * non-static local variable in the stack frame calling sigsetjmp might be + * clobbered by a call to longjmp. + */ +static int __rte_unused huge_wrap_sigsetjmp(void) +{ + return sigsetjmp(huge_jmpenv, 1); +} + +static struct sigaction huge_action_old; +static int huge_need_recover; + +static void __rte_unused +huge_register_sigbus(void) +{ + sigset_t mask; + struct sigaction action; + + sigemptyset(&mask); + sigaddset(&mask, SIGBUS); + action.sa_flags = 0; + action.sa_mask = mask; + action.sa_handler = huge_sigbus_handler; + + huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old); +} + +static void __rte_unused +huge_recover_sigbus(void) +{ + if (huge_need_recover) { + sigaction(SIGBUS, &huge_action_old, NULL); + huge_need_recover = 0; + } +} + +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +static bool +check_numa(void) +{ + bool ret = true; + /* Check if kernel supports NUMA. */ + if (numa_available() != 0) { + RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n"); + ret = false; + } + return ret; +} + +static void +prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) +{ + RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); + if (get_mempolicy(oldpolicy, oldmask->maskp, + oldmask->size + 1, 0, 0) < 0) { + RTE_LOG(ERR, EAL, + "Failed to get current mempolicy: %s. 
" + "Assuming MPOL_DEFAULT.\n", strerror(errno)); + oldpolicy = MPOL_DEFAULT; + } + RTE_LOG(DEBUG, EAL, + "Setting policy MPOL_PREFERRED for socket %d\n", + socket_id); + numa_set_preferred(socket_id); +} + +static void +restore_numa(int *oldpolicy, struct bitmask *oldmask) +{ + RTE_LOG(DEBUG, EAL, + "Restoring previous memory policy: %d\n", *oldpolicy); + if (*oldpolicy == MPOL_DEFAULT) { + numa_set_localalloc(); + } else if (set_mempolicy(*oldpolicy, oldmask->maskp, + oldmask->size + 1) < 0) { + RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n", + strerror(errno)); + numa_set_localalloc(); + } + numa_free_cpumask(oldmask); +} +#endif + +/* + * uses fstat to report the size of a file on disk + */ +static off_t +get_file_size(int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return 0; + return st.st_size; +} + +/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */ +static int lock(int fd, int type) +{ + int ret; + + /* flock may be interrupted */ + do { + ret = flock(fd, type | LOCK_NB); + } while (ret && errno == EINTR); + + if (ret && errno == EWOULDBLOCK) { + /* couldn't lock */ + return 0; + } else if (ret) { + RTE_LOG(ERR, EAL, "%s(): error calling flock(): %s\n", + __func__, strerror(errno)); + return -1; + } + /* lock was successful */ + return 1; +} + +static int get_segment_lock_fd(int list_idx, int seg_idx) +{ + char path[PATH_MAX] = {0}; + int fd; + + if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds)) + return -1; + if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len) + return -1; + + fd = lock_fds[list_idx].fds[seg_idx]; + /* does this lock already exist? */ + if (fd >= 0) + return fd; + + eal_get_hugefile_lock_path(path, sizeof(path), + list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx); + + fd = open(path, O_CREAT | O_RDWR, 0660); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error creating lockfile '%s': %s\n", + __func__, path, strerror(errno)); + return -1; + } + /* take out a read lock */ + if (lock(fd, LOCK_SH) != 1) { + RTE_LOG(ERR, EAL, "%s(): failed to take out a readlock on '%s': %s\n", + __func__, path, strerror(errno)); + close(fd); + return -1; + } + /* store it for future reference */ + lock_fds[list_idx].fds[seg_idx] = fd; + lock_fds[list_idx].count++; + return fd; +} + +static int unlock_segment(int list_idx, int seg_idx) +{ + int fd, ret; + + if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds)) + return -1; + if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len) + return -1; + + fd = lock_fds[list_idx].fds[seg_idx]; + + /* upgrade lock to exclusive to see if we can remove the lockfile */ + ret = lock(fd, LOCK_EX); + if (ret == 1) { + /* we've succeeded in taking exclusive lock, this lockfile may + * be removed. + */ + char path[PATH_MAX] = {0}; + eal_get_hugefile_lock_path(path, sizeof(path), + list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx); + if (unlink(path)) { + RTE_LOG(ERR, EAL, "%s(): error removing lockfile '%s': %s\n", + __func__, path, strerror(errno)); + } + } + /* we don't want to leak the fd, so even if we fail to lock, close fd + * and remove it from list anyway. 
+ */ + close(fd); + lock_fds[list_idx].fds[seg_idx] = -1; + lock_fds[list_idx].count--; + + if (ret < 0) + return -1; + return 0; +} + +static int +get_seg_fd(char *path, int buflen, struct hugepage_info *hi, + unsigned int list_idx, unsigned int seg_idx) +{ + int fd; + + if (internal_config.single_file_segments) { + /* create a hugepage file path */ + eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx); + + fd = lock_fds[list_idx].memseg_list_fd; + + if (fd < 0) { + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", + __func__, strerror(errno)); + return -1; + } + /* take out a read lock and keep it indefinitely */ + if (lock(fd, LOCK_SH) < 0) { + RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n", + __func__, strerror(errno)); + close(fd); + return -1; + } + lock_fds[list_idx].memseg_list_fd = fd; + } + } else { + /* create a hugepage file path */ + eal_get_hugefile_path(path, buflen, hi->hugedir, + list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx); + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, + strerror(errno)); + return -1; + } + /* take out a read lock */ + if (lock(fd, LOCK_SH) < 0) { + RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n", + __func__, strerror(errno)); + close(fd); + return -1; + } + } + return fd; +} + +static int +resize_hugefile(int fd, char *path, int list_idx, int seg_idx, + uint64_t fa_offset, uint64_t page_sz, bool grow) +{ + bool again = false; + do { + if (fallocate_supported == 0) { + /* we cannot deallocate memory if fallocate() is not + * supported, and hugepage file is already locked at + * creation, so no further synchronization needed. + */ + + if (!grow) { + RTE_LOG(DEBUG, EAL, "%s(): fallocate not supported, not freeing page back to the system\n", + __func__); + return -1; + } + uint64_t new_size = fa_offset + page_sz; + uint64_t cur_size = get_file_size(fd); + + /* fallocate isn't supported, fall back to ftruncate */ + if (new_size > cur_size && + ftruncate(fd, new_size) < 0) { + RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n", + __func__, strerror(errno)); + return -1; + } + } else { + int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_KEEP_SIZE; + int ret, lock_fd; + + /* if fallocate() is supported, we need to take out a + * read lock on allocate (to prevent other processes + * from deallocating this page), and take out a write + * lock on deallocate (to ensure nobody else is using + * this page). + * + * read locks on page itself are already taken out at + * file creation, in get_seg_fd(). + * + * we cannot rely on simple use of flock() call, because + * we need to be able to lock a section of the file, + * and we cannot use fcntl() locks, because of numerous + * problems with their semantics, so we will use + * deterministically named lock files for each section + * of the file. + * + * if we're shrinking the file, we want to upgrade our + * lock from shared to exclusive. + * + * lock_fd is an fd for a lockfile, not for the segment + * list. + */ + lock_fd = get_segment_lock_fd(list_idx, seg_idx); + + if (!grow) { + /* we are using this lockfile to determine + * whether this particular page is locked, as we + * are in single file segments mode and thus + * cannot use regular flock() to get this info. + * + * we want to try and take out an exclusive lock + * on the lock file to determine if we're the + * last ones using this page, and if not, we + * won't be shrinking it, and will instead exit + * prematurely. 
+ */ + ret = lock(lock_fd, LOCK_EX); + + /* drop the lock on the lockfile, so that even + * if we couldn't shrink the file ourselves, we + * are signalling to other processes that we're + * no longer using this page. + */ + if (unlock_segment(list_idx, seg_idx)) + RTE_LOG(ERR, EAL, "Could not unlock segment\n"); + + /* additionally, if this was the last lock on + * this segment list, we can safely close the + * page file fd, so that one of the processes + * could then delete the file after shrinking. + */ + if (ret < 1 && lock_fds[list_idx].count == 0) { + close(fd); + lock_fds[list_idx].memseg_list_fd = -1; + } + + if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not lock segment\n"); + return -1; + } + if (ret == 0) + /* failed to lock, not an error. */ + return 0; + } + + /* grow or shrink the file */ + ret = fallocate(fd, flags, fa_offset, page_sz); + + if (ret < 0) { + if (fallocate_supported == -1 && + errno == ENOTSUP) { + RTE_LOG(ERR, EAL, "%s(): fallocate() not supported, hugepage deallocation will be disabled\n", + __func__); + again = true; + fallocate_supported = 0; + } else { + RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n", + __func__, + strerror(errno)); + return -1; + } + } else { + fallocate_supported = 1; + + /* we've grew/shrunk the file, and we hold an + * exclusive lock now. check if there are no + * more segments active in this segment list, + * and remove the file if there aren't. + */ + if (lock_fds[list_idx].count == 0) { + if (unlink(path)) + RTE_LOG(ERR, EAL, "%s(): unlinking '%s' failed: %s\n", + __func__, path, + strerror(errno)); + close(fd); + lock_fds[list_idx].memseg_list_fd = -1; + } + } + } + } while (again); + return 0; +} + +static int +alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, + struct hugepage_info *hi, unsigned int list_idx, + unsigned int seg_idx) +{ +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + int cur_socket_id = 0; +#endif + uint64_t map_offset; + rte_iova_t iova; + void *va; + char path[PATH_MAX]; + int ret = 0; + int fd; + size_t alloc_sz; + int flags; + void *new_addr; + + alloc_sz = hi->hugepage_sz; + if (!internal_config.single_file_segments && + internal_config.in_memory && + anonymous_hugepages_supported) { + int log2, flags; + + log2 = rte_log2_u32(alloc_sz); + /* as per mmap() manpage, all page sizes are log2 of page size + * shifted by MAP_HUGE_SHIFT + */ + flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED | + MAP_PRIVATE | MAP_ANONYMOUS; + fd = -1; + va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0); + + /* single-file segments codepath will never be active because + * in-memory mode is incompatible with it and it's stopped at + * EAL initialization stage, however the compiler doesn't know + * that and complains about map_offset being used uninitialized + * on failure codepaths while having in-memory mode enabled. so, + * assign a value here. 
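/*
 * Sketch of the grow/shrink primitive that resize_hugefile() above builds
 * on: growing uses fallocate() (falling back to ftruncate() where the
 * filesystem does not support fallocate()), while shrinking punches a
 * hole so the page is returned to the kernel without truncating the
 * file. Hypothetical helper, not the DPDK code path.
 */
#define _GNU_SOURCE	/* for fallocate() */
#include <errno.h>
#include <fcntl.h>
#include <linux/falloc.h>	/* FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE */
#include <stdint.h>
#include <unistd.h>

static int
hugefile_resize(int fd, uint64_t offset, uint64_t page_sz, int grow)
{
	int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

	if (fallocate(fd, flags, offset, page_sz) == 0)
		return 0;
	if (grow && errno == ENOTSUP) {
		/* fall back to extending the file; holes cannot be punched */
		return ftruncate(fd, offset + page_sz);
	}
	return -1;
}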
+ */ + map_offset = 0; + } else { + /* takes out a read lock on segment or segment list */ + fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n"); + return -1; + } + + if (internal_config.single_file_segments) { + map_offset = seg_idx * alloc_sz; + ret = resize_hugefile(fd, path, list_idx, seg_idx, + map_offset, alloc_sz, true); + if (ret < 0) + goto resized; + } else { + map_offset = 0; + if (ftruncate(fd, alloc_sz) < 0) { + RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n", + __func__, strerror(errno)); + goto resized; + } + if (internal_config.hugepage_unlink) { + if (unlink(path)) { + RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n", + __func__, strerror(errno)); + goto resized; + } + } + } + + /* + * map the segment, and populate page tables, the kernel fills + * this segment with zeros if it's a new page. + */ + va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, + map_offset); + } + + if (va == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__, + strerror(errno)); + /* mmap failed, but the previous region might have been + * unmapped anyway. try to remap it + */ + goto unmapped; + } + if (va != addr) { + RTE_LOG(DEBUG, EAL, "%s(): wrong mmap() address\n", __func__); + munmap(va, alloc_sz); + goto resized; + } + + /* In linux, hugetlb limitations, like cgroup, are + * enforced at fault time instead of mmap(), even + * with the option of MAP_POPULATE. Kernel will send + * a SIGBUS signal. To avoid to be killed, save stack + * environment here, if SIGBUS happens, we can jump + * back here. + */ + if (huge_wrap_sigsetjmp()) { + RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more hugepages of size %uMB\n", + (unsigned int)(alloc_sz >> 20)); + goto mapped; + } + + /* we need to trigger a write to the page to enforce page fault and + * ensure that page is accessible to us, but we can't overwrite value + * that is already there, so read the old value, and write itback. + * kernel populates the page with zeroes initially. + */ + *(volatile int *)addr = *(volatile int *)addr; + + iova = rte_mem_virt2iova(addr); + if (iova == RTE_BAD_PHYS_ADDR) { + RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n", + __func__); + goto mapped; + } + +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0); + + if (cur_socket_id != socket_id) { + RTE_LOG(DEBUG, EAL, + "%s(): allocation happened on wrong socket (wanted %d, got %d)\n", + __func__, socket_id, cur_socket_id); + goto mapped; + } +#endif + /* for non-single file segments that aren't in-memory, we can close fd + * here */ + if (!internal_config.single_file_segments && !internal_config.in_memory) + close(fd); + + ms->addr = addr; + ms->hugepage_sz = alloc_sz; + ms->len = alloc_sz; + ms->nchannel = rte_memory_get_nchannel(); + ms->nrank = rte_memory_get_nrank(); + ms->iova = iova; + ms->socket_id = socket_id; + + return 0; + +mapped: + munmap(addr, alloc_sz); +unmapped: + flags = MAP_FIXED; +#ifdef RTE_ARCH_PPC_64 + flags |= MAP_HUGETLB; +#endif + new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags); + if (new_addr != addr) { + if (new_addr != NULL) + munmap(new_addr, alloc_sz); + /* we're leaving a hole in our virtual address space. if + * somebody else maps this hole now, we could accidentally + * override it in the future. 
+ */ + RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n"); + } +resized: + /* in-memory mode will never be single-file-segments mode */ + if (internal_config.single_file_segments) { + resize_hugefile(fd, path, list_idx, seg_idx, map_offset, + alloc_sz, false); + /* ignore failure, can't make it any worse */ + } else { + /* only remove file if we can take out a write lock */ + if (internal_config.hugepage_unlink == 0 && + internal_config.in_memory == 0 && + lock(fd, LOCK_EX) == 1) + unlink(path); + close(fd); + } + return -1; +} + +static int +free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + unsigned int list_idx, unsigned int seg_idx) +{ + uint64_t map_offset; + char path[PATH_MAX]; + int fd, ret; + + /* erase page data */ + memset(ms->addr, 0, ms->len); + + if (mmap(ms->addr, ms->len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == + MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "couldn't unmap page\n"); + return -1; + } + + /* if we've already unlinked the page, nothing needs to be done */ + if (internal_config.hugepage_unlink) { + memset(ms, 0, sizeof(*ms)); + return 0; + } + + /* if we are not in single file segments mode, we're going to unmap the + * segment and thus drop the lock on original fd, but hugepage dir is + * now locked so we can take out another one without races. + */ + fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx); + if (fd < 0) + return -1; + + if (internal_config.single_file_segments) { + map_offset = seg_idx * ms->len; + if (resize_hugefile(fd, path, list_idx, seg_idx, map_offset, + ms->len, false)) + return -1; + ret = 0; + } else { + /* if we're able to take out a write lock, we're the last one + * holding onto this page. + */ + ret = lock(fd, LOCK_EX); + if (ret >= 0) { + /* no one else is using this page */ + if (ret == 1) + unlink(path); + } + /* closing fd will drop the lock */ + close(fd); + } + + memset(ms, 0, sizeof(*ms)); + + return ret < 0 ? -1 : 0; +} + +struct alloc_walk_param { + struct hugepage_info *hi; + struct rte_memseg **ms; + size_t page_sz; + unsigned int segs_allocated; + unsigned int n_segs; + int socket; + bool exact; +}; +static int +alloc_seg_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct alloc_walk_param *wa = arg; + struct rte_memseg_list *cur_msl; + size_t page_sz; + int cur_idx, start_idx, j, dir_fd = -1; + unsigned int msl_idx, need, i; + + if (msl->page_sz != wa->page_sz) + return 0; + if (msl->socket_id != wa->socket) + return 0; + + page_sz = (size_t)msl->page_sz; + + msl_idx = msl - mcfg->memsegs; + cur_msl = &mcfg->memsegs[msl_idx]; + + need = wa->n_segs; + + /* try finding space in memseg list */ + cur_idx = rte_fbarray_find_next_n_free(&cur_msl->memseg_arr, 0, need); + if (cur_idx < 0) + return 0; + start_idx = cur_idx; + + /* do not allow any page allocations during the time we're allocating, + * because file creation and locking operations are not atomic, + * and we might be the first or the last ones to use a particular page, + * so we need to ensure atomicity of every operation. + * + * during init, we already hold a write lock, so don't try to take out + * another one. 
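/*
 * The comment above describes the coarse lock taken while pages are being
 * allocated: the hugepage directory itself is opened and flock()ed
 * exclusively so no other process can create or remove page files
 * concurrently. A minimal sketch of that pattern (hypothetical helper):
 */
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

static int
hugedir_lock(const char *hugedir)
{
	int fd = open(hugedir, O_RDONLY);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_EX) < 0) {	/* blocks until other holders finish */
		close(fd);
		return -1;
	}
	return fd;	/* close(fd) later releases the directory lock */
}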
+ */ + if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) { + dir_fd = open(wa->hi->hugedir, O_RDONLY); + if (dir_fd < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", + __func__, wa->hi->hugedir, strerror(errno)); + return -1; + } + /* blocking writelock */ + if (flock(dir_fd, LOCK_EX)) { + RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", + __func__, wa->hi->hugedir, strerror(errno)); + close(dir_fd); + return -1; + } + } + + for (i = 0; i < need; i++, cur_idx++) { + struct rte_memseg *cur; + void *map_addr; + + cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx); + map_addr = RTE_PTR_ADD(cur_msl->base_va, + cur_idx * page_sz); + + if (alloc_seg(cur, map_addr, wa->socket, wa->hi, + msl_idx, cur_idx)) { + RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, but only %i were allocated\n", + need, i); + + /* if exact number wasn't requested, stop */ + if (!wa->exact) + goto out; + + /* clean up */ + for (j = start_idx; j < cur_idx; j++) { + struct rte_memseg *tmp; + struct rte_fbarray *arr = + &cur_msl->memseg_arr; + + tmp = rte_fbarray_get(arr, j); + rte_fbarray_set_free(arr, j); + + /* free_seg may attempt to create a file, which + * may fail. + */ + if (free_seg(tmp, wa->hi, msl_idx, j)) + RTE_LOG(DEBUG, EAL, "Cannot free page\n"); + } + /* clear the list */ + if (wa->ms) + memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs); + + if (dir_fd >= 0) + close(dir_fd); + return -1; + } + if (wa->ms) + wa->ms[i] = cur; + + rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx); + } +out: + wa->segs_allocated = i; + if (i > 0) + cur_msl->version++; + if (dir_fd >= 0) + close(dir_fd); + return 1; +} + +struct free_walk_param { + struct hugepage_info *hi; + struct rte_memseg *ms; +}; +static int +free_seg_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *found_msl; + struct free_walk_param *wa = arg; + uintptr_t start_addr, end_addr; + int msl_idx, seg_idx, ret, dir_fd = -1; + + start_addr = (uintptr_t) msl->base_va; + end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz; + + if ((uintptr_t)wa->ms->addr < start_addr || + (uintptr_t)wa->ms->addr >= end_addr) + return 0; + + msl_idx = msl - mcfg->memsegs; + seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz; + + /* msl is const */ + found_msl = &mcfg->memsegs[msl_idx]; + + /* do not allow any page allocations during the time we're freeing, + * because file creation and locking operations are not atomic, + * and we might be the first or the last ones to use a particular page, + * so we need to ensure atomicity of every operation. + * + * during init, we already hold a write lock, so don't try to take out + * another one. 
+ */ + if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) { + dir_fd = open(wa->hi->hugedir, O_RDONLY); + if (dir_fd < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", + __func__, wa->hi->hugedir, strerror(errno)); + return -1; + } + /* blocking writelock */ + if (flock(dir_fd, LOCK_EX)) { + RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", + __func__, wa->hi->hugedir, strerror(errno)); + close(dir_fd); + return -1; + } + } + + found_msl->version++; + + rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx); + + ret = free_seg(wa->ms, wa->hi, msl_idx, seg_idx); + + if (dir_fd >= 0) + close(dir_fd); + + if (ret < 0) + return -1; + + return 1; +} + +int +eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz, + int socket, bool exact) +{ + int i, ret = -1; +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + bool have_numa = false; + int oldpolicy; + struct bitmask *oldmask; +#endif + struct alloc_walk_param wa; + struct hugepage_info *hi = NULL; + + memset(&wa, 0, sizeof(wa)); + + /* dynamic allocation not supported in legacy mode */ + if (internal_config.legacy_mem) + return -1; + + for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) { + if (page_sz == + internal_config.hugepage_info[i].hugepage_sz) { + hi = &internal_config.hugepage_info[i]; + break; + } + } + if (!hi) { + RTE_LOG(ERR, EAL, "%s(): can't find relevant hugepage_info entry\n", + __func__); + return -1; + } + +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (check_numa()) { + oldmask = numa_allocate_nodemask(); + prepare_numa(&oldpolicy, oldmask, socket); + have_numa = true; + } +#endif + + wa.exact = exact; + wa.hi = hi; + wa.ms = ms; + wa.n_segs = n_segs; + wa.page_sz = page_sz; + wa.socket = socket; + wa.segs_allocated = 0; + + /* memalloc is locked, so it's safe to use thread-unsafe version */ + ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa); + if (ret == 0) { + RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n", + __func__); + ret = -1; + } else if (ret > 0) { + ret = (int)wa.segs_allocated; + } + +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (have_numa) + restore_numa(&oldpolicy, oldmask); +#endif + return ret; +} + +struct rte_memseg * +eal_memalloc_alloc_seg(size_t page_sz, int socket) +{ + struct rte_memseg *ms; + if (eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true) < 0) + return NULL; + /* return pointer to newly allocated memseg */ + return ms; +} + +int +eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs) +{ + int seg, ret = 0; + + /* dynamic free not supported in legacy mode */ + if (internal_config.legacy_mem) + return -1; + + for (seg = 0; seg < n_segs; seg++) { + struct rte_memseg *cur = ms[seg]; + struct hugepage_info *hi = NULL; + struct free_walk_param wa; + int i, walk_res; + + /* if this page is marked as unfreeable, fail */ + if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) { + RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n"); + ret = -1; + continue; + } + + memset(&wa, 0, sizeof(wa)); + + for (i = 0; i < (int)RTE_DIM(internal_config.hugepage_info); + i++) { + hi = &internal_config.hugepage_info[i]; + if (cur->hugepage_sz == hi->hugepage_sz) + break; + } + if (i == (int)RTE_DIM(internal_config.hugepage_info)) { + RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n"); + ret = -1; + continue; + } + + wa.ms = cur; + wa.hi = hi; + + /* memalloc is locked, so it's safe to use thread-unsafe version + */ + walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk, + &wa); + if (walk_res == 1) + continue; + if 
(walk_res == 0) + RTE_LOG(ERR, EAL, "Couldn't find memseg list\n"); + ret = -1; + } + return ret; +} + +int +eal_memalloc_free_seg(struct rte_memseg *ms) +{ + /* dynamic free not supported in legacy mode */ + if (internal_config.legacy_mem) + return -1; + + return eal_memalloc_free_seg_bulk(&ms, 1); +} + +static int +sync_chunk(struct rte_memseg_list *primary_msl, + struct rte_memseg_list *local_msl, struct hugepage_info *hi, + unsigned int msl_idx, bool used, int start, int end) +{ + struct rte_fbarray *l_arr, *p_arr; + int i, ret, chunk_len, diff_len; + + l_arr = &local_msl->memseg_arr; + p_arr = &primary_msl->memseg_arr; + + /* we need to aggregate allocations/deallocations into bigger chunks, + * as we don't want to spam the user with per-page callbacks. + * + * to avoid any potential issues, we also want to trigger + * deallocation callbacks *before* we actually deallocate + * memory, so that the user application could wrap up its use + * before it goes away. + */ + + chunk_len = end - start; + + /* find how many contiguous pages we can map/unmap for this chunk */ + diff_len = used ? + rte_fbarray_find_contig_free(l_arr, start) : + rte_fbarray_find_contig_used(l_arr, start); + + /* has to be at least one page */ + if (diff_len < 1) + return -1; + + diff_len = RTE_MIN(chunk_len, diff_len); + + /* if we are freeing memory, notify the application */ + if (!used) { + struct rte_memseg *ms; + void *start_va; + size_t len, page_sz; + + ms = rte_fbarray_get(l_arr, start); + start_va = ms->addr; + page_sz = (size_t)primary_msl->page_sz; + len = page_sz * diff_len; + + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + start_va, len); + } + + for (i = 0; i < diff_len; i++) { + struct rte_memseg *p_ms, *l_ms; + int seg_idx = start + i; + + l_ms = rte_fbarray_get(l_arr, seg_idx); + p_ms = rte_fbarray_get(p_arr, seg_idx); + + if (l_ms == NULL || p_ms == NULL) + return -1; + + if (used) { + ret = alloc_seg(l_ms, p_ms->addr, + p_ms->socket_id, hi, + msl_idx, seg_idx); + if (ret < 0) + return -1; + rte_fbarray_set_used(l_arr, seg_idx); + } else { + ret = free_seg(l_ms, hi, msl_idx, seg_idx); + rte_fbarray_set_free(l_arr, seg_idx); + if (ret < 0) + return -1; + } + } + + /* if we just allocated memory, notify the application */ + if (used) { + struct rte_memseg *ms; + void *start_va; + size_t len, page_sz; + + ms = rte_fbarray_get(l_arr, start); + start_va = ms->addr; + page_sz = (size_t)primary_msl->page_sz; + len = page_sz * diff_len; + + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, + start_va, len); + } + + /* calculate how much we can advance until next chunk */ + diff_len = used ? + rte_fbarray_find_contig_used(l_arr, start) : + rte_fbarray_find_contig_free(l_arr, start); + ret = RTE_MIN(chunk_len, diff_len); + + return ret; +} + +static int +sync_status(struct rte_memseg_list *primary_msl, + struct rte_memseg_list *local_msl, struct hugepage_info *hi, + unsigned int msl_idx, bool used) +{ + struct rte_fbarray *l_arr, *p_arr; + int p_idx, l_chunk_len, p_chunk_len, ret; + int start, end; + + /* this is a little bit tricky, but the basic idea is - walk both lists + * and spot any places where there are discrepancies. walking both lists + * and noting discrepancies in a single go is a hard problem, so we do + * it in two passes - first we spot any places where allocated segments + * mismatch (i.e. ensure that everything that's allocated in the primary + * is also allocated in the secondary), and then we do it by looking at + * free segments instead. 
+ * + * we also need to aggregate changes into chunks, as we have to call + * callbacks per allocation, not per page. + */ + l_arr = &local_msl->memseg_arr; + p_arr = &primary_msl->memseg_arr; + + if (used) + p_idx = rte_fbarray_find_next_used(p_arr, 0); + else + p_idx = rte_fbarray_find_next_free(p_arr, 0); + + while (p_idx >= 0) { + int next_chunk_search_idx; + + if (used) { + p_chunk_len = rte_fbarray_find_contig_used(p_arr, + p_idx); + l_chunk_len = rte_fbarray_find_contig_used(l_arr, + p_idx); + } else { + p_chunk_len = rte_fbarray_find_contig_free(p_arr, + p_idx); + l_chunk_len = rte_fbarray_find_contig_free(l_arr, + p_idx); + } + /* best case scenario - no differences (or bigger, which will be + * fixed during next iteration), look for next chunk + */ + if (l_chunk_len >= p_chunk_len) { + next_chunk_search_idx = p_idx + p_chunk_len; + goto next_chunk; + } + + /* if both chunks start at the same point, skip parts we know + * are identical, and sync the rest. each call to sync_chunk + * will only sync contiguous segments, so we need to call this + * until we are sure there are no more differences in this + * chunk. + */ + start = p_idx + l_chunk_len; + end = p_idx + p_chunk_len; + do { + ret = sync_chunk(primary_msl, local_msl, hi, msl_idx, + used, start, end); + start += ret; + } while (start < end && ret >= 0); + /* if ret is negative, something went wrong */ + if (ret < 0) + return -1; + + next_chunk_search_idx = p_idx + p_chunk_len; +next_chunk: + /* skip to end of this chunk */ + if (used) { + p_idx = rte_fbarray_find_next_used(p_arr, + next_chunk_search_idx); + } else { + p_idx = rte_fbarray_find_next_free(p_arr, + next_chunk_search_idx); + } + } + return 0; +} + +static int +sync_existing(struct rte_memseg_list *primary_msl, + struct rte_memseg_list *local_msl, struct hugepage_info *hi, + unsigned int msl_idx) +{ + int ret, dir_fd; + + /* do not allow any page allocations during the time we're allocating, + * because file creation and locking operations are not atomic, + * and we might be the first or the last ones to use a particular page, + * so we need to ensure atomicity of every operation. 
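/*
 * Sketch of the fbarray walking pattern sync_status() uses above:
 * rte_fbarray_find_next_used() locates the start of a run of allocated
 * entries and rte_fbarray_find_contig_used() gives its length, so the
 * array can be processed one contiguous chunk at a time. The callback
 * on_used_run() is hypothetical.
 */
#include <rte_fbarray.h>

static void
walk_used_runs(struct rte_fbarray *arr,
		void (*on_used_run)(int start, int len))
{
	int idx = rte_fbarray_find_next_used(arr, 0);

	while (idx >= 0) {
		int len = rte_fbarray_find_contig_used(arr, idx);

		on_used_run(idx, len);
		idx = rte_fbarray_find_next_used(arr, idx + len);
	}
}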
+ */ + dir_fd = open(hi->hugedir, O_RDONLY); + if (dir_fd < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", __func__, + hi->hugedir, strerror(errno)); + return -1; + } + /* blocking writelock */ + if (flock(dir_fd, LOCK_EX)) { + RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", __func__, + hi->hugedir, strerror(errno)); + close(dir_fd); + return -1; + } + + /* ensure all allocated space is the same in both lists */ + ret = sync_status(primary_msl, local_msl, hi, msl_idx, true); + if (ret < 0) + goto fail; + + /* ensure all unallocated space is the same in both lists */ + ret = sync_status(primary_msl, local_msl, hi, msl_idx, false); + if (ret < 0) + goto fail; + + /* update version number */ + local_msl->version = primary_msl->version; + + close(dir_fd); + + return 0; +fail: + close(dir_fd); + return -1; +} + +static int +sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *primary_msl, *local_msl; + struct hugepage_info *hi = NULL; + unsigned int i; + int msl_idx; + + msl_idx = msl - mcfg->memsegs; + primary_msl = &mcfg->memsegs[msl_idx]; + local_msl = &local_memsegs[msl_idx]; + + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + uint64_t cur_sz = + internal_config.hugepage_info[i].hugepage_sz; + uint64_t msl_sz = primary_msl->page_sz; + if (msl_sz == cur_sz) { + hi = &internal_config.hugepage_info[i]; + break; + } + } + if (!hi) { + RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n"); + return -1; + } + + /* if versions don't match, synchronize everything */ + if (local_msl->version != primary_msl->version && + sync_existing(primary_msl, local_msl, hi, msl_idx)) + return -1; + return 0; +} + + +int +eal_memalloc_sync_with_primary(void) +{ + /* nothing to be done in primary */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return 0; + + /* memalloc is locked, so it's safe to call thread-unsafe version */ + if (rte_memseg_list_walk_thread_unsafe(sync_walk, NULL)) + return -1; + return 0; +} + +static int +secondary_msl_create_walk(const struct rte_memseg_list *msl, + void *arg __rte_unused) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *primary_msl, *local_msl; + char name[PATH_MAX]; + int msl_idx, ret; + + msl_idx = msl - mcfg->memsegs; + primary_msl = &mcfg->memsegs[msl_idx]; + local_msl = &local_memsegs[msl_idx]; + + /* create distinct fbarrays for each secondary */ + snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i", + primary_msl->memseg_arr.name, getpid()); + + ret = rte_fbarray_init(&local_msl->memseg_arr, name, + primary_msl->memseg_arr.len, + primary_msl->memseg_arr.elt_sz); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Cannot initialize local memory map\n"); + return -1; + } + local_msl->base_va = primary_msl->base_va; + + return 0; +} + +static int +secondary_lock_list_create_walk(const struct rte_memseg_list *msl, + void *arg __rte_unused) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int i, len; + int msl_idx; + int *data; + + msl_idx = msl - mcfg->memsegs; + len = msl->memseg_arr.len; + + /* ensure we have space to store lock fd per each possible segment */ + data = malloc(sizeof(int) * len); + if (data == NULL) { + RTE_LOG(ERR, EAL, "Unable to allocate space for lock descriptors\n"); + return -1; + } + /* set all fd's as invalid */ + for (i = 0; i < len; i++) + data[i] = -1; + + lock_fds[msl_idx].fds = data; + lock_fds[msl_idx].len = len; 
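+ /* no lock fd's are in use yet, and the per-list file descriptor is not yet open */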
+ lock_fds[msl_idx].count = 0; + lock_fds[msl_idx].memseg_list_fd = -1; + + return 0; +} + +int +eal_memalloc_init(void) +{ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0) + return -1; + + /* initialize all of the lock fd lists */ + if (internal_config.single_file_segments) + if (rte_memseg_list_walk(secondary_lock_list_create_walk, NULL)) + return -1; + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 38853b75..dbf19499 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -28,6 +28,7 @@ #include #endif +#include #include #include #include @@ -39,6 +40,7 @@ #include #include "eal_private.h" +#include "eal_memalloc.h" #include "eal_internal_cfg.h" #include "eal_filesystem.h" #include "eal_hugepages.h" @@ -57,8 +59,6 @@ * zone as well as a physical contiguous zone. */ -static uint64_t baseaddr_offset; - static bool phys_addrs_available = true; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" @@ -66,7 +66,7 @@ static bool phys_addrs_available = true; static void test_phys_addrs_available(void) { - uint64_t tmp; + uint64_t tmp = 0; phys_addr_t physaddr; if (!rte_eal_has_hugepages()) { @@ -221,82 +221,6 @@ aslr_enabled(void) } } -/* - * Try to mmap *size bytes in /dev/zero. If it is successful, return the - * pointer to the mmap'd area and keep *size unmodified. Else, retry - * with a smaller zone: decrease *size by hugepage_sz until it reaches - * 0. In this case, return NULL. Note: this function returns an address - * which is a multiple of hugepage size. - */ -static void * -get_virtual_area(size_t *size, size_t hugepage_sz) -{ - void *addr; - void *addr_hint; - int fd; - long aligned_addr; - - if (internal_config.base_virtaddr != 0) { - int page_size = sysconf(_SC_PAGE_SIZE); - addr_hint = (void *) (uintptr_t) - (internal_config.base_virtaddr + baseaddr_offset); - addr_hint = RTE_PTR_ALIGN_FLOOR(addr_hint, page_size); - } else { - addr_hint = NULL; - } - - RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); - - - fd = open("/dev/zero", O_RDONLY); - if (fd < 0){ - RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); - return NULL; - } - do { - addr = mmap(addr_hint, (*size) + hugepage_sz, PROT_READ, -#ifdef RTE_ARCH_PPC_64 - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -#else - MAP_PRIVATE, -#endif - fd, 0); - if (addr == MAP_FAILED) { - *size -= hugepage_sz; - } else if (addr_hint != NULL && addr != addr_hint) { - RTE_LOG(WARNING, EAL, "WARNING! 
Base virtual address " - "hint (%p != %p) not respected!\n", - addr_hint, addr); - RTE_LOG(WARNING, EAL, " This may cause issues with " - "mapping memory into secondary processes\n"); - } - } while (addr == MAP_FAILED && *size > 0); - - if (addr == MAP_FAILED) { - close(fd); - RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", - strerror(errno)); - return NULL; - } - - munmap(addr, (*size) + hugepage_sz); - close(fd); - - /* align addr to a huge page size boundary */ - aligned_addr = (long)addr; - aligned_addr += (hugepage_sz - 1); - aligned_addr &= (~(hugepage_sz - 1)); - addr = (void *)(aligned_addr); - - RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", - addr, *size); - - /* increment offset */ - baseaddr_offset += *size; - - return addr; -} - static sigjmp_buf huge_jmpenv; static void huge_sigbus_handler(int signo __rte_unused) @@ -330,13 +254,11 @@ void numa_error(char *where) */ static unsigned map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, - uint64_t *essential_memory __rte_unused, int orig) + uint64_t *essential_memory __rte_unused) { int fd; unsigned i; void *virtaddr; - void *vma_addr = NULL; - size_t vma_len = 0; #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES int node_id = -1; int essential_prev = 0; @@ -351,7 +273,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, have_numa = false; } - if (orig && have_numa) { + if (have_numa) { RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); if (get_mempolicy(&oldpolicy, oldmask->maskp, oldmask->size + 1, 0, 0) < 0) { @@ -367,6 +289,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, #endif for (i = 0; i < hpi->num_pages[0]; i++) { + struct hugepage_file *hf = &hugepg_tbl[i]; uint64_t hugepage_sz = hpi->hugepage_sz; #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES @@ -401,57 +324,14 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, } #endif - if (orig) { - hugepg_tbl[i].file_id = i; - hugepg_tbl[i].size = hugepage_sz; - eal_get_hugefile_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), hpi->hugedir, - hugepg_tbl[i].file_id); - hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; - } -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G and 16G pages, just reuse - * original map address as final map address. - */ - else if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - continue; - } -#endif - else if (vma_len == 0) { - unsigned j, num_pages; - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. */ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in - * descending order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. 
*/ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - if (vma_addr == NULL) - vma_len = hugepage_sz; - } + hf->file_id = i; + hf->size = hugepage_sz; + eal_get_hugefile_path(hf->filepath, sizeof(hf->filepath), + hpi->hugedir, hf->file_id); + hf->filepath[sizeof(hf->filepath) - 1] = '\0'; /* try to create hugepage file */ - fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600); + fd = open(hf->filepath, O_CREAT | O_RDWR, 0600); if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); @@ -459,8 +339,11 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, } /* map the segment, and populate page tables, - * the kernel fills this segment with zeros */ - virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, + * the kernel fills this segment with zeros. we don't care where + * this gets mapped - we already have contiguous memory areas + * ready for us to map into. + */ + virtaddr = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); if (virtaddr == MAP_FAILED) { RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__, @@ -469,41 +352,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, goto out; } - if (orig) { - hugepg_tbl[i].orig_va = virtaddr; - } - else { - hugepg_tbl[i].final_va = virtaddr; - } + hf->orig_va = virtaddr; - if (orig) { - /* In linux, hugetlb limitations, like cgroup, are - * enforced at fault time instead of mmap(), even - * with the option of MAP_POPULATE. Kernel will send - * a SIGBUS signal. To avoid to be killed, save stack - * environment here, if SIGBUS happens, we can jump - * back here. - */ - if (huge_wrap_sigsetjmp()) { - RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more " - "hugepages of size %u MB\n", - (unsigned)(hugepage_sz / 0x100000)); - munmap(virtaddr, hugepage_sz); - close(fd); - unlink(hugepg_tbl[i].filepath); + /* In linux, hugetlb limitations, like cgroup, are + * enforced at fault time instead of mmap(), even + * with the option of MAP_POPULATE. Kernel will send + * a SIGBUS signal. To avoid to be killed, save stack + * environment here, if SIGBUS happens, we can jump + * back here. + */ + if (huge_wrap_sigsetjmp()) { + RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more " + "hugepages of size %u MB\n", + (unsigned int)(hugepage_sz / 0x100000)); + munmap(virtaddr, hugepage_sz); + close(fd); + unlink(hugepg_tbl[i].filepath); #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (maxnode) - essential_memory[node_id] = - essential_prev; + if (maxnode) + essential_memory[node_id] = + essential_prev; #endif - goto out; - } - *(int *)virtaddr = 0; + goto out; } + *(int *)virtaddr = 0; - - /* set shared flock on the file. */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + /* set shared lock on the file. 
*/ + if (flock(fd, LOCK_SH) < 0) { RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n", __func__, strerror(errno)); close(fd); @@ -511,9 +386,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, } close(fd); - - vma_addr = (char *)vma_addr + hugepage_sz; - vma_len -= hugepage_sz; } out: @@ -535,20 +407,6 @@ out: return i; } -/* Unmap all hugepages from original mapping */ -static int -unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - unsigned i; - for (i = 0; i < hpi->num_pages[0]; i++) { - if (hugepg_tbl[i].orig_va) { - munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); - hugepg_tbl[i].orig_va = NULL; - } - } - return 0; -} - /* * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge * page. @@ -663,7 +521,18 @@ static void * create_shared_memory(const char *filename, const size_t mem_size) { void *retval; - int fd = open(filename, O_CREAT | O_RDWR, 0666); + int fd; + + /* if no shared files mode is used, create anonymous memory instead */ + if (internal_config.no_shconf) { + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (retval == MAP_FAILED) + return NULL; + return retval; + } + + fd = open(filename, O_CREAT | O_RDWR, 0666); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { @@ -688,7 +557,7 @@ copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, int src_pos, dst_pos = 0; for (src_pos = 0; src_pos < src_size; src_pos++) { - if (src[src_pos].final_va != NULL) { + if (src[src_pos].orig_va != NULL) { /* error on overflow attempt */ if (dst_pos == dest_size) return -1; @@ -759,9 +628,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, unmap_len = hp->size; /* get start addr and len of the remaining segment */ - munmap(hp->final_va, (size_t) unmap_len); + munmap(hp->orig_va, + (size_t)unmap_len); - hp->final_va = NULL; + hp->orig_va = NULL; if (unlink(hp->filepath) == -1) { RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", __func__, hp->filepath, strerror(errno)); @@ -780,6 +650,436 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, return 0; } +static int +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + int cur_page, seg_len; + unsigned int msl_idx; + int ms_idx; + uint64_t page_sz; + size_t memseg_len; + int socket_id; + + page_sz = hugepages[seg_start].size; + socket_id = hugepages[seg_start].socket_id; + seg_len = seg_end - seg_start; + + RTE_LOG(DEBUG, EAL, "Attempting to map %" PRIu64 "M on socket %i\n", + (seg_len * page_sz) >> 20ULL, socket_id); + + /* find free space in memseg lists */ + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + bool empty; + msl = &mcfg->memsegs[msl_idx]; + arr = &msl->memseg_arr; + + if (msl->page_sz != page_sz) + continue; + if (msl->socket_id != socket_id) + continue; + + /* leave space for a hole if array is not empty */ + empty = arr->count == 0; + ms_idx = rte_fbarray_find_next_n_free(arr, 0, + seg_len + (empty ? 0 : 1)); + + /* memseg list is full? */ + if (ms_idx < 0) + continue; + + /* leave some space between memsegs, they are not IOVA + * contiguous, so they shouldn't be VA contiguous either. + */ + if (!empty) + ms_idx++; + break; + } + if (msl_idx == RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, "Could not find space for memseg. 
Please increase %s and/or %s in configuration.\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE), + RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE)); + return -1; + } + +#ifdef RTE_ARCH_PPC64 + /* for PPC64 we go through the list backwards */ + for (cur_page = seg_end - 1; cur_page >= seg_start; + cur_page--, ms_idx++) { +#else + for (cur_page = seg_start; cur_page < seg_end; cur_page++, ms_idx++) { +#endif + struct hugepage_file *hfile = &hugepages[cur_page]; + struct rte_memseg *ms = rte_fbarray_get(arr, ms_idx); + void *addr; + int fd; + + fd = open(hfile->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open '%s': %s\n", + hfile->filepath, strerror(errno)); + return -1; + } + /* set shared lock on the file. */ + if (flock(fd, LOCK_SH) < 0) { + RTE_LOG(DEBUG, EAL, "Could not lock '%s': %s\n", + hfile->filepath, strerror(errno)); + close(fd); + return -1; + } + memseg_len = (size_t)page_sz; + addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len); + + /* we know this address is already mmapped by memseg list, so + * using MAP_FIXED here is safe + */ + addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Couldn't remap '%s': %s\n", + hfile->filepath, strerror(errno)); + close(fd); + return -1; + } + + /* we have a new address, so unmap previous one */ +#ifndef RTE_ARCH_64 + /* in 32-bit legacy mode, we have already unmapped the page */ + if (!internal_config.legacy_mem) + munmap(hfile->orig_va, page_sz); +#else + munmap(hfile->orig_va, page_sz); +#endif + + hfile->orig_va = NULL; + hfile->final_va = addr; + + /* rewrite physical addresses in IOVA as VA mode */ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + hfile->physaddr = (uintptr_t)addr; + + /* set up memseg data */ + ms->addr = addr; + ms->hugepage_sz = page_sz; + ms->len = memseg_len; + ms->iova = hfile->physaddr; + ms->socket_id = hfile->socket_id; + ms->nchannel = rte_memory_get_nchannel(); + ms->nrank = rte_memory_get_nrank(); + + rte_fbarray_set_used(arr, ms_idx); + + close(fd); + } + RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n", + (seg_len * page_sz) >> 20, socket_id); + return 0; +} + +static uint64_t +get_mem_amount(uint64_t page_sz, uint64_t max_mem) +{ + uint64_t area_sz, max_pages; + + /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */ + max_pages = RTE_MAX_MEMSEG_PER_LIST; + max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem); + + area_sz = RTE_MIN(page_sz * max_pages, max_mem); + + /* make sure the list isn't smaller than the page size */ + area_sz = RTE_MAX(area_sz, page_sz); + + return RTE_ALIGN(area_sz, page_sz); +} + +static int +free_memseg_list(struct rte_memseg_list *msl) +{ + if (rte_fbarray_destroy(&msl->memseg_arr)) { + RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n"); + return -1; + } + memset(msl, 0, sizeof(*msl)); + return 0; +} + +#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" +static int +alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, + int n_segs, int socket_id, int type_msl_idx) +{ + char name[RTE_FBARRAY_NAME_LEN]; + + snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id, + type_msl_idx); + if (rte_fbarray_init(&msl->memseg_arr, name, n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n", + rte_strerror(rte_errno)); + return -1; + } + + msl->page_sz = page_sz; + msl->socket_id = socket_id; + msl->base_va = NULL; + + RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket 
%i\n", + (size_t)page_sz >> 10, socket_id); + + return 0; +} + +static int +alloc_va_space(struct rte_memseg_list *msl) +{ + uint64_t page_sz; + size_t mem_sz; + void *addr; + int flags = 0; + +#ifdef RTE_ARCH_PPC_64 + flags |= MAP_HUGETLB; +#endif + + page_sz = msl->page_sz; + mem_sz = page_sz * msl->memseg_arr.len; + + addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); + if (addr == NULL) { + if (rte_errno == EADDRNOTAVAIL) + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n", + (unsigned long long)mem_sz, msl->base_va); + else + RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); + return -1; + } + msl->base_va = addr; + + return 0; +} + +/* + * Our VA space is not preallocated yet, so preallocate it here. We need to know + * how many segments there are in order to map all pages into one address space, + * and leave appropriate holes between segments so that rte_malloc does not + * concatenate them into one big segment. + * + * we also need to unmap original pages to free up address space. + */ +static int __rte_unused +prealloc_segments(struct hugepage_file *hugepages, int n_pages) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int cur_page, seg_start_page, end_seg, new_memseg; + unsigned int hpi_idx, socket, i; + int n_contig_segs, n_segs; + int msl_idx; + + /* before we preallocate segments, we need to free up our VA space. + * we're not removing files, and we already have information about + * PA-contiguousness, so it is safe to unmap everything. + */ + for (cur_page = 0; cur_page < n_pages; cur_page++) { + struct hugepage_file *hpi = &hugepages[cur_page]; + munmap(hpi->orig_va, hpi->size); + hpi->orig_va = NULL; + } + + /* we cannot know how many page sizes and sockets we have discovered, so + * loop over all of them + */ + for (hpi_idx = 0; hpi_idx < internal_config.num_hugepage_sizes; + hpi_idx++) { + uint64_t page_sz = + internal_config.hugepage_info[hpi_idx].hugepage_sz; + + for (i = 0; i < rte_socket_count(); i++) { + struct rte_memseg_list *msl; + + socket = rte_socket_id_by_idx(i); + n_contig_segs = 0; + n_segs = 0; + seg_start_page = -1; + + for (cur_page = 0; cur_page < n_pages; cur_page++) { + struct hugepage_file *prev, *cur; + int prev_seg_start_page = -1; + + cur = &hugepages[cur_page]; + prev = cur_page == 0 ? NULL : + &hugepages[cur_page - 1]; + + new_memseg = 0; + end_seg = 0; + + if (cur->size == 0) + end_seg = 1; + else if (cur->socket_id != (int) socket) + end_seg = 1; + else if (cur->size != page_sz) + end_seg = 1; + else if (cur_page == 0) + new_memseg = 1; +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start + * from higher address to lower address. Here, + * physical addresses are in descending order. 
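+ * (hence contiguity is checked below as prev->physaddr - cur->physaddr)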
+ */ + else if ((prev->physaddr - cur->physaddr) != + cur->size) + new_memseg = 1; +#else + else if ((cur->physaddr - prev->physaddr) != + cur->size) + new_memseg = 1; +#endif + if (new_memseg) { + /* if we're already inside a segment, + * new segment means end of current one + */ + if (seg_start_page != -1) { + end_seg = 1; + prev_seg_start_page = + seg_start_page; + } + seg_start_page = cur_page; + } + + if (end_seg) { + if (prev_seg_start_page != -1) { + /* we've found a new segment */ + n_contig_segs++; + n_segs += cur_page - + prev_seg_start_page; + } else if (seg_start_page != -1) { + /* we didn't find new segment, + * but did end current one + */ + n_contig_segs++; + n_segs += cur_page - + seg_start_page; + seg_start_page = -1; + continue; + } else { + /* we're skipping this page */ + continue; + } + } + /* segment continues */ + } + /* check if we missed last segment */ + if (seg_start_page != -1) { + n_contig_segs++; + n_segs += cur_page - seg_start_page; + } + + /* if no segments were found, do not preallocate */ + if (n_segs == 0) + continue; + + /* we now have total number of pages that we will + * allocate for this segment list. add separator pages + * to the total count, and preallocate VA space. + */ + n_segs += n_contig_segs - 1; + + /* now, preallocate VA space for these segments */ + + /* first, find suitable memseg list for this */ + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; + msl_idx++) { + msl = &mcfg->memsegs[msl_idx]; + + if (msl->base_va != NULL) + continue; + break; + } + if (msl_idx == RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + return -1; + } + + /* now, allocate fbarray itself */ + if (alloc_memseg_list(msl, page_sz, n_segs, socket, + msl_idx) < 0) + return -1; + + /* finally, allocate VA space */ + if (alloc_va_space(msl) < 0) + return -1; + } + } + return 0; +} + +/* + * We cannot reallocate memseg lists on the fly because PPC64 stores pages + * backwards, therefore we have to process the entire memseg first before + * remapping it into memseg list VA space. + */ +static int +remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages) +{ + int cur_page, seg_start_page, new_memseg, ret; + + seg_start_page = 0; + for (cur_page = 0; cur_page < n_pages; cur_page++) { + struct hugepage_file *prev, *cur; + + new_memseg = 0; + + cur = &hugepages[cur_page]; + prev = cur_page == 0 ? NULL : &hugepages[cur_page - 1]; + + /* if size is zero, no more pages left */ + if (cur->size == 0) + break; + + if (cur_page == 0) + new_memseg = 1; + else if (cur->socket_id != prev->socket_id) + new_memseg = 1; + else if (cur->size != prev->size) + new_memseg = 1; +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start from higher + * address to lower address. Here, physical addresses are in + * descending order. 
+ */ + else if ((prev->physaddr - cur->physaddr) != cur->size) + new_memseg = 1; +#else + else if ((cur->physaddr - prev->physaddr) != cur->size) + new_memseg = 1; +#endif + + if (new_memseg) { + /* if this isn't the first time, remap segment */ + if (cur_page != 0) { + ret = remap_segment(hugepages, seg_start_page, + cur_page); + if (ret != 0) + return -1; + } + /* remember where we started */ + seg_start_page = cur_page; + } + /* continuation of previous memseg */ + } + /* we were stopped, but we didn't remap the last segment, do it now */ + if (cur_page != 0) { + ret = remap_segment(hugepages, seg_start_page, + cur_page); + if (ret != 0) + return -1; + } + return 0; +} + static inline uint64_t get_socket_mem_size(int socket) { @@ -788,8 +1088,7 @@ get_socket_mem_size(int socket) for (i = 0; i < internal_config.num_hugepage_sizes; i++){ struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) - size += hpi->hugepage_sz * hpi->num_pages[socket]; + size += hpi->hugepage_sz * hpi->num_pages[socket]; } return size; @@ -818,8 +1117,10 @@ calc_num_pages_per_socket(uint64_t * memory, /* if specific memory amounts per socket weren't requested */ if (internal_config.force_sockets == 0) { + size_t total_size; +#ifdef RTE_ARCH_64 int cpu_per_socket[RTE_MAX_NUMA_NODES]; - size_t default_size, total_size; + size_t default_size; unsigned lcore_id; /* Compute number of cores per socket */ @@ -837,7 +1138,7 @@ calc_num_pages_per_socket(uint64_t * memory, /* Set memory amount per socket */ default_size = (internal_config.memory * cpu_per_socket[socket]) - / rte_lcore_count(); + / rte_lcore_count(); /* Limit to maximum available memory on socket */ default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); @@ -854,18 +1155,40 @@ calc_num_pages_per_socket(uint64_t * memory, for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { /* take whatever is available */ default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], - total_size); + total_size); /* Update sizes */ memory[socket] += default_size; total_size -= default_size; } +#else + /* in 32-bit mode, allocate all of the memory only on master + * lcore socket + */ + total_size = internal_config.memory; + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; + socket++) { + struct rte_config *cfg = rte_eal_get_configuration(); + unsigned int master_lcore_socket; + + master_lcore_socket = + rte_lcore_to_socket_id(cfg->master_lcore); + + if (master_lcore_socket != socket) + continue; + + /* Update sizes */ + memory[socket] = total_size; + break; + } +#endif } for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { /* skips if the memory on specific socket wasn't requested */ for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ - hp_used[i].hugedir = hp_info[i].hugedir; + strlcpy(hp_used[i].hugedir, hp_info[i].hugedir, + sizeof(hp_used[i].hugedir)); hp_used[i].num_pages[socket] = RTE_MIN( memory[socket] / hp_info[i].hugepage_sz, hp_info[i].num_pages[socket]); @@ -907,7 +1230,8 @@ calc_num_pages_per_socket(uint64_t * memory, } } /* if we didn't satisfy all memory requirements per socket */ - if (memory[socket] > 0) { + if (memory[socket] > 0 && + internal_config.socket_mem[socket] != 0) { /* to prevent icc errors */ requested = (unsigned) (internal_config.socket_mem[socket] / 0x100000); @@ -939,7 +1263,7 @@ eal_get_hugepage_mem_size(void) for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = 
&internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; } @@ -987,17 +1311,19 @@ huge_recover_sigbus(void) * 6. unmap the first mapping * 7. fill memsegs in configuration with contiguous zones */ -int -rte_eal_hugepage_init(void) +static int +eal_legacy_hugepage_init(void) { struct rte_mem_config *mcfg; struct hugepage_file *hugepage = NULL, *tmp_hp = NULL; struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + struct rte_fbarray *arr; + struct rte_memseg *ms; uint64_t memory[RTE_MAX_NUMA_NODES]; unsigned hp_offset; - int i, j, new_memseg; + int i, j; int nr_hugefiles, nr_hugepages = 0; void *addr; @@ -1010,21 +1336,54 @@ rte_eal_hugepage_init(void) /* hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { + struct rte_memseg_list *msl; + uint64_t page_sz; + int n_segs, cur_seg; + + /* nohuge mode is legacy mode */ + internal_config.legacy_mem = 1; + + /* create a memseg list */ + msl = &mcfg->memsegs[0]; + + page_sz = RTE_PGSIZE_4K; + n_segs = internal_config.memory / page_sz; + + if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); + return -1; + } + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, strerror(errno)); return -1; } - if (rte_eal_iova_mode() == RTE_IOVA_VA) - mcfg->memseg[0].iova = (uintptr_t)addr; - else - mcfg->memseg[0].iova = RTE_BAD_IOVA; - mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; - mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = 0; + msl->base_va = addr; + msl->page_sz = page_sz; + msl->socket_id = 0; + + /* populate memsegs. 
each memseg is one page long */ + for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { + arr = &msl->memseg_arr; + + ms = rte_fbarray_get(arr, cur_seg); + if (rte_eal_iova_mode() == RTE_IOVA_VA) + ms->iova = (uintptr_t)addr; + else + ms->iova = RTE_BAD_IOVA; + ms->addr = addr; + ms->hugepage_sz = page_sz; + ms->socket_id = 0; + ms->len = page_sz; + + rte_fbarray_set_used(arr, cur_seg); + + addr = RTE_PTR_ADD(addr, (size_t)page_sz); + } return 0; } @@ -1057,7 +1416,6 @@ rte_eal_hugepage_init(void) for (i = 0; i < RTE_MAX_NUMA_NODES; i++) memory[i] = internal_config.socket_mem[i]; - /* map all hugepages and sort them */ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ unsigned pages_old, pages_new; @@ -1075,8 +1433,7 @@ rte_eal_hugepage_init(void) /* map all hugepages available */ pages_old = hpi->num_pages[0]; - pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, - memory, 1); + pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory); if (pages_new < pages_old) { RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", @@ -1091,7 +1448,8 @@ rte_eal_hugepage_init(void) continue; } - if (phys_addrs_available) { + if (phys_addrs_available && + rte_eal_iova_mode() != RTE_IOVA_VA) { /* find physical addresses for each hugepage */ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { RTE_LOG(DEBUG, EAL, "Failed to find phys addr " @@ -1118,18 +1476,6 @@ rte_eal_hugepage_init(void) qsort(&tmp_hp[hp_offset], hpi->num_pages[0], sizeof(struct hugepage_file), cmp_physaddr); - /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, NULL, 0) != - hpi->num_pages[0]) { - RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* unmap original mappings */ - if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0) - goto fail; - /* we have processed a num of hugepages of this size, so inc offset */ hp_offset += hpi->num_pages[0]; } @@ -1191,7 +1537,7 @@ rte_eal_hugepage_init(void) } /* create shared memory */ - hugepage = create_shared_memory(eal_hugepage_info_path(), + hugepage = create_shared_memory(eal_hugepage_data_path(), nr_hugefiles * sizeof(struct hugepage_file)); if (hugepage == NULL) { @@ -1212,7 +1558,7 @@ rte_eal_hugepage_init(void) /* * copy stuff from malloc'd hugepage* to the actual shared memory. - * this procedure only copies those hugepages that have final_va + * this procedure only copies those hugepages that have orig_va * not NULL. has overflow protection. 
*/ if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, @@ -1221,10 +1567,27 @@ rte_eal_hugepage_init(void) goto fail; } - /* free the hugepage backing files */ - if (internal_config.hugepage_unlink && - unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { - RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); +#ifndef RTE_ARCH_64 + /* for legacy 32-bit mode, we did not preallocate VA space, so do it */ + if (internal_config.legacy_mem && + prealloc_segments(hugepage, nr_hugefiles)) { + RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n"); + goto fail; + } +#endif + + /* remap all pages we do need into memseg list VA space, so that those + * pages become first-class citizens in DPDK memory subsystem + */ + if (remap_needed_hugepages(hugepage, nr_hugefiles)) { + RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n"); + goto fail; + } + + /* free the hugepage backing files */ + if (internal_config.hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); goto fail; } @@ -1232,75 +1595,30 @@ rte_eal_hugepage_init(void) free(tmp_hp); tmp_hp = NULL; - /* first memseg index shall be 0 after incrementing it below */ - j = -1; - for (i = 0; i < nr_hugefiles; i++) { - new_memseg = 0; - - /* if this is a new section, create a new memseg */ - if (i == 0) - new_memseg = 1; - else if (hugepage[i].socket_id != hugepage[i-1].socket_id) - new_memseg = 1; - else if (hugepage[i].size != hugepage[i-1].size) - new_memseg = 1; - -#ifdef RTE_ARCH_PPC_64 - /* On PPC64 architecture, the mmap always start from higher - * virtual address to lower address. Here, both the physical - * address and virtual address are in descending order */ - else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != - hugepage[i].size) - new_memseg = 1; - else if (((unsigned long)hugepage[i-1].final_va - - (unsigned long)hugepage[i].final_va) != hugepage[i].size) - new_memseg = 1; -#else - else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != - hugepage[i].size) - new_memseg = 1; - else if (((unsigned long)hugepage[i].final_va - - (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) - new_memseg = 1; -#endif + munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file)); - if (new_memseg) { - j += 1; - if (j == RTE_MAX_MEMSEG) - break; + /* we're not going to allocate more pages, so release VA space for + * unused memseg lists + */ + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *msl = &mcfg->memsegs[i]; + size_t mem_sz; - mcfg->memseg[j].iova = hugepage[i].physaddr; - mcfg->memseg[j].addr = hugepage[i].final_va; - mcfg->memseg[j].len = hugepage[i].size; - mcfg->memseg[j].socket_id = hugepage[i].socket_id; - mcfg->memseg[j].hugepage_sz = hugepage[i].size; - } - /* continuation of previous memseg */ - else { -#ifdef RTE_ARCH_PPC_64 - /* Use the phy and virt address of the last page as segment - * address for IBM Power architecture */ - mcfg->memseg[j].iova = hugepage[i].physaddr; - mcfg->memseg[j].addr = hugepage[i].final_va; -#endif - mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; - } - hugepage[i].memseg_id = j; - } + /* skip inactive lists */ + if (msl->base_va == NULL) + continue; + /* skip lists where there is at least one page allocated */ + if (msl->memseg_arr.count > 0) + continue; + /* this is an unused list, deallocate it */ + mem_sz = (size_t)msl->page_sz * msl->memseg_arr.len; + munmap(msl->base_va, mem_sz); + msl->base_va = 
NULL; - if (i < nr_hugefiles) { - RTE_LOG(ERR, EAL, "Can only reserve %d pages " - "from %d requested\n" - "Current %s=%d is not enough\n" - "Please either increase it or request less amount " - "of memory.\n", - i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), - RTE_MAX_MEMSEG); - goto fail; + /* destroy backing fbarray */ + rte_fbarray_destroy(&msl->memseg_arr); } - munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file)); - return 0; fail: @@ -1312,6 +1630,125 @@ fail: return -1; } +static int __rte_unused +hugepage_count_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct hugepage_info *hpi = arg; + + if (msl->page_sz != hpi->hugepage_sz) + return 0; + + hpi->num_pages[msl->socket_id] += msl->memseg_arr.len; + return 0; +} + +static int +limits_callback(int socket_id, size_t cur_limit, size_t new_len) +{ + RTE_SET_USED(socket_id); + RTE_SET_USED(cur_limit); + RTE_SET_USED(new_len); + return -1; +} + +static int +eal_hugepage_init(void) +{ + struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + uint64_t memory[RTE_MAX_NUMA_NODES]; + int hp_sz_idx, socket_id; + + test_phys_addrs_available(); + + memset(used_hp, 0, sizeof(used_hp)); + + for (hp_sz_idx = 0; + hp_sz_idx < (int) internal_config.num_hugepage_sizes; + hp_sz_idx++) { +#ifndef RTE_ARCH_64 + struct hugepage_info dummy; + unsigned int i; +#endif + /* also initialize used_hp hugepage sizes in used_hp */ + struct hugepage_info *hpi; + hpi = &internal_config.hugepage_info[hp_sz_idx]; + used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz; + +#ifndef RTE_ARCH_64 + /* for 32-bit, limit number of pages on socket to whatever we've + * preallocated, as we cannot allocate more. + */ + memset(&dummy, 0, sizeof(dummy)); + dummy.hugepage_sz = hpi->hugepage_sz; + if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0) + return -1; + + for (i = 0; i < RTE_DIM(dummy.num_pages); i++) { + hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i], + dummy.num_pages[i]); + } +#endif + } + + /* make a copy of socket_mem, needed for balanced allocation. 
*/ + for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++) + memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx]; + + /* calculate final number of pages */ + if (calc_num_pages_per_socket(memory, + internal_config.hugepage_info, used_hp, + internal_config.num_hugepage_sizes) < 0) + return -1; + + for (hp_sz_idx = 0; + hp_sz_idx < (int)internal_config.num_hugepage_sizes; + hp_sz_idx++) { + for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; + socket_id++) { + struct rte_memseg **pages; + struct hugepage_info *hpi = &used_hp[hp_sz_idx]; + unsigned int num_pages = hpi->num_pages[socket_id]; + int num_pages_alloc, i; + + if (num_pages == 0) + continue; + + pages = malloc(sizeof(*pages) * num_pages); + + RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n", + num_pages, hpi->hugepage_sz >> 20, socket_id); + + num_pages_alloc = eal_memalloc_alloc_seg_bulk(pages, + num_pages, hpi->hugepage_sz, + socket_id, true); + if (num_pages_alloc < 0) { + free(pages); + return -1; + } + + /* mark preallocated pages as unfreeable */ + for (i = 0; i < num_pages_alloc; i++) { + struct rte_memseg *ms = pages[i]; + ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE; + } + free(pages); + } + } + /* if socket limits were specified, set them */ + if (internal_config.force_socket_limits) { + unsigned int i; + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + uint64_t limit = internal_config.socket_limit[i]; + if (limit == 0) + continue; + if (rte_mem_alloc_validator_register("socket-limit", + limits_callback, i, limit)) + RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n"); + } + } + return 0; +} + /* * uses fstat to report the size of a file on disk */ @@ -1330,16 +1767,15 @@ getFileSize(int fd) * configuration and finds the hugepages which form that segment, mapping them * in order to form a contiguous block in the virtual memory space */ -int -rte_eal_hugepage_attach(void) +static int +eal_legacy_hugepage_attach(void) { - const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct hugepage_file *hp = NULL; - unsigned num_hp = 0; - unsigned i, s = 0; /* s used to track the segment number */ - unsigned max_seg = RTE_MAX_MEMSEG; + unsigned int num_hp = 0; + unsigned int i = 0; + unsigned int cur_seg; off_t size = 0; - int fd, fd_zero = -1, fd_hugepage = -1; + int fd, fd_hugepage = -1; if (aslr_enabled() > 0) { RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " @@ -1350,139 +1786,429 @@ rte_eal_hugepage_attach(void) test_phys_addrs_available(); - fd_zero = open("/dev/zero", O_RDONLY); - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); - goto error; - } - fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); + fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY); if (fd_hugepage < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + RTE_LOG(ERR, EAL, "Could not open %s\n", + eal_hugepage_data_path()); goto error; } - /* map all segments into memory to make sure we get the addrs */ - for (s = 0; s < RTE_MAX_MEMSEG; ++s) { - void *base_addr; - - /* - * the first memory segment with len==0 is the one that - * follows the last valid segment. - */ - if (mcfg->memseg[s].len == 0) - break; - - /* - * fdzero is mmapped to get a contiguous block of virtual - * addresses of the appropriate memseg size. - * use mmap to get identical addresses as the primary process. 
- */ - base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ, -#ifdef RTE_ARCH_PPC_64 - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -#else - MAP_PRIVATE, -#endif - fd_zero, 0); - if (base_addr == MAP_FAILED || - base_addr != mcfg->memseg[s].addr) { - max_seg = s; - if (base_addr != MAP_FAILED) { - /* errno is stale, don't use */ - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero at [%p], got [%p] - " - "please use '--base-virtaddr' option\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, base_addr); - munmap(base_addr, mcfg->memseg[s].len); - } else { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero at [%p]: '%s'\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, strerror(errno)); - } - if (aslr_enabled() > 0) { - RTE_LOG(ERR, EAL, "It is recommended to " - "disable ASLR in the kernel " - "and retry running both primary " - "and secondary processes\n"); - } - goto error; - } - } - size = getFileSize(fd_hugepage); hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); if (hp == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + RTE_LOG(ERR, EAL, "Could not mmap %s\n", + eal_hugepage_data_path()); goto error; } num_hp = size / sizeof(struct hugepage_file); RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); - s = 0; - while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ - void *addr, *base_addr; - uintptr_t offset = 0; - size_t mapping_size; - /* - * free previously mapped memory so we can map the - * hugepages into the space - */ - base_addr = mcfg->memseg[s].addr; - munmap(base_addr, mcfg->memseg[s].len); - - /* find the hugepages for this segment and map them - * we don't need to worry about order, as the server sorted the - * entries before it did the second mmap of them */ - for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){ - if (hp[i].memseg_id == (int)s){ - fd = open(hp[i].filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", - hp[i].filepath); - goto error; - } - mapping_size = hp[i].size; - addr = mmap(RTE_PTR_ADD(base_addr, offset), - mapping_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - close(fd); /* close file both on success and on failure */ - if (addr == MAP_FAILED || - addr != RTE_PTR_ADD(base_addr, offset)) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", - hp[i].filepath); - goto error; - } - offset+=mapping_size; - } + /* map all segments into memory to make sure we get the addrs. the + * segments themselves are already in memseg list (which is shared and + * has its VA space already preallocated), so we just need to map + * everything into correct addresses. + */ + for (i = 0; i < num_hp; i++) { + struct hugepage_file *hf = &hp[i]; + size_t map_sz = hf->size; + void *map_addr = hf->final_va; + + /* if size is zero, no more pages left */ + if (map_sz == 0) + break; + + fd = open(hf->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", + hf->filepath, strerror(errno)); + goto error; + } + + map_addr = mmap(map_addr, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0); + if (map_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Could not map %s: %s\n", + hf->filepath, strerror(errno)); + close(fd); + goto error; } - RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, - (unsigned long long)mcfg->memseg[s].len); - s++; + + /* set shared lock on the file. 
*/ + if (flock(fd, LOCK_SH) < 0) { + RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n", + __func__, strerror(errno)); + close(fd); + goto error; + } + + close(fd); } /* unmap the hugepage config file, since we are done using it */ munmap(hp, size); - close(fd_zero); close(fd_hugepage); return 0; error: - for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++) - munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len); + /* map all segments into memory to make sure we get the addrs */ + cur_seg = 0; + for (cur_seg = 0; cur_seg < i; cur_seg++) { + struct hugepage_file *hf = &hp[i]; + size_t map_sz = hf->size; + void *map_addr = hf->final_va; + + munmap(map_addr, map_sz); + } if (hp != NULL && hp != MAP_FAILED) munmap(hp, size); - if (fd_zero >= 0) - close(fd_zero); if (fd_hugepage >= 0) close(fd_hugepage); return -1; } +static int +eal_hugepage_attach(void) +{ + if (eal_memalloc_sync_with_primary()) { + RTE_LOG(ERR, EAL, "Could not map memory from primary process\n"); + if (aslr_enabled() > 0) + RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel and retry running both primary and secondary processes\n"); + return -1; + } + return 0; +} + +int +rte_eal_hugepage_init(void) +{ + return internal_config.legacy_mem ? + eal_legacy_hugepage_init() : + eal_hugepage_init(); +} + +int +rte_eal_hugepage_attach(void) +{ + return internal_config.legacy_mem ? + eal_legacy_hugepage_attach() : + eal_hugepage_attach(); +} + int rte_eal_using_phys_addrs(void) { return phys_addrs_available; } + +static int __rte_unused +memseg_primary_init_32(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int active_sockets, hpi_idx, msl_idx = 0; + unsigned int socket_id, i; + struct rte_memseg_list *msl; + uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem; + uint64_t max_mem; + + /* no-huge does not need this at all */ + if (internal_config.no_hugetlbfs) + return 0; + + /* this is a giant hack, but desperate times call for desperate + * measures. in legacy 32-bit mode, we cannot preallocate VA space, + * because having upwards of 2 gigabytes of VA space already mapped will + * interfere with our ability to map and sort hugepages. + * + * therefore, in legacy 32-bit mode, we will be initializing memseg + * lists much later - in eal_memory.c, right after we unmap all the + * unneeded pages. this will not affect secondary processes, as those + * should be able to mmap the space without (too many) problems. + */ + if (internal_config.legacy_mem) + return 0; + + /* 32-bit mode is a very special case. we cannot know in advance where + * the user will want to allocate their memory, so we have to do some + * heuristics. + */ + active_sockets = 0; + total_requested_mem = 0; + if (internal_config.force_sockets) + for (i = 0; i < rte_socket_count(); i++) { + uint64_t mem; + + socket_id = rte_socket_id_by_idx(i); + mem = internal_config.socket_mem[socket_id]; + + if (mem == 0) + continue; + + active_sockets++; + total_requested_mem += mem; + } + else + total_requested_mem = internal_config.memory; + + max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; + if (total_requested_mem > max_mem) { + RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n", + (unsigned int)(max_mem >> 20)); + return -1; + } + total_extra_mem = max_mem - total_requested_mem; + extra_mem_per_socket = active_sockets == 0 ? 
total_extra_mem : + total_extra_mem / active_sockets; + + /* the allocation logic is a little bit convoluted, but here's how it + * works, in a nutshell: + * - if user hasn't specified on which sockets to allocate memory via + * --socket-mem, we allocate all of our memory on master core socket. + * - if user has specified sockets to allocate memory on, there may be + * some "unused" memory left (e.g. if user has specified --socket-mem + * such that not all memory adds up to 2 gigabytes), so add it to all + * sockets that are in use equally. + * + * page sizes are sorted by size in descending order, so we can safely + * assume that we dispense with bigger page sizes first. + */ + + /* create memseg lists */ + for (i = 0; i < rte_socket_count(); i++) { + int hp_sizes = (int) internal_config.num_hugepage_sizes; + uint64_t max_socket_mem, cur_socket_mem; + unsigned int master_lcore_socket; + struct rte_config *cfg = rte_eal_get_configuration(); + bool skip; + + socket_id = rte_socket_id_by_idx(i); + +#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (socket_id > 0) + break; +#endif + + /* if we didn't specifically request memory on this socket */ + skip = active_sockets != 0 && + internal_config.socket_mem[socket_id] == 0; + /* ...or if we didn't specifically request memory on *any* + * socket, and this is not master lcore + */ + master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore); + skip |= active_sockets == 0 && socket_id != master_lcore_socket; + + if (skip) { + RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n", + socket_id); + continue; + } + + /* max amount of memory on this socket */ + max_socket_mem = (active_sockets != 0 ? + internal_config.socket_mem[socket_id] : + internal_config.memory) + + extra_mem_per_socket; + cur_socket_mem = 0; + + for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) { + uint64_t max_pagesz_mem, cur_pagesz_mem = 0; + uint64_t hugepage_sz; + struct hugepage_info *hpi; + int type_msl_idx, max_segs, total_segs = 0; + + hpi = &internal_config.hugepage_info[hpi_idx]; + hugepage_sz = hpi->hugepage_sz; + + /* check if pages are actually available */ + if (hpi->num_pages[socket_id] == 0) + continue; + + max_segs = RTE_MAX_MEMSEG_PER_TYPE; + max_pagesz_mem = max_socket_mem - cur_socket_mem; + + /* make it multiple of page size */ + max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem, + hugepage_sz); + + RTE_LOG(DEBUG, EAL, "Attempting to preallocate " + "%" PRIu64 "M on socket %i\n", + max_pagesz_mem >> 20, socket_id); + + type_msl_idx = 0; + while (cur_pagesz_mem < max_pagesz_mem && + total_segs < max_segs) { + uint64_t cur_mem; + unsigned int n_segs; + + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + return -1; + } + + msl = &mcfg->memsegs[msl_idx]; + + cur_mem = get_mem_amount(hugepage_sz, + max_pagesz_mem); + n_segs = cur_mem / hugepage_sz; + + if (alloc_memseg_list(msl, hugepage_sz, n_segs, + socket_id, type_msl_idx)) { + /* failing to allocate a memseg list is + * a serious error. + */ + RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); + return -1; + } + + if (alloc_va_space(msl)) { + /* if we couldn't allocate VA space, we + * can try with smaller page sizes. 
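+ * (hugepage_info entries are walked from the largest to the smallest page size, so the next hpi_idx iteration will try a smaller one)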
+ */ + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n"); + /* deallocate memseg list */ + if (free_memseg_list(msl)) + return -1; + break; + } + + total_segs += msl->memseg_arr.len; + cur_pagesz_mem = total_segs * hugepage_sz; + type_msl_idx++; + msl_idx++; + } + cur_socket_mem += cur_pagesz_mem; + } + if (cur_socket_mem == 0) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n", + socket_id); + return -1; + } + } + + return 0; +} + +static int __rte_unused +memseg_primary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, socket_id, hpi_idx, msl_idx = 0; + struct rte_memseg_list *msl; + uint64_t max_mem, total_mem; + + /* no-huge does not need this at all */ + if (internal_config.no_hugetlbfs) + return 0; + + max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; + total_mem = 0; + + /* create memseg lists */ + for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; + hpi_idx++) { + struct hugepage_info *hpi; + uint64_t hugepage_sz; + + hpi = &internal_config.hugepage_info[hpi_idx]; + hugepage_sz = hpi->hugepage_sz; + + for (i = 0; i < (int) rte_socket_count(); i++) { + uint64_t max_type_mem, total_type_mem = 0; + int type_msl_idx, max_segs, total_segs = 0; + + socket_id = rte_socket_id_by_idx(i); + +#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (socket_id > 0) + break; +#endif + + if (total_mem >= max_mem) + break; + + max_type_mem = RTE_MIN(max_mem - total_mem, + (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); + max_segs = RTE_MAX_MEMSEG_PER_TYPE; + + type_msl_idx = 0; + while (total_type_mem < max_type_mem && + total_segs < max_segs) { + uint64_t cur_max_mem, cur_mem; + unsigned int n_segs; + + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + return -1; + } + + msl = &mcfg->memsegs[msl_idx++]; + + cur_max_mem = max_type_mem - total_type_mem; + + cur_mem = get_mem_amount(hugepage_sz, + cur_max_mem); + n_segs = cur_mem / hugepage_sz; + + if (alloc_memseg_list(msl, hugepage_sz, n_segs, + socket_id, type_msl_idx)) + return -1; + + total_segs += msl->memseg_arr.len; + total_type_mem = total_segs * hugepage_sz; + type_msl_idx++; + + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + return -1; + } + } + total_mem += total_type_mem; + } + } + return 0; +} + +static int +memseg_secondary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx = 0; + struct rte_memseg_list *msl; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + + msl = &mcfg->memsegs[msl_idx]; + + /* skip empty memseg lists */ + if (msl->memseg_arr.len == 0) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { + RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); + return -1; + } + + /* preallocate VA space */ + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); + return -1; + } + } + + return 0; +} + +int +rte_eal_memseg_init(void) +{ + return rte_eal_process_type() == RTE_PROC_PRIMARY ? 
+#ifndef RTE_ARCH_64 + memseg_primary_init_32() : +#else + memseg_primary_init() : +#endif + memseg_secondary_init(); +} diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 08e150b7..b496fc71 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg) if (eal_thread_set_affinity() < 0) rte_panic("cannot set affinity\n"); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n", lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "..."); @@ -176,7 +176,7 @@ int rte_sys_gettid(void) int rte_thread_setname(pthread_t id, const char *name) { - int ret = -1; + int ret = ENOSYS; #if defined(__GLIBC__) && defined(__GLIBC_PREREQ) #if __GLIBC_PREREQ(2, 12) ret = pthread_setname_np(id, name); @@ -184,5 +184,5 @@ int rte_thread_setname(pthread_t id, const char *name) #endif RTE_SET_USED(id); RTE_SET_USED(name); - return ret; + return -ret; } diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index 161322f2..2766bd78 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -137,7 +137,6 @@ int rte_eal_hpet_init(int make_default) { int fd, ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; if (internal_config.no_hpet) { RTE_LOG(NOTICE, EAL, "HPET is disabled\n"); @@ -178,7 +177,7 @@ rte_eal_hpet_init(int make_default) /* create a thread that will increment a global variable for * msb (hpet is 32 bits by default under linux) */ - ret = pthread_create(&msb_inc_thread_id, NULL, + ret = rte_ctrl_thread_create(&msb_inc_thread_id, "hpet-msb-inc", NULL, (void *(*)(void *))hpet_msb_inc, NULL); if (ret != 0) { RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n"); @@ -186,15 +185,6 @@ rte_eal_hpet_init(int make_default) return -1; } - /* - * Set thread_name for aid in debugging. - */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); - ret = rte_thread_setname(msb_inc_thread_id, thread_name); - if (ret != 0) - RTE_LOG(DEBUG, EAL, - "Cannot set HPET timer thread name!\n"); - if (make_default) eal_timer_source = EAL_TIMER_HPET; return 0; diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index e44ae4d0..c68dc38e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ +#include #include #include #include #include +#include #include #include #include @@ -18,59 +20,258 @@ #ifdef VFIO_PRESENT +#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb" + +/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can + * recreate the mappings for DPDK segments, but we cannot do so for memory that + * was registered by the user themselves, so we need to store the user mappings + * somewhere, to recreate them later. 
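+ * user mappings are therefore kept in the per-container user_mem_maps table below, so that they can be replayed when DMA mappings need to be recreated.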
+ */ +#define VFIO_MAX_USER_MEM_MAPS 256 +struct user_mem_map { + uint64_t addr; + uint64_t iova; + uint64_t len; +}; + +struct user_mem_maps { + rte_spinlock_recursive_t lock; + int n_maps; + struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS]; +}; + +struct vfio_config { + int vfio_enabled; + int vfio_container_fd; + int vfio_active_groups; + const struct vfio_iommu_type *vfio_iommu_type; + struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; + struct user_mem_maps mem_maps; +}; + /* per-process VFIO config */ -static struct vfio_config vfio_cfg; +static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS]; +static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0]; static int vfio_type1_dma_map(int); +static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_spapr_dma_map(int); +static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_noiommu_dma_map(int); +static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); +static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, + uint64_t iova, uint64_t len, int do_map); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ - { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + { + .type_id = RTE_VFIO_TYPE1, + .name = "Type 1", + .dma_map_func = &vfio_type1_dma_map, + .dma_user_map_func = &vfio_type1_dma_mem_map + }, /* ppc64 IOMMU, otherwise known as spapr */ - { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, + { + .type_id = RTE_VFIO_SPAPR, + .name = "sPAPR", + .dma_map_func = &vfio_spapr_dma_map, + .dma_user_map_func = &vfio_spapr_dma_mem_map + }, /* IOMMU-less mode */ - { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, + { + .type_id = RTE_VFIO_NOIOMMU, + .name = "No-IOMMU", + .dma_map_func = &vfio_noiommu_dma_map, + .dma_user_map_func = &vfio_noiommu_dma_mem_map + }, }; -int -vfio_get_group_fd(int iommu_group_no) +static int +is_null_map(const struct user_mem_map *map) { - int i; - int vfio_group_fd; - char filename[PATH_MAX]; - struct vfio_group *cur_grp; + return map->addr == 0 && map->iova == 0 && map->len == 0; +} - /* check if we already have the group descriptor open */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) - return vfio_cfg.vfio_groups[i].fd; +/* we may need to merge user mem maps together in case of user mapping/unmapping + * chunks of memory, so we'll need a comparator function to sort segments. 
+ */ +static int +user_mem_map_cmp(const void *a, const void *b) +{ + const struct user_mem_map *umm_a = a; + const struct user_mem_map *umm_b = b; - /* Lets see first if there is room for a new group */ - if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + /* move null entries to end */ + if (is_null_map(umm_a)) + return 1; + if (is_null_map(umm_b)) return -1; - } - /* Now lets get an index for the new group */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].group_no == -1) { - cur_grp = &vfio_cfg.vfio_groups[i]; - break; - } + /* sort by iova first */ + if (umm_a->iova < umm_b->iova) + return -1; + if (umm_a->iova > umm_b->iova) + return 1; - /* This should not happen */ - if (i == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + if (umm_a->addr < umm_b->addr) + return -1; + if (umm_a->addr > umm_b->addr) + return 1; + + if (umm_a->len < umm_b->len) return -1; + if (umm_a->len > umm_b->len) + return 1; + + return 0; +} + +/* adjust user map entry. this may result in shortening of existing map, or in + * splitting existing map in two pieces. + */ +static void +adjust_map(struct user_mem_map *src, struct user_mem_map *end, + uint64_t remove_va_start, uint64_t remove_len) +{ + /* if va start is same as start address, we're simply moving start */ + if (remove_va_start == src->addr) { + src->addr += remove_len; + src->iova += remove_len; + src->len -= remove_len; + } else if (remove_va_start + remove_len == src->addr + src->len) { + /* we're shrinking mapping from the end */ + src->len -= remove_len; + } else { + /* we're blowing a hole in the middle */ + struct user_mem_map tmp; + uint64_t total_len = src->len; + + /* adjust source segment length */ + src->len = remove_va_start - src->addr; + + /* create temporary segment in the middle */ + tmp.addr = src->addr + src->len; + tmp.iova = src->iova + src->len; + tmp.len = remove_len; + + /* populate end segment - this one we will be keeping */ + end->addr = tmp.addr + tmp.len; + end->iova = tmp.iova + tmp.len; + end->len = total_len - src->len - tmp.len; + } +} + +/* try merging two maps into one, return 1 if succeeded */ +static int +merge_map(struct user_mem_map *left, struct user_mem_map *right) +{ + if (left->addr + left->len != right->addr) + return 0; + if (left->iova + left->len != right->iova) + return 0; + + left->len += right->len; + + memset(right, 0, sizeof(*right)); + + return 1; +} + +static struct user_mem_map * +find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr, + uint64_t iova, uint64_t len) +{ + uint64_t va_end = addr + len; + uint64_t iova_end = iova + len; + int i; + + for (i = 0; i < user_mem_maps->n_maps; i++) { + struct user_mem_map *map = &user_mem_maps->maps[i]; + uint64_t map_va_end = map->addr + map->len; + uint64_t map_iova_end = map->iova + map->len; + + /* check start VA */ + if (addr < map->addr || addr >= map_va_end) + continue; + /* check if VA end is within boundaries */ + if (va_end <= map->addr || va_end > map_va_end) + continue; + + /* check start IOVA */ + if (iova < map->iova || iova >= map_iova_end) + continue; + /* check if IOVA end is within boundaries */ + if (iova_end <= map->iova || iova_end > map_iova_end) + continue; + + /* we've found our map */ + return map; + } + return NULL; +} + +/* this will sort all user maps, and merge/compact any adjacent maps */ +static void +compact_user_maps(struct user_mem_maps *user_mem_maps) +{ + int i, n_merged, cur_idx; + + 
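
For illustration, here is what the sort-merge-compact sequence in compact_user_maps() amounts to, using hypothetical addresses (this sketch is not part of the patch; IOVA equals VA for brevity):

    /* two stored maps that happen to be contiguous in both VA and IOVA */
    struct user_mem_map maps[] = {
            { .addr = 0x2000, .iova = 0x2000, .len = 0x1000 },
            { .addr = 0x1000, .iova = 0x1000, .len = 0x1000 },
    };
    /* qsort() orders them by IOVA:    maps[0] = 0x1000, maps[1] = 0x2000  */
    /* merge_map(&maps[0], &maps[1]):  maps[0].len becomes 0x2000 and      */
    /*                                 maps[1] is zeroed out               */
    /* the hole-removal pass below then shrinks n_maps from 2 to 1         */
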
qsort(user_mem_maps->maps, user_mem_maps->n_maps, + sizeof(user_mem_maps->maps[0]), user_mem_map_cmp); + + /* we'll go over the list backwards when merging */ + n_merged = 0; + for (i = user_mem_maps->n_maps - 2; i >= 0; i--) { + struct user_mem_map *l, *r; + + l = &user_mem_maps->maps[i]; + r = &user_mem_maps->maps[i + 1]; + + if (is_null_map(l) || is_null_map(r)) + continue; + + if (merge_map(l, r)) + n_merged++; } + + /* the entries are still sorted, but now they have holes in them, so + * walk through the list and remove the holes + */ + if (n_merged > 0) { + cur_idx = 0; + for (i = 0; i < user_mem_maps->n_maps; i++) { + if (!is_null_map(&user_mem_maps->maps[i])) { + struct user_mem_map *src, *dst; + + src = &user_mem_maps->maps[i]; + dst = &user_mem_maps->maps[cur_idx++]; + + if (src != dst) { + memcpy(dst, src, sizeof(*src)); + memset(src, 0, sizeof(*src)); + } + } + } + user_mem_maps->n_maps = cur_idx; + } +} + +static int +vfio_open_group_fd(int iommu_group_num) +{ + int vfio_group_fd; + char filename[PATH_MAX]; + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + /* if primary, try to open the group */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try regular group format */ snprintf(filename, sizeof(filename), - VFIO_GROUP_FMT, iommu_group_no); + VFIO_GROUP_FMT, iommu_group_num); vfio_group_fd = open(filename, O_RDWR); if (vfio_group_fd < 0) { /* if file not found, it's not an error */ @@ -82,7 +283,8 @@ vfio_get_group_fd(int iommu_group_no) /* special case: try no-IOMMU path as well */ snprintf(filename, sizeof(filename), - VFIO_NOIOMMU_GROUP_FMT, iommu_group_no); + VFIO_NOIOMMU_GROUP_FMT, + iommu_group_num); vfio_group_fd = open(filename, O_RDWR); if (vfio_group_fd < 0) { if (errno != ENOENT) { @@ -95,178 +297,285 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - cur_grp->group_no = iommu_group_no; - cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary - * process via our socket + * process via mp channel. 
*/ - else { - int socket_fd, ret; + p->req = SOCKET_REQ_GROUP; + p->group_num = iommu_group_num; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + vfio_group_fd = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + vfio_group_fd = mp_rep->fds[0]; + } else if (p->result == SOCKET_NO_FD) { + RTE_LOG(ERR, EAL, " bad VFIO group fd\n"); + vfio_group_fd = 0; + } + free(mp_reply.msgs); + } - socket_fd = vfio_mp_sync_connect_to_primary(); + if (vfio_group_fd < 0) + RTE_LOG(ERR, EAL, " cannot request group fd\n"); + return vfio_group_fd; +} - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { - RTE_LOG(ERR, EAL, " cannot send group number!\n"); - close(socket_fd); - return -1; - } - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - close(socket_fd); - return 0; - case SOCKET_OK: - vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, store it and return it */ - if (vfio_group_fd > 0) { - close(socket_fd); - cur_grp->group_no = iommu_group_no; - cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; - return vfio_group_fd; - } - /* fall-through on error */ - default: - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; +static struct vfio_config * +get_vfio_cfg_by_group_num(int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) { + if (vfio_cfg->vfio_groups[j].group_num == + iommu_group_num) + return vfio_cfg; } } - return -1; + + return NULL; } +static struct vfio_config * +get_vfio_cfg_by_group_fd(int vfio_group_fd) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) + if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) + return vfio_cfg; + } -static int -get_vfio_group_idx(int vfio_group_fd) + return NULL; +} + +static struct vfio_config * +get_vfio_cfg_by_container_fd(int container_fd) +{ + int i; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == container_fd) + return &vfio_cfgs[i]; + } + + return NULL; +} + +int +rte_vfio_get_group_fd(int iommu_group_num) { int i; + int vfio_group_fd; + struct vfio_group *cur_grp; + struct vfio_config *vfio_cfg; + + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? 
vfio_cfg : default_vfio_cfg; + + /* check if we already have the group descriptor open */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) + return vfio_cfg->vfio_groups[i].fd; + + /* Lets see first if there is room for a new group */ + if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) - return i; + if (vfio_cfg->vfio_groups[i].group_num == -1) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); + if (vfio_group_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } + + cur_grp->group_num = iommu_group_num; + cur_grp->fd = vfio_group_fd; + vfio_cfg->vfio_active_groups++; + + return vfio_group_fd; +} + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) + if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) + return j; + } + return -1; } static void vfio_group_device_get(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); else - vfio_cfg.vfio_groups[i].devices++; + vfio_cfg->vfio_groups[i].devices++; } static void vfio_group_device_put(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); else - vfio_cfg.vfio_groups[i].devices--; + vfio_cfg->vfio_groups[i].devices--; } static int vfio_group_device_count(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return -1; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); return -1; } - return vfio_cfg.vfio_groups[i].devices; + return vfio_cfg->vfio_groups[i].devices; } -int -rte_vfio_clear_group(int vfio_group_fd) +static void +vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, + void *arg __rte_unused) { - int i; - int socket_fd, ret; - - if (internal_config.process_type == RTE_PROC_PRIMARY) { - - i = get_vfio_group_idx(vfio_group_fd); - if (i < 0) - return -1; - vfio_cfg.vfio_groups[i].group_no = -1; - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_groups[i].devices = 0; - vfio_cfg.vfio_active_groups--; - return 0; + struct rte_memseg_list *msl; + struct rte_memseg *ms; + size_t cur_len = 0; + + msl = rte_mem_virt2memseg_list(addr); + + /* for IOVA as VA mode, no need to care for IOVA addresses */ + if (rte_eal_iova_mode() == RTE_IOVA_VA) { + uint64_t vfio_va = 
(uint64_t)(uintptr_t)addr; + if (type == RTE_MEM_EVENT_ALLOC) + vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, + len, 1); + else + vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, + len, 0); + return; } - /* This is just for SECONDARY processes */ - socket_fd = vfio_mp_sync_connect_to_primary(); + /* memsegs are contiguous in memory */ + ms = rte_mem_virt2memseg(addr, msl); + while (cur_len < len) { + if (type == RTE_MEM_EVENT_ALLOC) + vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, + ms->iova, ms->len, 1); + else + vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, + ms->iova, ms->len, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; + cur_len += ms->len; + ++ms; } +} + +int +rte_vfio_clear_group(int vfio_group_fd) +{ + int i; + struct vfio_config *vfio_cfg; - if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); return -1; } - if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { - RTE_LOG(ERR, EAL, " cannot send group fd!\n"); - close(socket_fd); + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) return -1; - } + vfio_cfg->vfio_groups[i].group_num = -1; + vfio_cfg->vfio_groups[i].fd = -1; + vfio_cfg->vfio_groups[i].devices = 0; + vfio_cfg->vfio_active_groups--; - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); - close(socket_fd); - break; - case SOCKET_OK: - close(socket_fd); - return 0; - case SOCKET_ERR: - RTE_LOG(ERR, EAL, " Socket error\n"); - close(socket_fd); - break; - default: - RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); - close(socket_fd); - } - return -1; + return 0; } int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; struct vfio_group_status group_status = { .argsz = sizeof(group_status) }; + struct vfio_config *vfio_cfg; + struct user_mem_maps *user_mem_maps; + int vfio_container_fd; int vfio_group_fd; - int iommu_group_no; - int ret; + int iommu_group_num; + int i, ret; /* get group number */ - ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no); + ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); if (ret == 0) { RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", dev_addr); @@ -278,7 +587,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, return -1; /* get the actual group fd */ - vfio_group_fd = vfio_get_group_fd(iommu_group_no); + vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); if (vfio_group_fd < 0) return -1; @@ -309,12 +618,18 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, return -1; } + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? 
vfio_cfg : default_vfio_cfg; + vfio_container_fd = vfio_cfg->vfio_container_fd; + user_mem_maps = &vfio_cfg->mem_maps; + /* check if group does not have a container yet */ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { /* add group to a container */ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, - &vfio_cfg.vfio_container_fd); + &vfio_container_fd); if (ret) { RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); @@ -332,10 +647,12 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, * functionality. */ if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_active_groups == 1) { + vfio_cfg->vfio_active_groups == 1 && + vfio_group_device_count(vfio_group_fd) == 0) { + const struct vfio_iommu_type *t; + /* select an IOMMU type which we will be using */ - const struct vfio_iommu_type *t = - vfio_set_iommu_type(vfio_cfg.vfio_container_fd); + t = vfio_set_iommu_type(vfio_container_fd); if (!t) { RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", @@ -344,15 +661,75 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, rte_vfio_clear_group(vfio_group_fd); return -1; } - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + /* lock memory hotplug before mapping and release it + * after registering callback, to prevent races + */ + rte_rwlock_read_lock(mem_lock); + if (vfio_cfg == default_vfio_cfg) + ret = t->dma_map_func(vfio_container_fd); + else + ret = 0; if (ret) { RTE_LOG(ERR, EAL, " %s DMA remapping failed, error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); rte_vfio_clear_group(vfio_group_fd); + rte_rwlock_read_unlock(mem_lock); + return -1; + } + + vfio_cfg->vfio_iommu_type = t; + + /* re-map all user-mapped segments */ + rte_spinlock_recursive_lock(&user_mem_maps->lock); + + /* this IOMMU type may not support DMA mapping, but + * if we have mappings in the list - that means we have + * previously mapped something successfully, so we can + * be sure that DMA mapping is supported. 
+ */ + for (i = 0; i < user_mem_maps->n_maps; i++) { + struct user_mem_map *map; + map = &user_mem_maps->maps[i]; + + ret = t->dma_user_map_func( + vfio_container_fd, + map->addr, map->iova, map->len, + 1); + if (ret) { + RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: " + "va: 0x%" PRIx64 " " + "iova: 0x%" PRIx64 " " + "len: 0x%" PRIu64 "\n", + map->addr, map->iova, + map->len); + rte_spinlock_recursive_unlock( + &user_mem_maps->lock); + rte_rwlock_read_unlock(mem_lock); + return -1; + } + } + rte_spinlock_recursive_unlock(&user_mem_maps->lock); + + /* register callback for mem events */ + if (vfio_cfg == default_vfio_cfg) + ret = rte_mem_event_callback_register( + VFIO_MEM_EVENT_CLB_NAME, + vfio_mem_event_callback, NULL); + else + ret = 0; + /* unlock memory hotplug */ + rte_rwlock_read_unlock(mem_lock); + + if (ret && rte_errno != ENOTSUP) { + RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n"); return -1; } + if (ret) + RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n"); + else + RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n"); } } @@ -390,30 +767,45 @@ int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int vfio_dev_fd) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; struct vfio_group_status group_status = { .argsz = sizeof(group_status) }; + struct vfio_config *vfio_cfg; int vfio_group_fd; - int iommu_group_no; + int iommu_group_num; int ret; + /* we don't want any DMA mapping messages to come while we're detaching + * VFIO device, because this might be the last device and we might need + * to unregister the callback. + */ + rte_rwlock_read_lock(mem_lock); + /* get group number */ - ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no); + ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); if (ret <= 0) { RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", dev_addr); /* This is an error at this point. */ - return -1; + ret = -1; + goto out; } /* get the actual group fd */ - vfio_group_fd = vfio_get_group_fd(iommu_group_no); + vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); if (vfio_group_fd <= 0) { - RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n", + RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n", dev_addr); - return -1; + ret = -1; + goto out; } + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; + /* At this point we got an active group. Closing it will make the * container detachment. If this is the last active group, VFIO kernel * code will unset the container and the IOMMU mappings. @@ -423,7 +815,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, if (close(vfio_dev_fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", dev_addr); - return -1; + ret = -1; + goto out; } /* An VFIO group can have several devices attached. 
Just when there is @@ -435,30 +828,53 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, if (close(vfio_group_fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", dev_addr); - return -1; + ret = -1; + goto out; } if (rte_vfio_clear_group(vfio_group_fd) < 0) { RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", dev_addr); - return -1; + ret = -1; + goto out; } } - return 0; + /* if there are no active device groups, unregister the callback to + * avoid spurious attempts to map/unmap memory from VFIO. + */ + if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0) + rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, + NULL); + + /* success */ + ret = 0; + +out: + rte_rwlock_read_unlock(mem_lock); + return ret; } int rte_vfio_enable(const char *modname) { /* initialize group list */ - int i; + int i, j; int vfio_available; - for (i = 0; i < VFIO_MAX_GROUPS; i++) { - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_groups[i].group_no = -1; - vfio_cfg.vfio_groups[i].devices = 0; + rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfgs[i].vfio_container_fd = -1; + vfio_cfgs[i].vfio_active_groups = 0; + vfio_cfgs[i].vfio_iommu_type = NULL; + vfio_cfgs[i].mem_maps.lock = lock; + + for (j = 0; j < VFIO_MAX_GROUPS; j++) { + vfio_cfgs[i].vfio_groups[j].fd = -1; + vfio_cfgs[i].vfio_groups[j].group_num = -1; + vfio_cfgs[i].vfio_groups[j].devices = 0; + } } /* inform the user that we are probing for VFIO */ @@ -480,12 +896,12 @@ rte_vfio_enable(const char *modname) return 0; } - vfio_cfg.vfio_container_fd = vfio_get_container_fd(); + default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd(); /* check if we have VFIO driver enabled */ - if (vfio_cfg.vfio_container_fd != -1) { + if (default_vfio_cfg->vfio_container_fd != -1) { RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); - vfio_cfg.vfio_enabled = 1; + default_vfio_cfg->vfio_enabled = 1; } else { RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); } @@ -497,7 +913,7 @@ int rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname) > 0; - return vfio_cfg.vfio_enabled && mod_available; + return default_vfio_cfg->vfio_enabled && mod_available; } const struct vfio_iommu_type * @@ -558,9 +974,14 @@ vfio_has_supported_extensions(int vfio_container_fd) } int -vfio_get_container_fd(void) +rte_vfio_get_container_fd(void) { int ret, vfio_container_fd; + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + /* if we're in a primary process, try to open the container */ if (internal_config.process_type == RTE_PROC_PRIMARY) { @@ -591,39 +1012,35 @@ vfio_get_container_fd(void) } return vfio_container_fd; - } else { - /* - * if we're in a secondary process, request container fd from the - * primary process via our socket - */ - int socket_fd; - - socket_fd = vfio_mp_sync_connect_to_primary(); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); - if (vfio_container_fd < 0) { - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; + } + 
/* + * if we're in a secondary process, request container fd from the + * primary process via mp channel + */ + p->req = SOCKET_REQ_CONTAINER; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + vfio_container_fd = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + free(mp_reply.msgs); + return mp_rep->fds[0]; } - close(socket_fd); - return vfio_container_fd; + free(mp_reply.msgs); } + RTE_LOG(ERR, EAL, " cannot request container fd\n"); return -1; } int -vfio_get_group_no(const char *sysfs_base, - const char *dev_addr, int *iommu_group_no) +rte_vfio_get_group_num(const char *sysfs_base, + const char *dev_addr, int *iommu_group_num) { char linkname[PATH_MAX]; char filename[PATH_MAX]; @@ -655,7 +1072,7 @@ vfio_get_group_no(const char *sysfs_base, errno = 0; group_tok = tok[ret - 1]; end = group_tok; - *iommu_group_no = strtol(group_tok, &end, 10); + *iommu_group_num = strtol(group_tok, &end, 10); if ((end != group_tok && *end != '\0') || errno != 0) { RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr); return -1; @@ -665,34 +1082,49 @@ vfio_get_group_no(const char *sysfs_base, } static int -vfio_type1_dma_map(int vfio_container_fd) +type1_map(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; + int *vfio_container_fd = arg; - /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; + return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, + ms->len, 1); +} - if (ms[i].addr == NULL) - break; +static int +vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) +{ + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + if (do_map != 0) { memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - if (rte_eal_iova_mode() == RTE_IOVA_VA) - dma_map.iova = dma_map.vaddr; - else - dma_map.iova = ms[i].iova; - dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + } else { + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + dma_unmap.size = len; + dma_unmap.iova = iova; + ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, + &dma_unmap); if (ret) { - RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, - strerror(errno)); + RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", + errno, strerror(errno)); return -1; } } @@ -701,24 +1133,107 @@ vfio_type1_dma_map(int vfio_container_fd) } static int -vfio_spapr_dma_map(int vfio_container_fd) +vfio_type1_dma_map(int vfio_container_fd) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; + return rte_memseg_walk(type1_map, &vfio_container_fd); +} - struct 
vfio_iommu_spapr_register_memory reg = { - .argsz = sizeof(reg), - .flags = 0 - }; - struct vfio_iommu_spapr_tce_info info = { - .argsz = sizeof(info), - }; - struct vfio_iommu_spapr_tce_create create = { - .argsz = sizeof(create), - }; - struct vfio_iommu_spapr_tce_remove remove = { - .argsz = sizeof(remove), +static int +vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) +{ + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + + if (do_map != 0) { + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + } else { + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + reg.vaddr = (uintptr_t) vaddr; + reg.size = len; + + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, ®); + if (ret) { + RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + dma_unmap.size = len; + dma_unmap.iova = iova; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, + &dma_unmap); + if (ret) { + RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + } + + return 0; +} + +static int +vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + int *vfio_container_fd = arg; + + return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, + ms->len, 1); +} + +struct spapr_walk_param { + uint64_t window_size; + uint64_t hugepage_sz; +}; +static int +vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + struct spapr_walk_param *param = arg; + uint64_t max = ms->iova + ms->len; + + if (max > param->window_size) { + param->hugepage_sz = ms->hugepage_sz; + param->window_size = max; + } + + return 0; +} + +static int +vfio_spapr_create_new_dma_window(int vfio_container_fd, + struct vfio_iommu_spapr_tce_create *create) { + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + }; + struct vfio_iommu_spapr_tce_info info = { + .argsz = sizeof(info), }; + int ret; /* query spapr iommu info */ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); @@ -737,69 +1252,158 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* create DMA window from 0 to max(phys_addr + len) */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if (ms[i].addr == NULL) - break; - - create.window_size = RTE_MAX(create.window_size, - ms[i].iova + ms[i].len); - } - - /* sPAPR requires window size to be a power of 2 */ - create.window_size = rte_align64pow2(create.window_size); - create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 1; - - ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); + /* create new DMA window */ + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create); if (ret) { RTE_LOG(ERR, EAL, " cannot create new DMA window, " "error %i (%s)\n", errno, strerror(errno)); 
return -1; } - if (create.start_addr != 0) { + if (create->start_addr != 0) { RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); return -1; } - /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; + return 0; +} - if (ms[i].addr == NULL) - break; +static int +vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) +{ + struct spapr_walk_param param; + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct vfio_config *vfio_cfg; + struct user_mem_maps *user_mem_maps; + int i, ret = 0; - reg.vaddr = (uintptr_t) ms[i].addr; - reg.size = ms[i].len; - ret = ioctl(vfio_container_fd, - VFIO_IOMMU_SPAPR_REGISTER_MEMORY, ®); - if (ret) { - RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } + vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid container fd!\n"); + return -1; + } - memset(&dma_map, 0, sizeof(dma_map)); - dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - if (rte_eal_iova_mode() == RTE_IOVA_VA) - dma_map.iova = dma_map.vaddr; - else - dma_map.iova = ms[i].iova; - dma_map.flags = VFIO_DMA_MAP_FLAG_READ | - VFIO_DMA_MAP_FLAG_WRITE; + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); - ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + /* check if window size needs to be adjusted */ + memset(¶m, 0, sizeof(param)); - if (ret) { - RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; + /* we're inside a callback so use thread-unsafe version */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk, + ¶m) < 0) { + RTE_LOG(ERR, EAL, "Could not get window size\n"); + ret = -1; + goto out; + } + + /* also check user maps */ + for (i = 0; i < user_mem_maps->n_maps; i++) { + uint64_t max = user_mem_maps->maps[i].iova + + user_mem_maps->maps[i].len; + create.window_size = RTE_MAX(create.window_size, max); + } + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(param.window_size); + create.page_shift = __builtin_ctzll(param.hugepage_sz); + create.levels = 1; + + if (do_map) { + void *addr; + /* re-create window and remap the entire memory */ + if (iova > create.window_size) { + if (vfio_spapr_create_new_dma_window(vfio_container_fd, + &create) < 0) { + RTE_LOG(ERR, EAL, "Could not create new DMA window\n"); + ret = -1; + goto out; + } + /* we're inside a callback, so use thread-unsafe version + */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk, + &vfio_container_fd) < 0) { + RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n"); + ret = -1; + goto out; + } + /* remap all user maps */ + for (i = 0; i < user_mem_maps->n_maps; i++) { + struct user_mem_map *map = + &user_mem_maps->maps[i]; + if (vfio_spapr_dma_do_map(vfio_container_fd, + map->addr, map->iova, map->len, + 1)) { + RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n"); + ret = -1; + goto out; + } + } + } + + /* now that we've remapped all of the memory that was present + * before, map the segment that we were requested to map. 
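
To make the window arithmetic concrete: sPAPR needs one DMA window large enough to cover the highest IOVA in use, rounded up to a power of two, with the page shift derived from the hugepage size. A minimal sketch with assumed values (2 MB hugepages, memory ending at 6 GB), not part of the patch:

    uint64_t highest_iova = 6ULL << 30;                    /* 0x180000000          */
    uint64_t window = rte_align64pow2(highest_iova);       /* 0x200000000, i.e. 8 GB */
    uint32_t page_shift = __builtin_ctzll(RTE_PGSIZE_2M);  /* 21                   */
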
+ * + * however, if we were called by the callback, the memory we + * were called with was already in the memseg list, so previous + * mapping should've mapped that segment already. + * + * virt2memseg_list is a relatively cheap check, so use that. if + * memory is within any memseg list, it's a memseg, so it's + * already mapped. + */ + addr = (void *)(uintptr_t)vaddr; + if (rte_mem_virt2memseg_list(addr) == NULL && + vfio_spapr_dma_do_map(vfio_container_fd, + vaddr, iova, len, 1) < 0) { + RTE_LOG(ERR, EAL, "Could not map segment\n"); + ret = -1; + goto out; + } + } else { + /* for unmap, check if iova within DMA window */ + if (iova > create.window_size) { + RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap"); + ret = -1; + goto out; } + vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0); } +out: + rte_spinlock_recursive_unlock(&user_mem_maps->lock); + return ret; +} + +static int +vfio_spapr_dma_map(int vfio_container_fd) +{ + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct spapr_walk_param param; + + memset(¶m, 0, sizeof(param)); + + /* create DMA window from 0 to max(phys_addr + len) */ + rte_memseg_walk(vfio_spapr_window_size_walk, ¶m); + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(param.window_size); + create.page_shift = __builtin_ctzll(param.hugepage_sz); + create.levels = 1; + + if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) { + RTE_LOG(ERR, EAL, "Could not create new DMA window\n"); + return -1; + } + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0) + return -1; return 0; } @@ -811,6 +1415,175 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +static int +vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd, + uint64_t __rte_unused vaddr, + uint64_t __rte_unused iova, uint64_t __rte_unused len, + int __rte_unused do_map) +{ + /* No-IOMMU mode does not need DMA mapping */ + return 0; +} + +static int +vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) +{ + const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type; + + if (!t) { + RTE_LOG(ERR, EAL, " VFIO support not initialized\n"); + rte_errno = ENODEV; + return -1; + } + + if (!t->dma_user_map_func) { + RTE_LOG(ERR, EAL, + " VFIO custom DMA region maping not supported by IOMMU %s\n", + t->name); + rte_errno = ENOTSUP; + return -1; + } + + return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova, + len, do_map); +} + +static int +container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct user_mem_map *new_map; + struct user_mem_maps *user_mem_maps; + int ret = 0; + + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); + if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { + RTE_LOG(ERR, EAL, "No more space for user mem maps\n"); + rte_errno = ENOMEM; + ret = -1; + goto out; + } + /* map the entry */ + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) { + /* technically, this will fail if there are currently no devices + * plugged in, even if a device were added later, this mapping + * might have succeeded. however, since we cannot verify if this + * is a valid mapping without having a device attached, consider + * this to be unsupported, because we can't just store any old + * mapping and pollute list of active mappings willy-nilly. 
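
The restriction described above (user DMA maps are only accepted once an IOMMU type is known, i.e. after a device has been attached) is worth keeping in mind when using the public API this patch adds. A minimal, hypothetical usage sketch, assuming IOVA-as-VA mode so the virtual address can double as the IOVA, and a device already set up through rte_vfio_setup_device():

    #include <sys/mman.h>
    #include <rte_vfio.h>

    static int
    map_external_buffer(size_t len) /* len assumed page-aligned */
    {
            void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (va == MAP_FAILED)
                    return -1;

            /* record the region; it is replayed if a device is hotplugged later */
            if (rte_vfio_dma_map((uint64_t)(uintptr_t)va,
                            (uint64_t)(uintptr_t)va, len) < 0) {
                    munmap(va, len);
                    return -1;
            }

            /* ... DMA to/from the buffer ... */

            rte_vfio_dma_unmap((uint64_t)(uintptr_t)va,
                            (uint64_t)(uintptr_t)va, len);
            munmap(va, len);
            return 0;
    }

The container-scoped variants (rte_vfio_container_dma_map() and friends) take an explicit container fd obtained from rte_vfio_container_create() and otherwise follow the same pattern.
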
+ */ + RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n"); + ret = -1; + goto out; + } + /* create new user mem map entry */ + new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; + new_map->addr = vaddr; + new_map->iova = iova; + new_map->len = len; + + compact_user_maps(user_mem_maps); +out: + rte_spinlock_recursive_unlock(&user_mem_maps->lock); + return ret; +} + +static int +container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct user_mem_map *map, *new_map = NULL; + struct user_mem_maps *user_mem_maps; + int ret = 0; + + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); + + /* find our mapping */ + map = find_user_mem_map(user_mem_maps, vaddr, iova, len); + if (!map) { + RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n"); + rte_errno = EINVAL; + ret = -1; + goto out; + } + if (map->addr != vaddr || map->iova != iova || map->len != len) { + /* we're partially unmapping a previously mapped region, so we + * need to split entry into two. + */ + if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { + RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n"); + rte_errno = ENOMEM; + ret = -1; + goto out; + } + new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; + } + + /* unmap the entry */ + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) { + /* there may not be any devices plugged in, so unmapping will + * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't + * stop us from removing the mapping, as the assumption is we + * won't be needing this memory any more and thus will want to + * prevent it from being remapped again on hotplug. so, only + * fail if we indeed failed to unmap (e.g. if the mapping was + * within our mapped range but had invalid alignment). 
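
For the partial-unmap case handled a few lines above, the list bookkeeping works out as follows (hypothetical numbers; whether the underlying VFIO unmap ioctl accepts a partial range at all depends on the IOMMU backend):

    /* stored map:     addr/iova 0x100000000, len 8 MB                     */
    /* unmap request:  addr/iova 0x100200000, len 2 MB (middle chunk)      */
    /* adjust_map() then yields two entries:                               */
    /*   [0x100000000, len 2 MB]   and   [0x100400000, len 4 MB]           */
    /* compact_user_maps() re-sorts the list afterwards                    */
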
+ */ + if (rte_errno != ENODEV && rte_errno != ENOTSUP) { + RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n"); + ret = -1; + goto out; + } else { + RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n"); + } + } + /* remove map from the list of active mappings */ + if (new_map != NULL) { + adjust_map(map, new_map, vaddr, len); + + /* if we've created a new map by splitting, sort everything */ + if (!is_null_map(new_map)) { + compact_user_maps(user_mem_maps); + } else { + /* we've created a new mapping, but it was unused */ + user_mem_maps->n_maps--; + } + } else { + memset(map, 0, sizeof(*map)); + compact_user_maps(user_mem_maps); + user_mem_maps->n_maps--; + } + +out: + rte_spinlock_recursive_unlock(&user_mem_maps->lock); + return ret; +} + +int +rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_map(default_vfio_cfg, vaddr, iova, len); +} + +int +rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_unmap(default_vfio_cfg, vaddr, iova, len); +} + int rte_vfio_noiommu_is_enabled(void) { @@ -843,4 +1616,299 @@ rte_vfio_noiommu_is_enabled(void) return c == 'Y'; } -#endif +int +rte_vfio_container_create(void) +{ + int i; + + /* Find an empty slot to store new vfio config */ + for (i = 1; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == -1) + break; + } + + if (i == VFIO_MAX_CONTAINERS) { + RTE_LOG(ERR, EAL, "exceed max vfio container limit\n"); + return -1; + } + + vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd(); + if (vfio_cfgs[i].vfio_container_fd < 0) { + RTE_LOG(NOTICE, EAL, "fail to create a new container\n"); + return -1; + } + + return vfio_cfgs[i].vfio_container_fd; +} + +int __rte_experimental +rte_vfio_container_destroy(int container_fd) +{ + struct vfio_config *vfio_cfg; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num != -1) + rte_vfio_container_group_unbind(container_fd, + vfio_cfg->vfio_groups[i].group_num); + + close(container_fd); + vfio_cfg->vfio_container_fd = -1; + vfio_cfg->vfio_active_groups = 0; + vfio_cfg->vfio_iommu_type = NULL; + + return 0; +} + +int +rte_vfio_container_group_bind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp; + int vfio_group_fd; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + /* Check room for new group */ + if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num == -1) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); + if (vfio_group_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = iommu_group_num; + cur_grp->fd = vfio_group_fd; + cur_grp->devices = 0; + 
vfio_cfg->vfio_active_groups++; + + return vfio_group_fd; +} + +int +rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp = NULL; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS || cur_grp == NULL) { + RTE_LOG(ERR, EAL, "Specified group number not found\n"); + return -1; + } + + if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) { + RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for" + " iommu_group_num %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = -1; + cur_grp->fd = -1; + cur_grp->devices = 0; + vfio_cfg->vfio_active_groups--; + + return 0; +} + +int +rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_map(vfio_cfg, vaddr, iova, len); +} + +int +rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_unmap(vfio_cfg, vaddr, iova, len); +} + +#else + +int +rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_setup_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *vfio_dev_fd, + __rte_unused struct vfio_device_info *device_info) +{ + return -1; +} + +int +rte_vfio_release_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, __rte_unused int fd) +{ + return -1; +} + +int +rte_vfio_enable(__rte_unused const char *modname) +{ + return -1; +} + +int +rte_vfio_is_enabled(__rte_unused const char *modname) +{ + return -1; +} + +int +rte_vfio_noiommu_is_enabled(void) +{ + return -1; +} + +int +rte_vfio_clear_group(__rte_unused int vfio_group_fd) +{ + return -1; +} + +int +rte_vfio_get_group_num(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *iommu_group_num) +{ + return -1; +} + +int +rte_vfio_get_container_fd(void) +{ + return -1; +} + +int +rte_vfio_get_group_fd(__rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_create(void) +{ + return -1; +} + +int +rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + 
__rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +#endif /* VFIO_PRESENT */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 80595773..68d4750a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -19,6 +19,7 @@ #ifdef VFIO_PRESENT +#include #include #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU @@ -26,6 +27,7 @@ #ifndef VFIO_SPAPR_TCE_v2_IOMMU #define RTE_VFIO_SPAPR 7 #define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) #define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) #define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) @@ -79,49 +81,37 @@ struct vfio_iommu_spapr_tce_info { #define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) -#define RTE_VFIO_NOIOMMU 8 -#else -#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU -#endif - #define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS - -/* - * Function prototypes for VFIO multiprocess sync functions - */ -int vfio_mp_sync_send_request(int socket, int req); -int vfio_mp_sync_receive_request(int socket); -int vfio_mp_sync_send_fd(int socket, int fd); -int vfio_mp_sync_receive_fd(int socket); -int vfio_mp_sync_connect_to_primary(void); +#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS /* * we don't need to store device fd's anywhere since they can be obtained from * the group fd via an ioctl() call. */ struct vfio_group { - int group_no; + int group_num; int fd; int devices; }; -struct vfio_config { - int vfio_enabled; - int vfio_container_fd; - int vfio_active_groups; - struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; -}; - /* DMA mapping function prototype. * Takes VFIO container fd as a parameter. * Returns 0 on success, -1 on error. * */ typedef int (*vfio_dma_func_t)(int); +/* Custom memory region DMA mapping function prototype. + * Takes VFIO container fd, virtual address, phisical address, length and + * operation type (0 to unmap 1 for map) as a parameters. + * Returns 0 on success, -1 on error. 
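
Each IOMMU backend supplies this per-region callback alongside the existing bulk vfio_dma_func_t; both are wired up in the iommu_types[] table in eal_vfio.c. A hypothetical backend entry, purely for illustration, could look like this:

    static int
    dummy_map(int container_fd __rte_unused)
    {
            return 0;       /* bulk map of all DPDK segments at attach time */
    }

    static int
    dummy_user_map(int container_fd __rte_unused, uint64_t vaddr __rte_unused,
                    uint64_t iova __rte_unused, uint64_t len __rte_unused,
                    int do_map __rte_unused)
    {
            return 0;       /* map (do_map == 1) or unmap (do_map == 0) one region */
    }

    static const struct vfio_iommu_type dummy_iommu = {
            .type_id = 99,  /* made-up id, not a real VFIO IOMMU type */
            .name = "dummy",
            .dma_map_func = &dummy_map,
            .dma_user_map_func = &dummy_user_map,
    };
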
+ **/ +typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map); + struct vfio_iommu_type { int type_id; const char *name; + vfio_dma_user_func_t dma_user_map_func; vfio_dma_func_t dma_map_func; }; @@ -133,30 +123,22 @@ vfio_set_iommu_type(int vfio_container_fd); int vfio_has_supported_extensions(int vfio_container_fd); -/* open container fd or get an existing one */ -int -vfio_get_container_fd(void); - -/* parse IOMMU group number for a device - * returns 1 on success, -1 for errors, 0 for non-existent group - */ -int -vfio_get_group_no(const char *sysfs_base, - const char *dev_addr, int *iommu_group_no); - -/* open group fd or get an existing one */ -int -vfio_get_group_fd(int iommu_group_no); - int vfio_mp_sync_setup(void); +#define EAL_VFIO_MP "eal_vfio_mp_sync" + #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 -#define SOCKET_CLR_GROUP 0x300 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF +struct vfio_mp_param { + int req; + int result; + int group_num; +}; + #endif /* VFIO_PRESENT */ #endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index 7cc3c152..680a24aa 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -1,32 +1,16 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ +#include #include -#include -#include -#include - -/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ -#ifdef __USE_MISC -#define REMOVED_USE_MISC -#undef __USE_MISC -#endif -#include -/* make sure we redefine __USE_MISC only if it was previously undefined */ -#ifdef REMOVED_USE_MISC -#define __USE_MISC -#undef REMOVED_USE_MISC -#endif +#include #include -#include -#include #include +#include -#include "eal_filesystem.h" #include "eal_vfio.h" -#include "eal_thread.h" /** * @file @@ -37,358 +21,70 @@ #ifdef VFIO_PRESENT -#define SOCKET_PATH_FMT "%s/.%s_mp_socket" -#define CMSGLEN (CMSG_LEN(sizeof(int))) -#define FD_TO_CMSGHDR(fd, chdr) \ - do {\ - (chdr).cmsg_len = CMSGLEN;\ - (chdr).cmsg_level = SOL_SOCKET;\ - (chdr).cmsg_type = SCM_RIGHTS;\ - memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ - } while (0) -#define CMSGHDR_TO_FD(chdr, fd) \ - memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) - -static pthread_t socket_thread; -static int mp_socket_fd; - - -/* get socket path (/var/run if root, $HOME otherwise) */ -static void -get_socket_path(char *buffer, int bufsz) -{ - const char *dir = "/var/run"; - const char *home_dir = getenv("HOME"); - - if (getuid() != 0 && home_dir != NULL) - dir = home_dir; - - /* use current prefix as file path */ - snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, - internal_config.hugefile_prefix); -} - - - -/* - * data flow for socket comm protocol: - * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP - * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number - * 2. server receives message - * 2a. in case of invalid group, SOCKET_ERR is sent back to client - * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client - * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd - * - * in case of any error, socket is closed. 
- */ - -/* send a request, return -1 on error */ -int -vfio_mp_sync_send_request(int socket, int req) -{ - struct msghdr hdr; - struct iovec iov; - int buf; - int ret; - - memset(&hdr, 0, sizeof(hdr)); - - buf = req; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive a request and return it */ -int -vfio_mp_sync_receive_request(int socket) -{ - int buf; - struct msghdr hdr; - struct iovec iov; - int ret, req; - - memset(&hdr, 0, sizeof(hdr)); - - buf = SOCKET_ERR; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = recvmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - - req = buf; - - return req; -} - -/* send OK in message, fd in control message */ -int -vfio_mp_sync_send_fd(int socket, int fd) +static int +vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) { - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; + int fd = -1; int ret; + struct rte_mp_msg reply; + struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param; + const struct vfio_mp_param *m = + (const struct vfio_mp_param *)msg->param; - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - buf = SOCKET_OK; - FD_TO_CMSGHDR(fd, *chdr); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive OK in message, fd in control message */ -int -vfio_mp_sync_receive_fd(int socket) -{ - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; - int ret, req, fd; - - buf = SOCKET_ERR; - - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - ret = recvmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - - req = buf; - - if (req != SOCKET_OK) - return -1; - - CMSGHDR_TO_FD(*chdr, fd); - - return fd; -} - -/* connect socket_fd in secondary process to the primary process's socket */ -int -vfio_mp_sync_connect_to_primary(void) -{ - struct sockaddr_un addr; - socklen_t sockaddr_len; - int socket_fd; - - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + if (msg->len_param != sizeof(*m)) { + RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; } - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); - - if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) - return socket_fd; - - /* if connect failed */ - close(socket_fd); - return -1; -} - + memset(&reply, 0, sizeof(reply)); - -/* - * socket listening thread for primary process - */ -static __attribute__((noreturn)) void * -vfio_mp_sync_thread(void __rte_unused * arg) -{ - int ret, fd, vfio_data; - - /* wait for requests on the socket */ - for (;;) { - int conn_sock; - struct sockaddr_un addr; - socklen_t sockaddr_len = sizeof(addr); - - /* this is a blocking call */ - conn_sock = accept(mp_socket_fd, (struct 
sockaddr *) &addr, - &sockaddr_len); - - /* just restart on error */ - if (conn_sock == -1) - continue; - - /* set socket to linger after close */ - struct linger l; - l.l_onoff = 1; - l.l_linger = 60; - - if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0) - RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option " - "on listen socket (%s)\n", strerror(errno)); - - ret = vfio_mp_sync_receive_request(conn_sock); - - switch (ret) { - case SOCKET_REQ_CONTAINER: - fd = vfio_get_container_fd(); - if (fd < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - else - vfio_mp_sync_send_fd(conn_sock, fd); - if (fd >= 0) - close(fd); - break; - case SOCKET_REQ_GROUP: - /* wait for group number */ - vfio_data = vfio_mp_sync_receive_request(conn_sock); - if (vfio_data < 0) { - close(conn_sock); - continue; - } - - fd = vfio_get_group_fd(vfio_data); - - if (fd < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + switch (m->req) { + case SOCKET_REQ_GROUP: + r->req = SOCKET_REQ_GROUP; + r->group_num = m->group_num; + fd = rte_vfio_get_group_fd(m->group_num); + if (fd < 0) + r->result = SOCKET_ERR; + else if (fd == 0) /* if VFIO group exists but isn't bound to VFIO driver */ - else if (fd == 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + r->result = SOCKET_NO_FD; + else { /* if group exists and is bound to VFIO driver */ - else { - vfio_mp_sync_send_request(conn_sock, SOCKET_OK); - vfio_mp_sync_send_fd(conn_sock, fd); - } - break; - case SOCKET_CLR_GROUP: - /* wait for group fd */ - vfio_data = vfio_mp_sync_receive_request(conn_sock); - if (vfio_data < 0) { - close(conn_sock); - continue; - } - - ret = rte_vfio_clear_group(vfio_data); - - if (ret < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); - else - vfio_mp_sync_send_request(conn_sock, SOCKET_OK); - break; - default: - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - break; + r->result = SOCKET_OK; + reply.num_fds = 1; + reply.fds[0] = fd; } - close(conn_sock); - } -} - -static int -vfio_mp_sync_socket_setup(void) -{ - int ret, socket_fd; - struct sockaddr_un addr; - socklen_t sockaddr_len; - - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); - return -1; - } - - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); - - unlink(addr.sun_path); - - ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); - close(socket_fd); - return -1; - } - - ret = listen(socket_fd, 50); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); - close(socket_fd); + break; + case SOCKET_REQ_CONTAINER: + r->req = SOCKET_REQ_CONTAINER; + fd = rte_vfio_get_container_fd(); + if (fd < 0) + r->result = SOCKET_ERR; + else { + r->result = SOCKET_OK; + reply.num_fds = 1; + reply.fds[0] = fd; + } + break; + default: + RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; } - /* save the socket in local configuration */ - mp_socket_fd = socket_fd; + strcpy(reply.name, EAL_VFIO_MP); + reply.len_param = sizeof(*r); - return 0; + ret = rte_mp_reply(&reply, peer); + if (m->req == SOCKET_REQ_CONTAINER && fd >= 0) + close(fd); + return ret; } -/* - * set up a local socket and tell it to listen for incoming connections - */ int vfio_mp_sync_setup(void) { - int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - if (vfio_mp_sync_socket_setup() < 0) 
{ - RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); - return -1; - } - - ret = pthread_create(&socket_thread, NULL, - vfio_mp_sync_thread, NULL); - if (ret) { - RTE_LOG(ERR, EAL, - "Failed to create thread for communication with secondary processes!\n"); - close(mp_socket_fd); - return -1; - } - - /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync"); - ret = rte_thread_setname(socket_thread, thread_name); - if (ret) - RTE_LOG(DEBUG, EAL, - "Failed to set thread name for secondary processes!\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return rte_mp_action_register(EAL_VFIO_MP, vfio_mp_primary); return 0; } diff --git a/lib/librte_eal/linuxapp/eal/meson.build b/lib/librte_eal/linuxapp/eal/meson.build index 03974ff2..6e31c2aa 100644 --- a/lib/librte_eal/linuxapp/eal/meson.build +++ b/lib/librte_eal/linuxapp/eal/meson.build @@ -7,9 +7,11 @@ install_subdir('include/exec-env', install_dir: get_option('includedir')) env_objs = [] env_headers = [] env_sources = files('eal_alarm.c', + 'eal_cpuflags.c', 'eal_debug.c', 'eal_hugepage_info.c', 'eal_interrupts.c', + 'eal_memalloc.c', 'eal_lcore.c', 'eal_log.c', 'eal_thread.c', @@ -18,8 +20,10 @@ env_sources = files('eal_alarm.c', 'eal_vfio_mp_sync.c', 'eal.c', 'eal_memory.c', + 'eal_dev.c', ) +deps += ['kvargs'] if has_libnuma == 1 dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true) endif diff --git a/lib/librte_eal/linuxapp/igb_uio/Kbuild b/lib/librte_eal/linuxapp/igb_uio/Kbuild deleted file mode 100644 index 98c98fe5..00000000 --- a/lib/librte_eal/linuxapp/igb_uio/Kbuild +++ /dev/null @@ -1 +0,0 @@ -obj-m := igb_uio.o diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile deleted file mode 100644 index f83bcc7c..00000000 --- a/lib/librte_eal/linuxapp/igb_uio/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# module name and path -# -MODULE = igb_uio -MODULE_PATH = drivers/net/igb_uio - -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100 -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -MODULE_CFLAGS += -Winline -Wall -Werror -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h - -# -# all source are stored in SRCS-y -# -SRCS-y := igb_uio.c - -include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h deleted file mode 100644 index ce456d4b..00000000 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Minimal wrappers to allow compiling igb_uio on older kernels. 
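/*
 * Illustrative sketch only, not part of this patch: how the secondary
 * process side can ask the primary for a VFIO group fd through the
 * rte_mp channel that vfio_mp_primary()/rte_mp_action_register()
 * (introduced earlier in this patch) serve.  The vfio_mp_param layout
 * (req/group_num/result) is assumed from that handler; the timeout,
 * function name and error handling are placeholders, and includes
 * (string.h, stdlib.h, rte_eal.h, eal_vfio.h) are omitted for brevity.
 */
static int
vfio_request_group_fd(int iommu_group_num)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	struct vfio_mp_param *p = (struct vfio_mp_param *)req.param;
	int fd = -1;

	memset(&req, 0, sizeof(req));
	strcpy(req.name, EAL_VFIO_MP);
	req.len_param = sizeof(*p);
	p->req = SOCKET_REQ_GROUP;
	p->group_num = iommu_group_num;

	if (rte_mp_request_sync(&req, &reply, &ts) == 0) {
		if (reply.nb_received == 1) {
			const struct rte_mp_msg *rep = &reply.msgs[0];
			const struct vfio_mp_param *r =
				(const struct vfio_mp_param *)rep->param;

			if (r->result == SOCKET_OK && rep->num_fds == 1)
				fd = rep->fds[0]; /* fd arrives via SCM_RIGHTS */
		}
		free(reply.msgs); /* reply array is allocated by EAL */
	}
	return fd;
}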
- */ - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) -#define pci_cfg_access_lock pci_block_user_cfg_access -#define pci_cfg_access_unlock pci_unblock_user_cfg_access -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) -#define HAVE_PTE_MASK_PAGE_IOMAP -#endif - -#ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 -#endif - -/* - * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition - * to pci_regs.h Those kernels has PCI_MSIX_ENTRY_SIZE defined but not - * PCI_MSIX_ENTRY_CTRL_MASKBIT - */ -#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) - -static int pci_num_vf(struct pci_dev *dev) -{ - struct iov { - int pos; - int nres; - u32 cap; - u16 ctrl; - u16 total; - u16 initial; - u16 nr_virtfn; - } *iov = (struct iov *)dev->sriov; - - if (!dev->is_physfn) - return 0; - - return iov->nr_virtfn; -} - -#endif /* < 2.6.34 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3))) - -/* Check if INTX works to control irq's. - * Set's INTX_DISABLE flag and reads it back - */ -static bool pci_intx_mask_supported(struct pci_dev *pdev) -{ - bool mask_supported = false; - uint16_t orig, new; - - pci_block_user_cfg_access(pdev); - pci_read_config_word(pdev, PCI_COMMAND, &orig); - pci_write_config_word(pdev, PCI_COMMAND, - orig ^ PCI_COMMAND_INTX_DISABLE); - pci_read_config_word(pdev, PCI_COMMAND, &new); - - if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { - dev_err(&pdev->dev, "Command register changed from " - "0x%x to 0x%x: driver or hardware bug?\n", orig, new); - } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { - mask_supported = true; - pci_write_config_word(pdev, PCI_COMMAND, orig); - } - pci_unblock_user_cfg_access(pdev); - - return mask_supported; -} - -static bool pci_check_and_mask_intx(struct pci_dev *pdev) -{ - bool pending; - uint32_t status; - - pci_block_user_cfg_access(pdev); - pci_read_config_dword(pdev, PCI_COMMAND, &status); - - /* interrupt is not ours, goes to out */ - pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0); - if (pending) { - uint16_t old, new; - - old = status; - if (status != 0) - new = old & (~PCI_COMMAND_INTX_DISABLE); - else - new = old | PCI_COMMAND_INTX_DISABLE; - - if (old != new) - pci_write_config_word(pdev, PCI_COMMAND, new); - } - pci_unblock_user_cfg_access(pdev); - - return pending; -} - -#endif /* < 3.3.0 */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) -#define HAVE_ALLOC_IRQ_VECTORS 1 -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) -#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1 -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) -#define HAVE_PCI_MSI_MASK_IRQ 1 -#endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c deleted file mode 100644 index 4cae4dd2..00000000 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ /dev/null @@ -1,643 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/*- - 
* Copyright(c) 2010-2017 Intel Corporation. All rights reserved. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "compat.h" - -/** - * A structure describing the private information for a uio device. - */ -struct rte_uio_pci_dev { - struct uio_info info; - struct pci_dev *pdev; - enum rte_intr_mode mode; - struct mutex lock; - int refcnt; -}; - -static char *intr_mode; -static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; -/* sriov sysfs */ -static ssize_t -show_max_vfs(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, 10, "%u\n", dev_num_vf(dev)); -} - -static ssize_t -store_max_vfs(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - int err = 0; - unsigned long max_vfs; - struct pci_dev *pdev = to_pci_dev(dev); - - if (0 != kstrtoul(buf, 0, &max_vfs)) - return -EINVAL; - - if (0 == max_vfs) - pci_disable_sriov(pdev); - else if (0 == pci_num_vf(pdev)) - err = pci_enable_sriov(pdev, max_vfs); - else /* do nothing if change max_vfs number */ - err = -EINVAL; - - return err ? err : count; -} - -static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); - -static struct attribute *dev_attrs[] = { - &dev_attr_max_vfs.attr, - NULL, -}; - -static const struct attribute_group dev_attr_grp = { - .attrs = dev_attrs, -}; - -#ifndef HAVE_PCI_MSI_MASK_IRQ -/* - * It masks the msix on/off of generating MSI-X messages. - */ -static void -igbuio_msix_mask_irq(struct msi_desc *desc, s32 state) -{ - u32 mask_bits = desc->masked; - unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - - if (state != 0) - mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; - else - mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - - if (mask_bits != desc->masked) { - writel(mask_bits, desc->mask_base + offset); - readl(desc->mask_base); - desc->masked = mask_bits; - } -} - -/* - * It masks the msi on/off of generating MSI messages. - */ -static void -igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state) -{ - u32 mask_bits = desc->masked; - u32 offset = desc->irq - pdev->irq; - u32 mask = 1 << offset; - - if (!desc->msi_attrib.maskbit) - return; - - if (state != 0) - mask_bits &= ~mask; - else - mask_bits |= mask; - - if (mask_bits != desc->masked) { - pci_write_config_dword(pdev, desc->mask_pos, mask_bits); - desc->masked = mask_bits; - } -} - -static void -igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state) -{ - struct msi_desc *desc; - struct list_head *msi_list; - -#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE - msi_list = &pdev->dev.msi_list; -#else - msi_list = &pdev->msi_list; -#endif - - if (mode == RTE_INTR_MODE_MSIX) { - list_for_each_entry(desc, msi_list, list) - igbuio_msix_mask_irq(desc, irq_state); - } else if (mode == RTE_INTR_MODE_MSI) { - list_for_each_entry(desc, msi_list, list) - igbuio_msi_mask_irq(pdev, desc, irq_state); - } -} -#endif - -/** - * This is the irqcontrol callback to be registered to uio_info. - * It can be used to disable/enable interrupt from user space processes. - * - * @param info - * pointer to uio_info. - * @param irq_state - * state value. 1 to enable interrupt, 0 to disable interrupt. - * - * @return - * - On success, 0. - * - On failure, a negative value. 
- */ -static int -igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) -{ - struct rte_uio_pci_dev *udev = info->priv; - struct pci_dev *pdev = udev->pdev; - -#ifdef HAVE_PCI_MSI_MASK_IRQ - struct irq_data *irq = irq_get_irq_data(udev->info.irq); -#endif - - pci_cfg_access_lock(pdev); - - if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) { -#ifdef HAVE_PCI_MSI_MASK_IRQ - if (irq_state == 1) - pci_msi_unmask_irq(irq); - else - pci_msi_mask_irq(irq); -#else - igbuio_mask_irq(pdev, udev->mode, irq_state); -#endif - } - - if (udev->mode == RTE_INTR_MODE_LEGACY) - pci_intx(pdev, !!irq_state); - - pci_cfg_access_unlock(pdev); - - return 0; -} - -/** - * This is interrupt handler which will check if the interrupt is for the right device. - * If yes, disable it here and will be enable later. - */ -static irqreturn_t -igbuio_pci_irqhandler(int irq, void *dev_id) -{ - struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id; - struct uio_info *info = &udev->info; - - /* Legacy mode need to mask in hardware */ - if (udev->mode == RTE_INTR_MODE_LEGACY && - !pci_check_and_mask_intx(udev->pdev)) - return IRQ_NONE; - - uio_event_notify(info); - - /* Message signal mode, no share IRQ and automasked */ - return IRQ_HANDLED; -} - -static int -igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) -{ - int err = 0; -#ifndef HAVE_ALLOC_IRQ_VECTORS - struct msix_entry msix_entry; -#endif - - switch (igbuio_intr_mode_preferred) { - case RTE_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ -#ifndef HAVE_ALLOC_IRQ_VECTORS - msix_entry.entry = 0; - if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { - dev_dbg(&udev->pdev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = msix_entry.vector; - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#else - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) { - dev_dbg(&udev->pdev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = pci_irq_vector(udev->pdev, 0); - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#endif - - /* fall back to MSI */ - case RTE_INTR_MODE_MSI: -#ifndef HAVE_ALLOC_IRQ_VECTORS - if (pci_enable_msi(udev->pdev) == 0) { - dev_dbg(&udev->pdev->dev, "using MSI"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = udev->pdev->irq; - udev->mode = RTE_INTR_MODE_MSI; - break; - } -#else - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) { - dev_dbg(&udev->pdev->dev, "using MSI"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = pci_irq_vector(udev->pdev, 0); - udev->mode = RTE_INTR_MODE_MSI; - break; - } -#endif - /* fall back to INTX */ - case RTE_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(udev->pdev)) { - dev_dbg(&udev->pdev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; - udev->info.irq = udev->pdev->irq; - udev->mode = RTE_INTR_MODE_LEGACY; - break; - } - dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n"); - /* fall back to no IRQ */ - case RTE_INTR_MODE_NONE: - udev->mode = RTE_INTR_MODE_NONE; - udev->info.irq = UIO_IRQ_NONE; - break; - - default: - dev_err(&udev->pdev->dev, "invalid IRQ mode %u", - igbuio_intr_mode_preferred); - udev->info.irq = UIO_IRQ_NONE; - err = -EINVAL; - } - - if (udev->info.irq != UIO_IRQ_NONE) - err = request_irq(udev->info.irq, igbuio_pci_irqhandler, - udev->info.irq_flags, udev->info.name, - udev); - dev_info(&udev->pdev->dev, "uio device registered with irq %ld\n", - udev->info.irq); - - return err; -} - -static void 
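/*
 * Illustrative sketch only, not part of this patch: how user space
 * drives the igbuio_pci_irqcontrol() callback shown above.  With UIO,
 * writing a 32-bit value to /dev/uioX invokes the driver's irqcontrol
 * hook (1 = enable, 0 = disable), and a blocking read returns the
 * interrupt event count once uio_event_notify() fires.
 */
#include <stdint.h>
#include <unistd.h>

static int
uio_set_intr(int uio_fd, int enable)
{
	int32_t value = enable ? 1 : 0;

	if (write(uio_fd, &value, sizeof(value)) != sizeof(value))
		return -1; /* irqcontrol rejected the request */
	return 0;
}

static int
uio_wait_intr(int uio_fd)
{
	uint32_t count;

	/* blocks until the interrupt handler notifies the uio device */
	if (read(uio_fd, &count, sizeof(count)) != sizeof(count))
		return -1;
	return (int)count;
}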
-igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) -{ - if (udev->info.irq) { - free_irq(udev->info.irq, udev); - udev->info.irq = 0; - } - -#ifndef HAVE_ALLOC_IRQ_VECTORS - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(udev->pdev); - if (udev->mode == RTE_INTR_MODE_MSI) - pci_disable_msi(udev->pdev); -#else - if (udev->mode == RTE_INTR_MODE_MSIX || - udev->mode == RTE_INTR_MODE_MSI) - pci_free_irq_vectors(udev->pdev); -#endif -} - - -/** - * This gets called while opening uio device file. - */ -static int -igbuio_pci_open(struct uio_info *info, struct inode *inode) -{ - struct rte_uio_pci_dev *udev = info->priv; - struct pci_dev *dev = udev->pdev; - int err; - - mutex_lock(&udev->lock); - if (++udev->refcnt > 1) { - mutex_unlock(&udev->lock); - return 0; - } - - /* set bus master, which was cleared by the reset function */ - pci_set_master(dev); - - /* enable interrupts */ - err = igbuio_pci_enable_interrupts(udev); - mutex_unlock(&udev->lock); - if (err) { - dev_err(&dev->dev, "Enable interrupt fails\n"); - return err; - } - return 0; -} - -static int -igbuio_pci_release(struct uio_info *info, struct inode *inode) -{ - struct rte_uio_pci_dev *udev = info->priv; - struct pci_dev *dev = udev->pdev; - - mutex_lock(&udev->lock); - if (--udev->refcnt > 0) { - mutex_unlock(&udev->lock); - return 0; - } - - /* disable interrupts */ - igbuio_pci_disable_interrupts(udev); - - /* stop the device from further DMA */ - pci_clear_master(dev); - - mutex_unlock(&udev->lock); - return 0; -} - -/* Remap pci resources described by bar #pci_bar in uio resource n. */ -static int -igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, - int n, int pci_bar, const char *name) -{ - unsigned long addr, len; - void *internal_addr; - - if (n >= ARRAY_SIZE(info->mem)) - return -EINVAL; - - addr = pci_resource_start(dev, pci_bar); - len = pci_resource_len(dev, pci_bar); - if (addr == 0 || len == 0) - return -1; - internal_addr = ioremap(addr, len); - if (internal_addr == NULL) - return -1; - info->mem[n].name = name; - info->mem[n].addr = addr; - info->mem[n].internal_addr = internal_addr; - info->mem[n].size = len; - info->mem[n].memtype = UIO_MEM_PHYS; - return 0; -} - -/* Get pci port io resources described by bar #pci_bar in uio resource n. 
*/ -static int -igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info, - int n, int pci_bar, const char *name) -{ - unsigned long addr, len; - - if (n >= ARRAY_SIZE(info->port)) - return -EINVAL; - - addr = pci_resource_start(dev, pci_bar); - len = pci_resource_len(dev, pci_bar); - if (addr == 0 || len == 0) - return -EINVAL; - - info->port[n].name = name; - info->port[n].start = addr; - info->port[n].size = len; - info->port[n].porttype = UIO_PORT_X86; - - return 0; -} - -/* Unmap previously ioremap'd resources */ -static void -igbuio_pci_release_iomem(struct uio_info *info) -{ - int i; - - for (i = 0; i < MAX_UIO_MAPS; i++) { - if (info->mem[i].internal_addr) - iounmap(info->mem[i].internal_addr); - } -} - -static int -igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) -{ - int i, iom, iop, ret; - unsigned long flags; - static const char *bar_names[PCI_STD_RESOURCE_END + 1] = { - "BAR0", - "BAR1", - "BAR2", - "BAR3", - "BAR4", - "BAR5", - }; - - iom = 0; - iop = 0; - - for (i = 0; i < ARRAY_SIZE(bar_names); i++) { - if (pci_resource_len(dev, i) != 0 && - pci_resource_start(dev, i) != 0) { - flags = pci_resource_flags(dev, i); - if (flags & IORESOURCE_MEM) { - ret = igbuio_pci_setup_iomem(dev, info, iom, - i, bar_names[i]); - if (ret != 0) - return ret; - iom++; - } else if (flags & IORESOURCE_IO) { - ret = igbuio_pci_setup_ioport(dev, info, iop, - i, bar_names[i]); - if (ret != 0) - return ret; - iop++; - } - } - } - - return (iom != 0 || iop != 0) ? ret : -ENOENT; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -static int __devinit -#else -static int -#endif -igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct rte_uio_pci_dev *udev; - dma_addr_t map_dma_addr; - void *map_addr; - int err; - - udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); - if (!udev) - return -ENOMEM; - - mutex_init(&udev->lock); - /* - * enable device: ask low-level code to enable I/O and - * memory - */ - err = pci_enable_device(dev); - if (err != 0) { - dev_err(&dev->dev, "Cannot enable PCI device\n"); - goto fail_free; - } - - /* enable bus mastering on the device */ - pci_set_master(dev); - - /* remap IO memory */ - err = igbuio_setup_bars(dev, &udev->info); - if (err != 0) - goto fail_release_iomem; - - /* set 64-bit DMA mask */ - err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); - if (err != 0) { - dev_err(&dev->dev, "Cannot set DMA mask\n"); - goto fail_release_iomem; - } - - err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); - if (err != 0) { - dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); - goto fail_release_iomem; - } - - /* fill uio infos */ - udev->info.name = "igb_uio"; - udev->info.version = "0.1"; - udev->info.irqcontrol = igbuio_pci_irqcontrol; - udev->info.open = igbuio_pci_open; - udev->info.release = igbuio_pci_release; - udev->info.priv = udev; - udev->pdev = dev; - - err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); - if (err != 0) - goto fail_release_iomem; - - /* register uio driver */ - err = uio_register_device(&dev->dev, &udev->info); - if (err != 0) - goto fail_remove_group; - - pci_set_drvdata(dev, udev); - - /* - * Doing a harmless dma mapping for attaching the device to - * the iommu identity mapping if kernel boots with iommu=pt. - * Note this is not a problem if no IOMMU at all. 
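/*
 * Illustrative sketch only, not part of this patch: the user-space
 * counterpart of igbuio_pci_setup_iomem() earlier in this file.  UIO
 * exposes mapping n of /dev/uioX at mmap offset n * page_size, so a
 * BAR registered as mapping 0 can be mapped as below; the length would
 * normally be read from the map's sysfs size attribute.
 */
#include <sys/mman.h>
#include <unistd.h>

static void *
uio_map_resource(int uio_fd, unsigned int n, size_t len)
{
	off_t offset = (off_t)n * sysconf(_SC_PAGE_SIZE);
	void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED, uio_fd, offset);

	return addr == MAP_FAILED ? NULL : addr;
}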
- */ - map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr, - GFP_KERNEL); - if (map_addr) - memset(map_addr, 0, 1024); - - if (!map_addr) - dev_info(&dev->dev, "dma mapping failed\n"); - else { - dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n", - (unsigned long long)map_dma_addr, map_addr); - - dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr); - dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n", - (unsigned long long)map_dma_addr, map_addr); - } - - return 0; - -fail_remove_group: - sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); -fail_release_iomem: - igbuio_pci_release_iomem(&udev->info); - pci_disable_device(dev); -fail_free: - kfree(udev); - - return err; -} - -static void -igbuio_pci_remove(struct pci_dev *dev) -{ - struct rte_uio_pci_dev *udev = pci_get_drvdata(dev); - - mutex_destroy(&udev->lock); - sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); - uio_unregister_device(&udev->info); - igbuio_pci_release_iomem(&udev->info); - pci_disable_device(dev); - pci_set_drvdata(dev, NULL); - kfree(udev); -} - -static int -igbuio_config_intr_mode(char *intr_str) -{ - if (!intr_str) { - pr_info("Use MSIX interrupt by default\n"); - return 0; - } - - if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { - igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; - pr_info("Use MSIX interrupt\n"); - } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) { - igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI; - pr_info("Use MSI interrupt\n"); - } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { - igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; - pr_info("Use legacy interrupt\n"); - } else { - pr_info("Error: bad parameter - %s\n", intr_str); - return -EINVAL; - } - - return 0; -} - -static struct pci_driver igbuio_pci_driver = { - .name = "igb_uio", - .id_table = NULL, - .probe = igbuio_pci_probe, - .remove = igbuio_pci_remove, -}; - -static int __init -igbuio_pci_init_module(void) -{ - int ret; - - ret = igbuio_config_intr_mode(intr_mode); - if (ret < 0) - return ret; - - return pci_register_driver(&igbuio_pci_driver); -} - -static void __exit -igbuio_pci_exit_module(void) -{ - pci_unregister_driver(&igbuio_pci_driver); -} - -module_init(igbuio_pci_init_module); -module_exit(igbuio_pci_exit_module); - -module_param(intr_mode, charp, S_IRUGO); -MODULE_PARM_DESC(intr_mode, -"igb_uio interrupt mode (default=msix):\n" -" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" -" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n" -" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" -"\n"); - -MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Intel Corporation"); diff --git a/lib/librte_eal/linuxapp/igb_uio/meson.build b/lib/librte_eal/linuxapp/igb_uio/meson.build deleted file mode 100644 index 257ef631..00000000 --- a/lib/librte_eal/linuxapp/igb_uio/meson.build +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation - -kernel_dir = get_option('kernel_dir') -if kernel_dir == '' - kernel_version = run_command('uname', '-r').stdout().strip() - kernel_dir = '/lib/modules/' + kernel_version + '/build' -endif - -mkfile = custom_target('igb_uio_makefile', - output: 'Makefile', - command: ['touch', '@OUTPUT@']) - -custom_target('igb_uio', - input: ['igb_uio.c', 'Kbuild'], - output: 'igb_uio.ko', - command: ['make', '-C', kernel_dir, - 'M=' + meson.current_build_dir(), - 'src=' + meson.current_source_dir(), - 'EXTRA_CFLAGS=-I' + meson.current_source_dir() + - '/../../common/include', - 
'modules'], - depends: mkfile, - build_by_default: get_option('enable_kmods')) diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile deleted file mode 100644 index 282be7b6..00000000 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# module name and path -# -MODULE = rte_kni - -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h -MODULE_CFLAGS += -Wall -Werror - --include /etc/lsb-release - -ifeq ($(DISTRIB_ID),Ubuntu) -MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE)) -UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ - | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1) -MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" -endif - -# -# all source are stored in SRCS-y -# -SRCS-y := kni_misc.c -SRCS-y += kni_net.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += kni_ethtool.c - -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_main.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_api.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_common.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_ethtool.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82599.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82598.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_x540.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_phy.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/kcompat.c - -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_82575.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_i210.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_api.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mac.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_manage.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mbx.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_nvm.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_phy.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_ethtool.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_main.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_param.c -SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_vmdq.c - -include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h deleted file mode 100644 index 3f8c0bc8..00000000 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Minimal wrappers to allow compiling kni on older kernels. 
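/*
 * Illustrative sketch only, not part of this patch: how KNI sources
 * typically consume the compatibility macros defined just below.  The
 * wrapper name here is hypothetical; the pattern mirrors the way
 * kni_net.c selects between the trans_start helper added in kernel 4.7
 * and the older net_device field at compile time.
 */
static inline void
kni_update_trans_start(struct net_device *dev)
{
#ifdef HAVE_TRANS_START_HELPER
	netif_trans_update(dev);	/* kernel >= 4.7, RHEL 7.4+, SLES 12 SP3 */
#else
	dev->trans_start = jiffies;	/* older kernels keep trans_start on net_device */
#endif
}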
- */ - -#include - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -/* SuSE version macro is the same as Linux kernel version */ -#ifndef SLE_VERSION -#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c) -#endif -#ifdef CONFIG_SUSE_KERNEL -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) -/* SLES12SP3 is at least 4.4.57+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28)) -/* SLES12 is at least 3.12.28+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0) -#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0))) -/* SLES11 SP3 is at least 3.0.61+ based */ -#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0) -#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)) -/* SLES11 SP1 is 2.6.32 based */ -#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0) -#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27)) -/* SLES11 GA is 2.6.27 based */ -#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0) -#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ -#endif /* CONFIG_SUSE_KERNEL */ -#ifndef SLE_VERSION_CODE -#define SLE_VERSION_CODE 0 -#endif /* SLE_VERSION_CODE */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) -#define HAVE_SIMPLIFIED_PERNET_OPERATIONS -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -#define sk_sleep(s) ((s)->sk_sleep) -#else -#define HAVE_SOCKET_WQ -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) -#define HAVE_STATIC_SOCK_MAP_FD -#else -#define kni_sock_map_fd(s) sock_map_fd(s, 0) -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) -#define HAVE_CHANGE_CARRIER_CB -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) -#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN) -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) -#define HAVE_IOV_ITER_MSGHDR -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) -#define HAVE_KIOCB_MSG_PARAM -#define HAVE_REBUILD_HEADER -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) -#define HAVE_SK_ALLOC_KERN_PARAM -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \ - (defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0)) -#define HAVE_TRANS_START_HELPER -#endif - -/* - * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev() - * For old kernels just backported the commit that enables the macro - * (685343fc3ba6) but still uses old API, it is required to undefine macro to - * select correct version of API, this is safe since KNI doesn't use the value. - * This fix is specific to RedHat/CentOS kernels. - */ -#if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) -#undef NET_NAME_UNKNOWN -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) -#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README deleted file mode 100644 index af36738a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/README +++ /dev/null @@ -1,71 +0,0 @@ -.. 
SPDX-License-Identifier: BSD-3-Clause - Copyright(c) 2010-2014 Intel Corporation. - -Description - -In order to support ethtool in Kernel NIC Interface, the standard Linux kernel -drivers of ixgbe/igb are needed to be reused here. ixgbe-3.9.17 is the version -modified from in kernel NIC interface kernel module to support ixgbe NIC, and -igb-3.4.8 is the version modified from in kernel NIC interface kernel module to -support igb NIC. - -The source code package of ixgbe can be downloaded from sourceforge.net as below. -http://sourceforge.net/projects/e1000/files/ixgbe%20stable/ -Below source files are copied or modified from ixgbe. - -ixgbe_82598.h -ixgbe_82599.c -ixgbe_82599.h -ixgbe_api.c -ixgbe_api.h -ixgbe_common.c -ixgbe_common.h -ixgbe_dcb.h -ixgbe_ethtool.c -ixgbe_fcoe.h -ixgbe.h -ixgbe_main.c -ixgbe_mbx.h -ixgbe_osdep.h -ixgbe_phy.c -ixgbe_phy.h -ixgbe_sriov.h -ixgbe_type.h -kcompat.c -kcompat.h - -The source code package of igb can be downloaded from sourceforge.net as below. -http://sourceforge.net/projects/e1000/files/igb%20stable/ -Below source files are copied or modified from igb. - -e1000_82575.c -e1000_82575.h -e1000_api.c -e1000_api.h -e1000_defines.h -e1000_hw.h -e1000_mac.c -e1000_mac.h -e1000_manage.c -e1000_manage.h -e1000_mbx.c -e1000_mbx.h -e1000_nvm.c -e1000_nvm.h -e1000_osdep.h -e1000_phy.c -e1000_phy.h -e1000_regs.h -igb_ethtool.c -igb.h -igb_main.c -igb_param.c -igb_procfs.c -igb_regtest.h -igb_sysfs.c -igb_vmdq.c -igb_vmdq.h -kcompat.c -kcompat_ethtool.c -kcompat.h - diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c deleted file mode 100644 index 98346709..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c +++ /dev/null @@ -1,3650 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* - * 82575EB Gigabit Network Connection - * 82575EB Gigabit Backplane Connection - * 82575GB Gigabit Network Connection - * 82576 Gigabit Network Connection - * 82576 Quad Port Gigabit Mezzanine Adapter - * 82580 Gigabit Network Connection - * I350 Gigabit Network Connection - */ - -#include "e1000_api.h" -#include "e1000_i210.h" - -static s32 e1000_init_phy_params_82575(struct e1000_hw *hw); -static s32 e1000_init_mac_params_82575(struct e1000_hw *hw); -static s32 e1000_acquire_phy_82575(struct e1000_hw *hw); -static void e1000_release_phy_82575(struct e1000_hw *hw); -static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw); -static void e1000_release_nvm_82575(struct e1000_hw *hw); -static s32 e1000_check_for_link_82575(struct e1000_hw *hw); -static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw); -static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw); -static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, - u16 *duplex); -static s32 e1000_init_hw_82575(struct e1000_hw *hw); -static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw); -static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, - u16 *data); -static s32 e1000_reset_hw_82575(struct e1000_hw *hw); -static s32 e1000_reset_hw_82580(struct e1000_hw *hw); -static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, - u32 offset, u16 *data); -static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, - u32 offset, u16 data); -static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, - bool active); -static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, - bool active); -static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, - bool active); -static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw); -static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw); -static s32 e1000_get_media_type_82575(struct e1000_hw *hw); -static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw); -static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); -static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, - u32 offset, u16 data); -static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); -static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, - u16 *speed, u16 *duplex); -static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); -static bool e1000_sgmii_active_82575(struct e1000_hw *hw); -static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); -static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); -static void e1000_config_collision_dist_82575(struct e1000_hw *hw); -static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); -static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw); -static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw); -static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); -static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw); -static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw); -static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw); -static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, - u16 offset); -static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, - u16 offset); -static s32 
e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); -static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); -static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); -static void e1000_clear_vfta_i350(struct e1000_hw *hw); - -static void e1000_i2c_start(struct e1000_hw *hw); -static void e1000_i2c_stop(struct e1000_hw *hw); -static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data); -static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data); -static s32 e1000_get_i2c_ack(struct e1000_hw *hw); -static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data); -static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data); -static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); -static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); -static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data); -static bool e1000_get_i2c_data(u32 *i2cctl); - -static const u16 e1000_82580_rxpbs_table[] = { - 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; -#define E1000_82580_RXPBS_TABLE_SIZE \ - (sizeof(e1000_82580_rxpbs_table)/sizeof(u16)) - - -/** - * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO - * @hw: pointer to the HW structure - * - * Called to determine if the I2C pins are being used for I2C or as an - * external MDIO interface since the two options are mutually exclusive. - **/ -static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw) -{ - u32 reg = 0; - bool ext_mdio = false; - - DEBUGFUNC("e1000_sgmii_uses_mdio_82575"); - - switch (hw->mac.type) { - case e1000_82575: - case e1000_82576: - reg = E1000_READ_REG(hw, E1000_MDIC); - ext_mdio = !!(reg & E1000_MDIC_DEST); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - reg = E1000_READ_REG(hw, E1000_MDICNFG); - ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); - break; - default: - break; - } - return ext_mdio; -} - -/** - * e1000_init_phy_params_82575 - Init PHY func ptrs. 
- * @hw: pointer to the HW structure - **/ -static s32 e1000_init_phy_params_82575(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u32 ctrl_ext; - - DEBUGFUNC("e1000_init_phy_params_82575"); - - phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic; - phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic; - - if (hw->phy.media_type != e1000_media_type_copper) { - phy->type = e1000_phy_none; - goto out; - } - - phy->ops.power_up = e1000_power_up_phy_copper; - phy->ops.power_down = e1000_power_down_phy_copper_82575; - - phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; - phy->reset_delay_us = 100; - - phy->ops.acquire = e1000_acquire_phy_82575; - phy->ops.check_reset_block = e1000_check_reset_block_generic; - phy->ops.commit = e1000_phy_sw_reset_generic; - phy->ops.get_cfg_done = e1000_get_cfg_done_82575; - phy->ops.release = e1000_release_phy_82575; - - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - - if (e1000_sgmii_active_82575(hw)) { - phy->ops.reset = e1000_phy_hw_reset_sgmii_82575; - ctrl_ext |= E1000_CTRL_I2C_ENA; - } else { - phy->ops.reset = e1000_phy_hw_reset_generic; - ctrl_ext &= ~E1000_CTRL_I2C_ENA; - } - - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - e1000_reset_mdicnfg_82580(hw); - - if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) { - phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575; - phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575; - } else { - switch (hw->mac.type) { - case e1000_82580: - case e1000_i350: - case e1000_i354: - phy->ops.read_reg = e1000_read_phy_reg_82580; - phy->ops.write_reg = e1000_write_phy_reg_82580; - break; - case e1000_i210: - case e1000_i211: - phy->ops.read_reg = e1000_read_phy_reg_gs40g; - phy->ops.write_reg = e1000_write_phy_reg_gs40g; - break; - default: - phy->ops.read_reg = e1000_read_phy_reg_igp; - phy->ops.write_reg = e1000_write_phy_reg_igp; - } - } - - /* Set phy->phy_addr and phy->id. */ - ret_val = e1000_get_phy_id_82575(hw); - - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case M88E1543_E_PHY_ID: - case I347AT4_E_PHY_ID: - case M88E1112_E_PHY_ID: - case M88E1340M_E_PHY_ID: - case M88E1111_I_PHY_ID: - phy->type = e1000_phy_m88; - phy->ops.check_polarity = e1000_check_polarity_m88; - phy->ops.get_info = e1000_get_phy_info_m88; - if (phy->id == I347AT4_E_PHY_ID || - phy->id == M88E1112_E_PHY_ID || - phy->id == M88E1340M_E_PHY_ID) - phy->ops.get_cable_length = - e1000_get_cable_length_m88_gen2; - else if (phy->id == M88E1543_E_PHY_ID) - phy->ops.get_cable_length = - e1000_get_cable_length_m88_gen2; - else - phy->ops.get_cable_length = e1000_get_cable_length_m88; - phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; - /* Check if this PHY is configured for media swap. 
*/ - if (phy->id == M88E1112_E_PHY_ID) { - u16 data; - - ret_val = phy->ops.write_reg(hw, - E1000_M88E1112_PAGE_ADDR, - 2); - if (ret_val) - goto out; - - ret_val = phy->ops.read_reg(hw, - E1000_M88E1112_MAC_CTRL_1, - &data); - if (ret_val) - goto out; - - data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> - E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; - if (data == E1000_M88E1112_AUTO_COPPER_SGMII || - data == E1000_M88E1112_AUTO_COPPER_BASEX) - hw->mac.ops.check_for_link = - e1000_check_for_link_media_swap; - } - break; - case IGP03E1000_E_PHY_ID: - case IGP04E1000_E_PHY_ID: - phy->type = e1000_phy_igp_3; - phy->ops.check_polarity = e1000_check_polarity_igp; - phy->ops.get_info = e1000_get_phy_info_igp; - phy->ops.get_cable_length = e1000_get_cable_length_igp_2; - phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; - phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575; - phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; - break; - case I82580_I_PHY_ID: - case I350_I_PHY_ID: - phy->type = e1000_phy_82580; - phy->ops.check_polarity = e1000_check_polarity_82577; - phy->ops.force_speed_duplex = - e1000_phy_force_speed_duplex_82577; - phy->ops.get_cable_length = e1000_get_cable_length_82577; - phy->ops.get_info = e1000_get_phy_info_82577; - phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; - phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; - break; - case I210_I_PHY_ID: - phy->type = e1000_phy_i210; - phy->ops.check_polarity = e1000_check_polarity_m88; - phy->ops.get_info = e1000_get_phy_info_m88; - phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; - phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; - phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; - phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; - break; - default: - ret_val = -E1000_ERR_PHY; - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_init_nvm_params_82575 - Init NVM func ptrs. - * @hw: pointer to the HW structure - **/ -s32 e1000_init_nvm_params_82575(struct e1000_hw *hw) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - u16 size; - - DEBUGFUNC("e1000_init_nvm_params_82575"); - - size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); - /* - * Added to a constant, "size" becomes the left-shift value - * for setting word_size. - */ - size += NVM_WORD_SIZE_BASE_SHIFT; - - /* Just in case size is out of range, cap it to the largest - * EEPROM size supported - */ - if (size > 15) - size = 15; - - nvm->word_size = 1 << size; - if (hw->mac.type < e1000_i210) { - nvm->opcode_bits = 8; - nvm->delay_usec = 1; - - switch (nvm->override) { - case e1000_nvm_override_spi_large: - nvm->page_size = 32; - nvm->address_bits = 16; - break; - case e1000_nvm_override_spi_small: - nvm->page_size = 8; - nvm->address_bits = 8; - break; - default: - nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; - nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 
- 16 : 8; - break; - } - if (nvm->word_size == (1 << 15)) - nvm->page_size = 128; - - nvm->type = e1000_nvm_eeprom_spi; - } else { - nvm->type = e1000_nvm_flash_hw; - } - - /* Function Pointers */ - nvm->ops.acquire = e1000_acquire_nvm_82575; - nvm->ops.release = e1000_release_nvm_82575; - if (nvm->word_size < (1 << 15)) - nvm->ops.read = e1000_read_nvm_eerd; - else - nvm->ops.read = e1000_read_nvm_spi; - - nvm->ops.write = e1000_write_nvm_spi; - nvm->ops.validate = e1000_validate_nvm_checksum_generic; - nvm->ops.update = e1000_update_nvm_checksum_generic; - nvm->ops.valid_led_default = e1000_valid_led_default_82575; - - /* override generic family function pointers for specific descendants */ - switch (hw->mac.type) { - case e1000_82580: - nvm->ops.validate = e1000_validate_nvm_checksum_82580; - nvm->ops.update = e1000_update_nvm_checksum_82580; - break; - case e1000_i350: - //case e1000_i354: - nvm->ops.validate = e1000_validate_nvm_checksum_i350; - nvm->ops.update = e1000_update_nvm_checksum_i350; - break; - default: - break; - } - - return E1000_SUCCESS; -} - -/** - * e1000_init_mac_params_82575 - Init MAC func ptrs. - * @hw: pointer to the HW structure - **/ -static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; - - DEBUGFUNC("e1000_init_mac_params_82575"); - - /* Derives media type */ - e1000_get_media_type_82575(hw); - /* Set mta register count */ - mac->mta_reg_count = 128; - /* Set uta register count */ - mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; - /* Set rar entry count */ - mac->rar_entry_count = E1000_RAR_ENTRIES_82575; - if (mac->type == e1000_82576) - mac->rar_entry_count = E1000_RAR_ENTRIES_82576; - if (mac->type == e1000_82580) - mac->rar_entry_count = E1000_RAR_ENTRIES_82580; - if (mac->type == e1000_i350 || mac->type == e1000_i354) - mac->rar_entry_count = E1000_RAR_ENTRIES_I350; - - /* Enable EEE default settings for EEE supported devices */ - if (mac->type >= e1000_i350) - dev_spec->eee_disable = false; - - /* Allow a single clear of the SW semaphore on I210 and newer */ - if (mac->type >= e1000_i210) - dev_spec->clear_semaphore_once = true; - - /* Set if part includes ASF firmware */ - mac->asf_firmware_present = true; - /* FWSM register */ - mac->has_fwsm = true; - /* ARC supported; valid only if manageability features are enabled. */ - mac->arc_subsystem_valid = - !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); - - /* Function pointers */ - - /* bus type/speed/width */ - mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; - /* reset */ - if (mac->type >= e1000_82580) - mac->ops.reset_hw = e1000_reset_hw_82580; - else - mac->ops.reset_hw = e1000_reset_hw_82575; - /* hw initialization */ - mac->ops.init_hw = e1000_init_hw_82575; - /* link setup */ - mac->ops.setup_link = e1000_setup_link_generic; - /* physical interface link setup */ - mac->ops.setup_physical_interface = - (hw->phy.media_type == e1000_media_type_copper) - ? 
e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575; - /* physical interface shutdown */ - mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575; - /* physical interface power up */ - mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575; - /* check for link */ - mac->ops.check_for_link = e1000_check_for_link_82575; - /* read mac address */ - mac->ops.read_mac_addr = e1000_read_mac_addr_82575; - /* configure collision distance */ - mac->ops.config_collision_dist = e1000_config_collision_dist_82575; - /* multicast address update */ - mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; - if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) { - /* writing VFTA */ - mac->ops.write_vfta = e1000_write_vfta_i350; - /* clearing VFTA */ - mac->ops.clear_vfta = e1000_clear_vfta_i350; - } else { - /* writing VFTA */ - mac->ops.write_vfta = e1000_write_vfta_generic; - /* clearing VFTA */ - mac->ops.clear_vfta = e1000_clear_vfta_generic; - } - if (hw->mac.type >= e1000_82580) - mac->ops.validate_mdi_setting = - e1000_validate_mdi_setting_crossover_generic; - /* ID LED init */ - mac->ops.id_led_init = e1000_id_led_init_generic; - /* blink LED */ - mac->ops.blink_led = e1000_blink_led_generic; - /* setup LED */ - mac->ops.setup_led = e1000_setup_led_generic; - /* cleanup LED */ - mac->ops.cleanup_led = e1000_cleanup_led_generic; - /* turn on/off LED */ - mac->ops.led_on = e1000_led_on_generic; - mac->ops.led_off = e1000_led_off_generic; - /* clear hardware counters */ - mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575; - /* link info */ - mac->ops.get_link_up_info = e1000_get_link_up_info_82575; - /* get thermal sensor data */ - mac->ops.get_thermal_sensor_data = - e1000_get_thermal_sensor_data_generic; - mac->ops.init_thermal_sensor_thresh = - e1000_init_thermal_sensor_thresh_generic; - /* acquire SW_FW sync */ - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; - if (mac->type >= e1000_i210) { - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; - } - - /* set lan id for port to determine which phy lock to use */ - hw->mac.ops.set_lan_id(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_init_function_pointers_82575 - Init func ptrs. - * @hw: pointer to the HW structure - * - * Called to initialize all function pointers and parameters. - **/ -void e1000_init_function_pointers_82575(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_init_function_pointers_82575"); - - hw->mac.ops.init_params = e1000_init_mac_params_82575; - hw->nvm.ops.init_params = e1000_init_nvm_params_82575; - hw->phy.ops.init_params = e1000_init_phy_params_82575; - hw->mbx.ops.init_params = e1000_init_mbx_params_pf; -} - -/** - * e1000_acquire_phy_82575 - Acquire rights to access PHY - * @hw: pointer to the HW structure - * - * Acquire access rights to the correct PHY. - **/ -static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) -{ - u16 mask = E1000_SWFW_PHY0_SM; - - DEBUGFUNC("e1000_acquire_phy_82575"); - - if (hw->bus.func == E1000_FUNC_1) - mask = E1000_SWFW_PHY1_SM; - else if (hw->bus.func == E1000_FUNC_2) - mask = E1000_SWFW_PHY2_SM; - else if (hw->bus.func == E1000_FUNC_3) - mask = E1000_SWFW_PHY3_SM; - - return hw->mac.ops.acquire_swfw_sync(hw, mask); -} - -/** - * e1000_release_phy_82575 - Release rights to access PHY - * @hw: pointer to the HW structure - * - * A wrapper to release access rights to the correct PHY. 
- **/ -static void e1000_release_phy_82575(struct e1000_hw *hw) -{ - u16 mask = E1000_SWFW_PHY0_SM; - - DEBUGFUNC("e1000_release_phy_82575"); - - if (hw->bus.func == E1000_FUNC_1) - mask = E1000_SWFW_PHY1_SM; - else if (hw->bus.func == E1000_FUNC_2) - mask = E1000_SWFW_PHY2_SM; - else if (hw->bus.func == E1000_FUNC_3) - mask = E1000_SWFW_PHY3_SM; - - hw->mac.ops.release_swfw_sync(hw, mask); -} - -/** - * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the PHY register at offset using the serial gigabit media independent - * interface and stores the retrieved information in data. - **/ -static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, - u16 *data) -{ - s32 ret_val = -E1000_ERR_PARAM; - - DEBUGFUNC("e1000_read_phy_reg_sgmii_82575"); - - if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { - DEBUGOUT1("PHY Address %u is out of range\n", offset); - goto out; - } - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - goto out; - - ret_val = e1000_read_phy_reg_i2c(hw, offset, data); - - hw->phy.ops.release(hw); - -out: - return ret_val; -} - -/** - * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Writes the data to PHY register at the offset using the serial gigabit - * media independent interface. - **/ -static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, - u16 data) -{ - s32 ret_val = -E1000_ERR_PARAM; - - DEBUGFUNC("e1000_write_phy_reg_sgmii_82575"); - - if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { - DEBUGOUT1("PHY Address %d is out of range\n", offset); - goto out; - } - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - goto out; - - ret_val = e1000_write_phy_reg_i2c(hw, offset, data); - - hw->phy.ops.release(hw); - -out: - return ret_val; -} - -/** - * e1000_get_phy_id_82575 - Retrieve PHY addr and id - * @hw: pointer to the HW structure - * - * Retrieves the PHY address and ID for both PHY's which do and do not use - * sgmi interface. - **/ -static s32 e1000_get_phy_id_82575(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u16 phy_id; - u32 ctrl_ext; - u32 mdic; - - DEBUGFUNC("e1000_get_phy_id_82575"); - - /* i354 devices can have a PHY that needs an extra read for id */ - if (hw->mac.type == e1000_i354) - e1000_get_phy_id(hw); - - - /* - * For SGMII PHYs, we try the list of possible addresses until - * we find one that works. For non-SGMII PHYs - * (e.g. integrated copper PHYs), an address of 1 should - * work. The result of this function should mean phy->phy_addr - * and phy->id are set correctly. 
- */ - if (!e1000_sgmii_active_82575(hw)) { - phy->addr = 1; - ret_val = e1000_get_phy_id(hw); - goto out; - } - - if (e1000_sgmii_uses_mdio_82575(hw)) { - switch (hw->mac.type) { - case e1000_82575: - case e1000_82576: - mdic = E1000_READ_REG(hw, E1000_MDIC); - mdic &= E1000_MDIC_PHY_MASK; - phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - mdic = E1000_READ_REG(hw, E1000_MDICNFG); - mdic &= E1000_MDICNFG_PHY_MASK; - phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; - break; - default: - ret_val = -E1000_ERR_PHY; - goto out; - break; - } - ret_val = e1000_get_phy_id(hw); - goto out; - } - - /* Power on sgmii phy if it is disabled */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - E1000_WRITE_REG(hw, E1000_CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); - E1000_WRITE_FLUSH(hw); - msec_delay(300); - - /* - * The address field in the I2CCMD register is 3 bits and 0 is invalid. - * Therefore, we need to test 1-7 - */ - for (phy->addr = 1; phy->addr < 8; phy->addr++) { - ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); - if (ret_val == E1000_SUCCESS) { - DEBUGOUT2("Vendor ID 0x%08X read at address %u\n", - phy_id, phy->addr); - /* - * At the time of this writing, The M88 part is - * the only supported SGMII PHY product. - */ - if (phy_id == M88_VENDOR) - break; - } else { - DEBUGOUT1("PHY address %u was unreadable\n", - phy->addr); - } - } - - /* A valid PHY type couldn't be found. */ - if (phy->addr == 8) { - phy->addr = 0; - ret_val = -E1000_ERR_PHY; - } else { - ret_val = e1000_get_phy_id(hw); - } - - /* restore previous sfp cage power state */ - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - -out: - return ret_val; -} - -/** - * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset - * @hw: pointer to the HW structure - * - * Resets the PHY using the serial gigabit media independent interface. - **/ -static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575"); - - /* - * This isn't a true "hard" reset, but is the only reset - * available to us at this time. - */ - - DEBUGOUT("Soft resetting SGMII attached PHY...\n"); - - if (!(hw->phy.ops.write_reg)) - goto out; - - /* - * SFP documentation requires the following to configure the SPF module - * to work on SGMII. No further documentation is given. - */ - ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); - if (ret_val) - goto out; - - ret_val = hw->phy.ops.commit(hw); - -out: - return ret_val; -} - -/** - * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state - * @hw: pointer to the HW structure - * @active: true to enable LPLU, false to disable - * - * Sets the LPLU D0 state according to the active flag. When - * activating LPLU this function also disables smart speed - * and vice versa. LPLU will not be activated unless the - * device autonegotiation advertisement meets standards of - * either 10 or 10/100 or 10/100/1000 at all duplexes. - * This is a function pointer entry point only called by - * PHY setup routines. 
- **/ -static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u16 data; - - DEBUGFUNC("e1000_set_d0_lplu_state_82575"); - - if (!(hw->phy.ops.read_reg)) - goto out; - - ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); - if (ret_val) - goto out; - - if (active) { - data |= IGP02E1000_PM_D0_LPLU; - ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, - data); - if (ret_val) - goto out; - - /* When LPLU is enabled, we should disable SmartSpeed */ - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - &data); - data &= ~IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - goto out; - } else { - data &= ~IGP02E1000_PM_D0_LPLU; - ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, - data); - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used - * during Dx states where the power conservation is most - * important. During driver activity we should enable - * SmartSpeed, so performance is maintained. - */ - if (phy->smart_speed == e1000_smart_speed_on) { - ret_val = phy->ops.read_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - goto out; - - data |= IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - goto out; - } else if (phy->smart_speed == e1000_smart_speed_off) { - ret_val = phy->ops.read_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - goto out; - - data &= ~IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - goto out; - } - } - -out: - return ret_val; -} - -/** - * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state - * @hw: pointer to the HW structure - * @active: true to enable LPLU, false to disable - * - * Sets the LPLU D0 state according to the active flag. When - * activating LPLU this function also disables smart speed - * and vice versa. LPLU will not be activated unless the - * device autonegotiation advertisement meets standards of - * either 10 or 10/100 or 10/100/1000 at all duplexes. - * This is a function pointer entry point only called by - * PHY setup routines. - **/ -static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u32 data; - - DEBUGFUNC("e1000_set_d0_lplu_state_82580"); - - data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); - - if (active) { - data |= E1000_82580_PM_D0_LPLU; - - /* When LPLU is enabled, we should disable SmartSpeed */ - data &= ~E1000_82580_PM_SPD; - } else { - data &= ~E1000_82580_PM_D0_LPLU; - - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used - * during Dx states where the power conservation is most - * important. During driver activity we should enable - * SmartSpeed, so performance is maintained. 
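/*
 * Illustrative sketch (editor's example, not from the driver) of the
 * LPLU/SmartSpeed rule explained in the comment above: the two features are
 * mutually exclusive, so enabling low power link up clears SmartSpeed, and
 * disabling it restores SmartSpeed according to the configured preference.
 * Bit positions and names below are placeholders, not the 82580 layout.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_PM_D0_LPLU	(1u << 1)
#define DEMO_PM_SPD	(1u << 2)

enum demo_smart_speed { SMART_SPEED_DEFAULT, SMART_SPEED_ON, SMART_SPEED_OFF };

static uint32_t demo_set_d0_lplu(uint32_t pm_reg, bool active,
				 enum demo_smart_speed pref)
{
	if (active) {
		pm_reg |= DEMO_PM_D0_LPLU;
		pm_reg &= ~DEMO_PM_SPD;		/* LPLU on => SmartSpeed off */
	} else {
		pm_reg &= ~DEMO_PM_D0_LPLU;
		if (pref == SMART_SPEED_ON)
			pm_reg |= DEMO_PM_SPD;
		else if (pref == SMART_SPEED_OFF)
			pm_reg &= ~DEMO_PM_SPD;
		/* SMART_SPEED_DEFAULT leaves the bit untouched */
	}
	return pm_reg;
}

int main(void)
{
	uint32_t reg = 0;

	reg = demo_set_d0_lplu(reg, true, SMART_SPEED_ON);
	printf("LPLU on : reg=0x%02x\n", reg);
	reg = demo_set_d0_lplu(reg, false, SMART_SPEED_ON);
	printf("LPLU off: reg=0x%02x\n", reg);
	return 0;
}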
- */ - if (phy->smart_speed == e1000_smart_speed_on) - data |= E1000_82580_PM_SPD; - else if (phy->smart_speed == e1000_smart_speed_off) - data &= ~E1000_82580_PM_SPD; - } - - E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); - return ret_val; -} - -/** - * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3 - * @hw: pointer to the HW structure - * @active: boolean used to enable/disable lplu - * - * Success returns 0, Failure returns 1 - * - * The low power link up (lplu) state is set to the power management level D3 - * and SmartSpeed is disabled when active is true, else clear lplu for D3 - * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU - * is used during Dx states where the power conservation is most important. - * During driver activity, SmartSpeed should be enabled so performance is - * maintained. - **/ -s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u32 data; - - DEBUGFUNC("e1000_set_d3_lplu_state_82580"); - - data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); - - if (!active) { - data &= ~E1000_82580_PM_D3_LPLU; - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used - * during Dx states where the power conservation is most - * important. During driver activity we should enable - * SmartSpeed, so performance is maintained. - */ - if (phy->smart_speed == e1000_smart_speed_on) - data |= E1000_82580_PM_SPD; - else if (phy->smart_speed == e1000_smart_speed_off) - data &= ~E1000_82580_PM_SPD; - } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || - (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || - (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { - data |= E1000_82580_PM_D3_LPLU; - /* When LPLU is enabled, we should disable SmartSpeed */ - data &= ~E1000_82580_PM_SPD; - } - - E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); - return ret_val; -} - -/** - * e1000_acquire_nvm_82575 - Request for access to EEPROM - * @hw: pointer to the HW structure - * - * Acquire the necessary semaphores for exclusive access to the EEPROM. - * Set the EEPROM access request bit and wait for EEPROM access grant bit. - * Return successful if access grant bit set, else clear the request for - * EEPROM access and return -E1000_ERR_NVM (-1). 
- **/ -static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_acquire_nvm_82575"); - - ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); - if (ret_val) - goto out; - - /* - * Check if there is some access - * error this access may hook on - */ - if (hw->mac.type == e1000_i350) { - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT | - E1000_EECD_TIMEOUT)) { - /* Clear all access error flags */ - E1000_WRITE_REG(hw, E1000_EECD, eecd | - E1000_EECD_ERROR_CLR); - DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); - } - } - if (hw->mac.type == e1000_82580) { - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - if (eecd & E1000_EECD_BLOCKED) { - /* Clear access error flag */ - E1000_WRITE_REG(hw, E1000_EECD, eecd | - E1000_EECD_BLOCKED); - DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); - } - } - - - ret_val = e1000_acquire_nvm_generic(hw); - if (ret_val) - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); - -out: - return ret_val; -} - -/** - * e1000_release_nvm_82575 - Release exclusive access to EEPROM - * @hw: pointer to the HW structure - * - * Stop any current commands to the EEPROM and clear the EEPROM request bit, - * then release the semaphores acquired. - **/ -static void e1000_release_nvm_82575(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_release_nvm_82575"); - - e1000_release_nvm_generic(hw); - - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ - - DEBUGFUNC("e1000_acquire_swfw_sync_82575"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_82575 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. 
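/*
 * Illustrative sketch (editor's example, not from the driver) of the
 * software/firmware semaphore handshake used by the 82575 acquire/release
 * routines above: the requested resource bit, and its firmware counterpart
 * in the upper half of the word, are polled until both are clear, then the
 * software bit is set.  The shared word is a plain variable here instead of
 * SW_FW_SYNC, and the hardware semaphore around the read-modify-write is
 * left out.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t demo_sw_fw_sync;		/* stands in for SW_FW_SYNC */

static int demo_acquire_swfw(uint16_t mask, int timeout)
{
	uint32_t swmask = mask;
	uint32_t fwmask = (uint32_t)mask << 16;
	int i;

	for (i = 0; i < timeout; i++) {
		if (!(demo_sw_fw_sync & (swmask | fwmask))) {
			demo_sw_fw_sync |= swmask;	/* claim the resource */
			return 0;
		}
		/* resource busy: a real driver sleeps ~5 ms and retries */
	}
	return -1;					/* timed out */
}

static void demo_release_swfw(uint16_t mask)
{
	demo_sw_fw_sync &= ~(uint32_t)mask;
}

int main(void)
{
	const uint16_t eep_sm = 0x0001;			/* demo resource bit */

	if (demo_acquire_swfw(eep_sm, 200) == 0) {
		printf("acquired, sync=0x%08x\n", demo_sw_fw_sync);
		demo_release_swfw(eep_sm);
		printf("released, sync=0x%08x\n", demo_sw_fw_sync);
	}
	return 0;
}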
- **/ -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_82575"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); -} - -/** - * e1000_get_cfg_done_82575 - Read config done bit - * @hw: pointer to the HW structure - * - * Read the management control register for the config done bit for - * completion status. NOTE: silicon which is EEPROM-less will fail trying - * to read the config done bit, so an error is *ONLY* logged and returns - * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon - * would not be able to be reset or change link. - **/ -static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw) -{ - s32 timeout = PHY_CFG_TIMEOUT; - s32 ret_val = E1000_SUCCESS; - u32 mask = E1000_NVM_CFG_DONE_PORT_0; - - DEBUGFUNC("e1000_get_cfg_done_82575"); - - if (hw->bus.func == E1000_FUNC_1) - mask = E1000_NVM_CFG_DONE_PORT_1; - else if (hw->bus.func == E1000_FUNC_2) - mask = E1000_NVM_CFG_DONE_PORT_2; - else if (hw->bus.func == E1000_FUNC_3) - mask = E1000_NVM_CFG_DONE_PORT_3; - while (timeout) { - if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) - break; - msec_delay(1); - timeout--; - } - if (!timeout) - DEBUGOUT("MNG configuration cycle has not completed.\n"); - - /* If EEPROM is not marked present, init the PHY manually */ - if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && - (hw->phy.type == e1000_phy_igp_3)) - e1000_phy_init_script_igp3(hw); - - return ret_val; -} - -/** - * e1000_get_link_up_info_82575 - Get link speed/duplex info - * @hw: pointer to the HW structure - * @speed: stores the current speed - * @duplex: stores the current duplex - * - * This is a wrapper function, if using the serial gigabit media independent - * interface, use PCS to retrieve the link speed and duplex information. - * Otherwise, use the generic function to get the link speed and duplex info. - **/ -static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, - u16 *duplex) -{ - s32 ret_val; - - DEBUGFUNC("e1000_get_link_up_info_82575"); - - if (hw->phy.media_type != e1000_media_type_copper) - ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed, - duplex); - else - ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, - duplex); - - return ret_val; -} - -/** - * e1000_check_for_link_82575 - Check for link - * @hw: pointer to the HW structure - * - * If sgmii is enabled, then use the pcs register to determine link, otherwise - * use the generic interface for determining link. - **/ -static s32 e1000_check_for_link_82575(struct e1000_hw *hw) -{ - s32 ret_val; - u16 speed, duplex; - - DEBUGFUNC("e1000_check_for_link_82575"); - - if (hw->phy.media_type != e1000_media_type_copper) { - ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, - &duplex); - /* - * Use this flag to determine if link needs to be checked or - * not. If we have link clear the flag so that we do not - * continue to check for link. - */ - hw->mac.get_link_status = !hw->mac.serdes_has_link; - - /* - * Configure Flow Control now that Auto-Neg has completed. - * First, we need to restore the desired flow control - * settings because we may have had to re-autoneg with a - * different link partner. 
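/*
 * Illustrative sketch (editor's example, not from the driver) of the
 * per-port "configuration done" poll performed by e1000_get_cfg_done_82575
 * above: each PCI function waits on its own done bit and gives up after a
 * fixed number of 1 ms ticks without treating the timeout as fatal.  The
 * register reader is simulated and the bit layout is invented for the demo.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_CFG_DONE_PORT(f)	(1u << (18 + (f)))	/* assumed layout */
#define DEMO_CFG_TIMEOUT_MS	100

/* Simulated management register: reports "done" after a few polls. */
static uint32_t demo_read_eemngctl(unsigned int func, int poll_count)
{
	return poll_count >= 3 ? DEMO_CFG_DONE_PORT(func) : 0;
}

static bool demo_wait_cfg_done(unsigned int func)
{
	uint32_t mask = DEMO_CFG_DONE_PORT(func);
	int ms;

	for (ms = 0; ms < DEMO_CFG_TIMEOUT_MS; ms++) {
		if (demo_read_eemngctl(func, ms) & mask)
			return true;
		/* a real driver sleeps 1 ms between polls */
	}
	return false;
}

int main(void)
{
	printf("port 1 config done: %s\n",
	       demo_wait_cfg_done(1) ? "yes" : "timed out");
	return 0;
}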
- */ - ret_val = e1000_config_fc_after_link_up_generic(hw); - if (ret_val) - DEBUGOUT("Error configuring flow control\n"); - } else { - ret_val = e1000_check_for_copper_link_generic(hw); - } - - return ret_val; -} - -/** - * e1000_check_for_link_media_swap - Check which M88E1112 interface linked - * @hw: pointer to the HW structure - * - * Poll the M88E1112 interfaces to see which interface achieved link. - */ -static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - u8 port = 0; - - DEBUGFUNC("e1000_check_for_link_media_swap"); - - /* Check the copper medium. */ - ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); - if (ret_val) - return ret_val; - - if (data & E1000_M88E1112_STATUS_LINK) - port = E1000_MEDIA_PORT_COPPER; - - /* Check the other medium. */ - ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); - if (ret_val) - return ret_val; - - if (data & E1000_M88E1112_STATUS_LINK) - port = E1000_MEDIA_PORT_OTHER; - - /* Determine if a swap needs to happen. */ - if (port && (hw->dev_spec._82575.media_port != port)) { - hw->dev_spec._82575.media_port = port; - hw->dev_spec._82575.media_changed = true; - } else { - ret_val = e1000_check_for_link_82575(hw); - } - - return E1000_SUCCESS; -} - -/** - * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown - * @hw: pointer to the HW structure - **/ -static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw) -{ - u32 reg; - - DEBUGFUNC("e1000_power_up_serdes_link_82575"); - - if ((hw->phy.media_type != e1000_media_type_internal_serdes) && - !e1000_sgmii_active_82575(hw)) - return; - - /* Enable PCS to turn on link */ - reg = E1000_READ_REG(hw, E1000_PCS_CFG0); - reg |= E1000_PCS_CFG_PCS_EN; - E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); - - /* Power up the laser */ - reg = E1000_READ_REG(hw, E1000_CTRL_EXT); - reg &= ~E1000_CTRL_EXT_SDP3_DATA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); - - /* flush the write to verify completion */ - E1000_WRITE_FLUSH(hw); - msec_delay(1); -} - -/** - * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex - * @hw: pointer to the HW structure - * @speed: stores the current speed - * @duplex: stores the current duplex - * - * Using the physical coding sub-layer (PCS), retrieve the current speed and - * duplex, then store the values in the pointers provided. - **/ -static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, - u16 *speed, u16 *duplex) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 pcs; - u32 status; - - DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575"); - - /* - * Read the PCS Status register for link state. For non-copper mode, - * the status register is not accurate. The PCS status register is - * used instead. - */ - pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT); - - /* - * The link up bit determines when link is up on autoneg. 
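/*
 * Illustrative sketch (editor's example, not from the driver) of the PCS
 * link-status decode performed just below: one status word yields link,
 * speed and duplex.  Bit positions here are invented for the demo; only the
 * decoding order mirrors the code above.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_LSTS_LINK_OK	(1u << 0)
#define DEMO_LSTS_SPEED_100	(1u << 1)
#define DEMO_LSTS_SPEED_1000	(1u << 2)
#define DEMO_LSTS_DUPLEX_FULL	(1u << 3)

static bool demo_decode_pcs(uint32_t lstat, unsigned int *speed, bool *full)
{
	if (!(lstat & DEMO_LSTS_LINK_OK)) {
		*speed = 0;
		*full = false;
		return false;
	}
	if (lstat & DEMO_LSTS_SPEED_1000)
		*speed = 1000;
	else if (lstat & DEMO_LSTS_SPEED_100)
		*speed = 100;
	else
		*speed = 10;
	*full = (lstat & DEMO_LSTS_DUPLEX_FULL) != 0;
	return true;
}

int main(void)
{
	unsigned int speed;
	bool full;

	demo_decode_pcs(DEMO_LSTS_LINK_OK | DEMO_LSTS_SPEED_1000 |
			DEMO_LSTS_DUPLEX_FULL, &speed, &full);
	printf("link up, %u Mb/s, %s duplex\n", speed, full ? "full" : "half");
	return 0;
}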
- */ - if (pcs & E1000_PCS_LSTS_LINK_OK) { - mac->serdes_has_link = true; - - /* Detect and store PCS speed */ - if (pcs & E1000_PCS_LSTS_SPEED_1000) - *speed = SPEED_1000; - else if (pcs & E1000_PCS_LSTS_SPEED_100) - *speed = SPEED_100; - else - *speed = SPEED_10; - - /* Detect and store PCS duplex */ - if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) - *duplex = FULL_DUPLEX; - else - *duplex = HALF_DUPLEX; - - /* Check if it is an I354 2.5Gb backplane connection. */ - if (mac->type == e1000_i354) { - status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - !(status & E1000_STATUS_2P5_SKU_OVER)) { - *speed = SPEED_2500; - *duplex = FULL_DUPLEX; - DEBUGOUT("2500 Mbs, "); - DEBUGOUT("Full Duplex\n"); - } - } - - } else { - mac->serdes_has_link = false; - *speed = 0; - *duplex = 0; - } - - return E1000_SUCCESS; -} - -/** - * e1000_shutdown_serdes_link_82575 - Remove link during power down - * @hw: pointer to the HW structure - * - * In the case of serdes shut down sfp and PCS on driver unload - * when management pass through is not enabled. - **/ -void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) -{ - u32 reg; - - DEBUGFUNC("e1000_shutdown_serdes_link_82575"); - - if ((hw->phy.media_type != e1000_media_type_internal_serdes) && - !e1000_sgmii_active_82575(hw)) - return; - - if (!e1000_enable_mng_pass_thru(hw)) { - /* Disable PCS to turn off link */ - reg = E1000_READ_REG(hw, E1000_PCS_CFG0); - reg &= ~E1000_PCS_CFG_PCS_EN; - E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); - - /* shutdown the laser */ - reg = E1000_READ_REG(hw, E1000_CTRL_EXT); - reg |= E1000_CTRL_EXT_SDP3_DATA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); - - /* flush the write to verify completion */ - E1000_WRITE_FLUSH(hw); - msec_delay(1); - } - - return; -} - -/** - * e1000_reset_hw_82575 - Reset hardware - * @hw: pointer to the HW structure - * - * This resets the hardware into a known state. - **/ -static s32 e1000_reset_hw_82575(struct e1000_hw *hw) -{ - u32 ctrl; - s32 ret_val; - - DEBUGFUNC("e1000_reset_hw_82575"); - - /* - * Prevent the PCI-E bus from sticking if there is no TLP connection - * on the last TLP read/write transaction when MAC is reset. - */ - ret_val = e1000_disable_pcie_master_generic(hw); - if (ret_val) - DEBUGOUT("PCI-E Master disable polling has failed.\n"); - - /* set the completion timeout for interface */ - ret_val = e1000_set_pcie_completion_timeout(hw); - if (ret_val) - DEBUGOUT("PCI-E Set completion timeout has failed.\n"); - - DEBUGOUT("Masking off all interrupts\n"); - E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); - - E1000_WRITE_REG(hw, E1000_RCTL, 0); - E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); - E1000_WRITE_FLUSH(hw); - - msec_delay(10); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - - DEBUGOUT("Issuing a global reset to MAC\n"); - E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); - - ret_val = e1000_get_auto_rd_done_generic(hw); - if (ret_val) { - /* - * When auto config read does not complete, do not - * return with an error. This can happen in situations - * where there is no eeprom and prevents getting link. - */ - DEBUGOUT("Auto Read Done did not complete\n"); - } - - /* If EEPROM is not present, run manual init scripts */ - if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES)) - e1000_reset_init_script_82575(hw); - - /* Clear any pending interrupt events. 
*/ - E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); - E1000_READ_REG(hw, E1000_ICR); - - /* Install any alternate MAC address into RAR0 */ - ret_val = e1000_check_alt_mac_addr_generic(hw); - - return ret_val; -} - -/** - * e1000_init_hw_82575 - Initialize hardware - * @hw: pointer to the HW structure - * - * This inits the hardware readying it for operation. - **/ -static s32 e1000_init_hw_82575(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - s32 ret_val; - u16 i, rar_count = mac->rar_entry_count; - - DEBUGFUNC("e1000_init_hw_82575"); - - /* Initialize identification LED */ - ret_val = mac->ops.id_led_init(hw); - if (ret_val) { - DEBUGOUT("Error initializing identification LED\n"); - /* This is not fatal and we should not stop init due to this */ - } - - /* Disabling VLAN filtering */ - DEBUGOUT("Initializing the IEEE VLAN\n"); - mac->ops.clear_vfta(hw); - - /* Setup the receive address */ - e1000_init_rx_addrs_generic(hw, rar_count); - - /* Zero out the Multicast HASH table */ - DEBUGOUT("Zeroing the MTA\n"); - for (i = 0; i < mac->mta_reg_count; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); - - /* Zero out the Unicast HASH table */ - DEBUGOUT("Zeroing the UTA\n"); - for (i = 0; i < mac->uta_reg_count; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0); - - /* Setup link and flow control */ - ret_val = mac->ops.setup_link(hw); - - /* Set the default MTU size */ - hw->dev_spec._82575.mtu = 1500; - - /* - * Clear all of the statistics registers (clear on read). It is - * important that we do this after we have tried to establish link - * because the symbol error count will increment wildly if there - * is no link. - */ - e1000_clear_hw_cntrs_82575(hw); - - return ret_val; -} - -/** - * e1000_setup_copper_link_82575 - Configure copper link settings - * @hw: pointer to the HW structure - * - * Configures the link for auto-neg or forced speed and duplex. Then we check - * for link, once link is established calls to configure collision distance - * and flow control are called. 
- **/ -static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) -{ - u32 ctrl; - s32 ret_val; - u32 phpm_reg; - - DEBUGFUNC("e1000_setup_copper_link_82575"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= E1000_CTRL_SLU; - ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - /* Clear Go Link Disconnect bit on supported devices */ - switch (hw->mac.type) { - case e1000_82580: - case e1000_i350: - case e1000_i210: - case e1000_i211: - phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); - phpm_reg &= ~E1000_82580_PM_GO_LINKD; - E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg); - break; - default: - break; - } - - ret_val = e1000_setup_serdes_link_82575(hw); - if (ret_val) - goto out; - - if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) { - /* allow time for SFP cage time to power up phy */ - msec_delay(300); - - ret_val = hw->phy.ops.reset(hw); - if (ret_val) { - DEBUGOUT("Error resetting the PHY.\n"); - goto out; - } - } - switch (hw->phy.type) { - case e1000_phy_i210: - case e1000_phy_m88: - switch (hw->phy.id) { - case I347AT4_E_PHY_ID: - case M88E1112_E_PHY_ID: - case M88E1340M_E_PHY_ID: - case M88E1543_E_PHY_ID: - case I210_I_PHY_ID: - ret_val = e1000_copper_link_setup_m88_gen2(hw); - break; - default: - ret_val = e1000_copper_link_setup_m88(hw); - break; - } - break; - case e1000_phy_igp_3: - ret_val = e1000_copper_link_setup_igp(hw); - break; - case e1000_phy_82580: - ret_val = e1000_copper_link_setup_82577(hw); - break; - default: - ret_val = -E1000_ERR_PHY; - break; - } - - if (ret_val) - goto out; - - ret_val = e1000_setup_copper_link_generic(hw); -out: - return ret_val; -} - -/** - * e1000_setup_serdes_link_82575 - Setup link for serdes - * @hw: pointer to the HW structure - * - * Configure the physical coding sub-layer (PCS) link. The PCS link is - * used on copper connections where the serialized gigabit media independent - * interface (sgmii), or serdes fiber is being used. Configures the link - * for auto-negotiation or forces speed/duplex. - **/ -static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw) -{ - u32 ctrl_ext, ctrl_reg, reg, anadv_reg; - bool pcs_autoneg; - s32 ret_val = E1000_SUCCESS; - u16 data; - - DEBUGFUNC("e1000_setup_serdes_link_82575"); - - if ((hw->phy.media_type != e1000_media_type_internal_serdes) && - !e1000_sgmii_active_82575(hw)) - return ret_val; - - /* - * On the 82575, SerDes loopback mode persists until it is - * explicitly turned off or a power cycle is performed. A read to - * the register does not indicate its status. Therefore, we ensure - * loopback mode is disabled during initialization. 
- */ - E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); - - /* power on the sfp cage if present */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - - ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); - ctrl_reg |= E1000_CTRL_SLU; - - /* set both sw defined pins on 82575/82576*/ - if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) - ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; - - reg = E1000_READ_REG(hw, E1000_PCS_LCTL); - - /* default pcs_autoneg to the same setting as mac autoneg */ - pcs_autoneg = hw->mac.autoneg; - - switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { - case E1000_CTRL_EXT_LINK_MODE_SGMII: - /* sgmii mode lets the phy handle forcing speed/duplex */ - pcs_autoneg = true; - /* autoneg time out should be disabled for SGMII mode */ - reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); - break; - case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: - /* disable PCS autoneg and support parallel detect only */ - pcs_autoneg = false; - /* fall through to default case */ - default: - if (hw->mac.type == e1000_82575 || - hw->mac.type == e1000_82576) { - ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) - pcs_autoneg = false; - } - - /* - * non-SGMII modes only supports a speed of 1000/Full for the - * link so it is best to just force the MAC and let the pcs - * link either autoneg or be forced to 1000/Full - */ - ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | - E1000_CTRL_FD | E1000_CTRL_FRCDPX; - - /* set speed of 1000/Full if speed/duplex is forced */ - reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; - break; - } - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); - - /* - * New SerDes mode allows for forcing speed or autonegotiating speed - * at 1gb. Autoneg should be default set by most drivers. This is the - * mode that will be compatible with older link partners and switches. - * However, both are supported by the hardware and some drivers/tools. - */ - reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | - E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); - - if (pcs_autoneg) { - /* Set PCS register for autoneg */ - reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ - E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ - - /* Disable force flow control for autoneg */ - reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; - - /* Configure flow control advertisement for autoneg */ - anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); - anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); - - switch (hw->fc.requested_mode) { - case e1000_fc_full: - case e1000_fc_rx_pause: - anadv_reg |= E1000_TXCW_ASM_DIR; - anadv_reg |= E1000_TXCW_PAUSE; - break; - case e1000_fc_tx_pause: - anadv_reg |= E1000_TXCW_ASM_DIR; - break; - default: - break; - } - - E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg); - - DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); - } else { - /* Set PCS register for forced link */ - reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ - - /* Force flow control for forced link */ - reg |= E1000_PCS_LCTL_FORCE_FCTRL; - - DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); - } - - E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); - - if (!pcs_autoneg && !e1000_sgmii_active_82575(hw)) - e1000_force_mac_fc_generic(hw); - - return ret_val; -} - -/** - * e1000_get_media_type_82575 - derives current media type. 
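/*
 * Illustrative sketch (editor's example, not from the driver) of the
 * flow-control advertisement selection used by the serdes/PCS autoneg setup
 * in e1000_setup_serdes_link_82575 above: the requested mode decides which
 * of the symmetric-pause and asymmetric-direction bits go into the
 * advertisement word.  Bit values and the enum are demo placeholders.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_TXCW_PAUSE		(1u << 7)
#define DEMO_TXCW_ASM_DIR	(1u << 8)

enum demo_fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static uint32_t demo_fc_advert(uint32_t anadv, enum demo_fc_mode mode)
{
	anadv &= ~(DEMO_TXCW_PAUSE | DEMO_TXCW_ASM_DIR);

	switch (mode) {
	case FC_FULL:
	case FC_RX_PAUSE:
		anadv |= DEMO_TXCW_PAUSE | DEMO_TXCW_ASM_DIR;
		break;
	case FC_TX_PAUSE:
		anadv |= DEMO_TXCW_ASM_DIR;
		break;
	default:
		break;
	}
	return anadv;
}

int main(void)
{
	printf("full    -> 0x%03x\n", demo_fc_advert(0, FC_FULL));
	printf("tx only -> 0x%03x\n", demo_fc_advert(0, FC_TX_PAUSE));
	return 0;
}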
- * @hw: pointer to the HW structure - * - * The media type is chosen reflecting few settings. - * The following are taken into account: - * - link mode set in the current port Init Control Word #3 - * - current link mode settings in CSR register - * - MDIO vs. I2C PHY control interface chosen - * - SFP module media type - **/ -static s32 e1000_get_media_type_82575(struct e1000_hw *hw) -{ - struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; - s32 ret_val = E1000_SUCCESS; - u32 ctrl_ext = 0; - u32 link_mode = 0; - - /* Set internal phy as default */ - dev_spec->sgmii_active = false; - dev_spec->module_plugged = false; - - /* Get CSR setting */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - - /* extract link mode setting */ - link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; - - switch (link_mode) { - case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: - hw->phy.media_type = e1000_media_type_internal_serdes; - break; - case E1000_CTRL_EXT_LINK_MODE_GMII: - hw->phy.media_type = e1000_media_type_copper; - break; - case E1000_CTRL_EXT_LINK_MODE_SGMII: - /* Get phy control interface type set (MDIO vs. I2C)*/ - if (e1000_sgmii_uses_mdio_82575(hw)) { - hw->phy.media_type = e1000_media_type_copper; - dev_spec->sgmii_active = true; - break; - } - /* fall through for I2C based SGMII */ - case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: - /* read media type from SFP EEPROM */ - ret_val = e1000_set_sfp_media_type_82575(hw); - if ((ret_val != E1000_SUCCESS) || - (hw->phy.media_type == e1000_media_type_unknown)) { - /* - * If media type was not identified then return media - * type defined by the CTRL_EXT settings. - */ - hw->phy.media_type = e1000_media_type_internal_serdes; - - if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { - hw->phy.media_type = e1000_media_type_copper; - dev_spec->sgmii_active = true; - } - - break; - } - - /* do not change link mode for 100BaseFX */ - if (dev_spec->eth_flags.e100_base_fx) - break; - - /* change current link mode setting */ - ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; - - if (hw->phy.media_type == e1000_media_type_copper) - ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; - else - ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; - - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - - break; - } - - return ret_val; -} - -/** - * e1000_set_sfp_media_type_82575 - derives SFP module media type. - * @hw: pointer to the HW structure - * - * The media type is chosen based on SFP module. - * compatibility flags retrieved from SFP ID EEPROM. 
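/*
 * Illustrative sketch (editor's example, not from the driver) of the SFP
 * classification performed just below: the module identifier byte says
 * whether an SFP/SFF cage is populated, then the Ethernet compliance flags
 * pick serdes vs. SGMII copper.  The flag struct and identifier values are
 * demo stand-ins, not the driver's SFF data layout.
 */
#include <stdio.h>
#include <stdbool.h>

enum demo_media { MEDIA_UNKNOWN, MEDIA_SERDES, MEDIA_COPPER };

struct demo_sfp_flags {
	bool base_lx, base_sx, base_fx_100, base_t_1000;
};

static enum demo_media demo_classify_sfp(unsigned char identifier,
					 const struct demo_sfp_flags *f,
					 bool *sgmii_active)
{
	*sgmii_active = false;
	if (identifier != 0x03 && identifier != 0x02)	/* assumed SFP/SFF ids */
		return MEDIA_UNKNOWN;
	if (f->base_lx || f->base_sx)
		return MEDIA_SERDES;
	if (f->base_fx_100) {
		*sgmii_active = true;
		return MEDIA_SERDES;
	}
	if (f->base_t_1000) {
		*sgmii_active = true;
		return MEDIA_COPPER;
	}
	return MEDIA_UNKNOWN;
}

int main(void)
{
	struct demo_sfp_flags f = { .base_t_1000 = true };
	bool sgmii;
	enum demo_media m = demo_classify_sfp(0x03, &f, &sgmii);

	printf("media=%d sgmii=%d\n", (int)m, (int)sgmii);
	return 0;
}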
- **/ -static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw) -{ - s32 ret_val = E1000_ERR_CONFIG; - u32 ctrl_ext = 0; - struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; - struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags; - u8 tranceiver_type = 0; - s32 timeout = 3; - - /* Turn I2C interface ON and power on sfp cage */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); - - E1000_WRITE_FLUSH(hw); - - /* Read SFP module data */ - while (timeout) { - ret_val = e1000_read_sfp_data_byte(hw, - E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), - &tranceiver_type); - if (ret_val == E1000_SUCCESS) - break; - msec_delay(100); - timeout--; - } - if (ret_val != E1000_SUCCESS) - goto out; - - ret_val = e1000_read_sfp_data_byte(hw, - E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), - (u8 *)eth_flags); - if (ret_val != E1000_SUCCESS) - goto out; - - /* Check if there is some SFP module plugged and powered */ - if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || - (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { - dev_spec->module_plugged = true; - if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { - hw->phy.media_type = e1000_media_type_internal_serdes; - } else if (eth_flags->e100_base_fx) { - dev_spec->sgmii_active = true; - hw->phy.media_type = e1000_media_type_internal_serdes; - } else if (eth_flags->e1000_base_t) { - dev_spec->sgmii_active = true; - hw->phy.media_type = e1000_media_type_copper; - } else { - hw->phy.media_type = e1000_media_type_unknown; - DEBUGOUT("PHY module has not been recognized\n"); - goto out; - } - } else { - hw->phy.media_type = e1000_media_type_unknown; - } - ret_val = E1000_SUCCESS; -out: - /* Restore I2C interface setting */ - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - return ret_val; -} - -/** - * e1000_valid_led_default_82575 - Verify a valid default LED config - * @hw: pointer to the HW structure - * @data: pointer to the NVM (EEPROM) - * - * Read the EEPROM for the current default LED configuration. If the - * LED configuration is not valid, set to a valid LED configuration. - **/ -static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_valid_led_default_82575"); - - ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - - if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { - switch (hw->phy.media_type) { - case e1000_media_type_internal_serdes: - *data = ID_LED_DEFAULT_82575_SERDES; - break; - case e1000_media_type_copper: - default: - *data = ID_LED_DEFAULT; - break; - } - } -out: - return ret_val; -} - -/** - * e1000_sgmii_active_82575 - Return sgmii state - * @hw: pointer to the HW structure - * - * 82575 silicon has a serialized gigabit media independent interface (sgmii) - * which can be enabled for use in the embedded applications. Simply - * return the current state of the sgmii interface. - **/ -static bool e1000_sgmii_active_82575(struct e1000_hw *hw) -{ - struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; - return dev_spec->sgmii_active; -} - -/** - * e1000_reset_init_script_82575 - Inits HW defaults after reset - * @hw: pointer to the HW structure - * - * Inits recommended HW defaults after a reset when there is no EEPROM - * detected. This is only for the 82575. 
- **/ -static s32 e1000_reset_init_script_82575(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_reset_init_script_82575"); - - if (hw->mac.type == e1000_82575) { - DEBUGOUT("Running reset init script for 82575\n"); - /* SerDes configuration via SERDESCTRL */ - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15); - - /* CCM configuration via CCMCTL register */ - e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00); - - /* PCIe lanes configuration */ - e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81); - - /* PCIe PLL Configuration */ - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00); - e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00); - } - - return E1000_SUCCESS; -} - -/** - * e1000_read_mac_addr_82575 - Read device MAC address - * @hw: pointer to the HW structure - **/ -static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_read_mac_addr_82575"); - - /* - * If there's an alternate MAC address place it in RAR0 - * so that it will override the Si installed default perm - * address. - */ - ret_val = e1000_check_alt_mac_addr_generic(hw); - if (ret_val) - goto out; - - ret_val = e1000_read_mac_addr_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_config_collision_dist_82575 - Configure collision distance - * @hw: pointer to the HW structure - * - * Configures the collision distance to the default value and is used - * during link setup. - **/ -static void e1000_config_collision_dist_82575(struct e1000_hw *hw) -{ - u32 tctl_ext; - - DEBUGFUNC("e1000_config_collision_dist_82575"); - - tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT); - - tctl_ext &= ~E1000_TCTL_EXT_COLD; - tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT; - - E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_power_down_phy_copper_82575 - Remove link during PHY power down - * @hw: pointer to the HW structure - * - * In the case of a PHY power down to save power, or to turn off link during a - * driver unload, or wake on lan is not enabled, remove the link. - **/ -static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - - if (!(phy->ops.check_reset_block)) - return; - - /* If the management interface is not enabled, then power down */ - if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) - e1000_power_down_phy_copper(hw); - - return; -} - -/** - * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters - * @hw: pointer to the HW structure - * - * Clears the hardware counters by reading the counter registers. 
- **/ -static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_clear_hw_cntrs_82575"); - - e1000_clear_hw_cntrs_base_generic(hw); - - E1000_READ_REG(hw, E1000_PRC64); - E1000_READ_REG(hw, E1000_PRC127); - E1000_READ_REG(hw, E1000_PRC255); - E1000_READ_REG(hw, E1000_PRC511); - E1000_READ_REG(hw, E1000_PRC1023); - E1000_READ_REG(hw, E1000_PRC1522); - E1000_READ_REG(hw, E1000_PTC64); - E1000_READ_REG(hw, E1000_PTC127); - E1000_READ_REG(hw, E1000_PTC255); - E1000_READ_REG(hw, E1000_PTC511); - E1000_READ_REG(hw, E1000_PTC1023); - E1000_READ_REG(hw, E1000_PTC1522); - - E1000_READ_REG(hw, E1000_ALGNERRC); - E1000_READ_REG(hw, E1000_RXERRC); - E1000_READ_REG(hw, E1000_TNCRS); - E1000_READ_REG(hw, E1000_CEXTERR); - E1000_READ_REG(hw, E1000_TSCTC); - E1000_READ_REG(hw, E1000_TSCTFC); - - E1000_READ_REG(hw, E1000_MGTPRC); - E1000_READ_REG(hw, E1000_MGTPDC); - E1000_READ_REG(hw, E1000_MGTPTC); - - E1000_READ_REG(hw, E1000_IAC); - E1000_READ_REG(hw, E1000_ICRXOC); - - E1000_READ_REG(hw, E1000_ICRXPTC); - E1000_READ_REG(hw, E1000_ICRXATC); - E1000_READ_REG(hw, E1000_ICTXPTC); - E1000_READ_REG(hw, E1000_ICTXATC); - E1000_READ_REG(hw, E1000_ICTXQEC); - E1000_READ_REG(hw, E1000_ICTXQMTC); - E1000_READ_REG(hw, E1000_ICRXDMTC); - - E1000_READ_REG(hw, E1000_CBTMPC); - E1000_READ_REG(hw, E1000_HTDPMC); - E1000_READ_REG(hw, E1000_CBRMPC); - E1000_READ_REG(hw, E1000_RPTHC); - E1000_READ_REG(hw, E1000_HGPTC); - E1000_READ_REG(hw, E1000_HTCBDPC); - E1000_READ_REG(hw, E1000_HGORCL); - E1000_READ_REG(hw, E1000_HGORCH); - E1000_READ_REG(hw, E1000_HGOTCL); - E1000_READ_REG(hw, E1000_HGOTCH); - E1000_READ_REG(hw, E1000_LENERRS); - - /* This register should not be read in copper configurations */ - if ((hw->phy.media_type == e1000_media_type_internal_serdes) || - e1000_sgmii_active_82575(hw)) - E1000_READ_REG(hw, E1000_SCVPC); -} - -/** - * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable - * @hw: pointer to the HW structure - * - * After rx enable if managability is enabled then there is likely some - * bad data at the start of the fifo and possibly in the DMA fifo. This - * function clears the fifos and flushes any packets that came in as rx was - * being enabled. - **/ -void e1000_rx_fifo_flush_82575(struct e1000_hw *hw) -{ - u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; - int i, ms_wait; - - DEBUGFUNC("e1000_rx_fifo_workaround_82575"); - if (hw->mac.type != e1000_82575 || - !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) - return; - - /* Disable all Rx queues */ - for (i = 0; i < 4; i++) { - rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); - E1000_WRITE_REG(hw, E1000_RXDCTL(i), - rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); - } - /* Poll all queues to verify they have shut down */ - for (ms_wait = 0; ms_wait < 10; ms_wait++) { - msec_delay(1); - rx_enabled = 0; - for (i = 0; i < 4; i++) - rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i)); - if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) - break; - } - - if (ms_wait == 10) - DEBUGOUT("Queue disable timed out after 10ms\n"); - - /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all - * incoming packets are rejected. 
Set enable and wait 2ms so that - * any packet that was coming in as RCTL.EN was set is flushed - */ - rfctl = E1000_READ_REG(hw, E1000_RFCTL); - E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); - - rlpml = E1000_READ_REG(hw, E1000_RLPML); - E1000_WRITE_REG(hw, E1000_RLPML, 0); - - rctl = E1000_READ_REG(hw, E1000_RCTL); - temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); - temp_rctl |= E1000_RCTL_LPE; - - E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl); - E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN); - E1000_WRITE_FLUSH(hw); - msec_delay(2); - - /* Enable Rx queues that were previously enabled and restore our - * previous state - */ - for (i = 0; i < 4; i++) - E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]); - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - E1000_WRITE_FLUSH(hw); - - E1000_WRITE_REG(hw, E1000_RLPML, rlpml); - E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); - - /* Flush receive errors generated by workaround */ - E1000_READ_REG(hw, E1000_ROC); - E1000_READ_REG(hw, E1000_RNBC); - E1000_READ_REG(hw, E1000_MPC); -} - -/** - * e1000_set_pcie_completion_timeout - set pci-e completion timeout - * @hw: pointer to the HW structure - * - * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, - * however the hardware default for these parts is 500us to 1ms which is less - * than the 10ms recommended by the pci-e spec. To address this we need to - * increase the value to either 10ms to 200ms for capability version 1 config, - * or 16ms to 55ms for version 2. - **/ -static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw) -{ - u32 gcr = E1000_READ_REG(hw, E1000_GCR); - s32 ret_val = E1000_SUCCESS; - u16 pcie_devctl2; - - /* only take action if timeout value is defaulted to 0 */ - if (gcr & E1000_GCR_CMPL_TMOUT_MASK) - goto out; - - /* - * if capababilities version is type 1 we can write the - * timeout of 10ms to 200ms through the GCR register - */ - if (!(gcr & E1000_GCR_CAP_VER2)) { - gcr |= E1000_GCR_CMPL_TMOUT_10ms; - goto out; - } - - /* - * for version 2 capabilities we need to write the config space - * directly in order to set the completion timeout value for - * 16ms to 55ms - */ - ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); - if (ret_val) - goto out; - - pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; - - ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); -out: - /* disable completion timeout resend */ - gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; - - E1000_WRITE_REG(hw, E1000_GCR, gcr); - return ret_val; -} - -/** - * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing - * @hw: pointer to the hardware struct - * @enable: state to enter, either enabled or disabled - * @pf: Physical Function pool - do not set anti-spoofing for the PF - * - * enables/disables L2 switch anti-spoofing functionality. 
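/*
 * Illustrative sketch (editor's example, not from the driver) of the
 * anti-spoofing mask handling implemented just below: MAC and VLAN spoof
 * checking is switched on for every pool, then the two bits that correspond
 * to the physical function (one in each half of the word) are cleared again
 * so the PF can still transmit on behalf of emulated NICs.  Field widths
 * below are demo values, not the real DTXSWC layout.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_MAX_VFS		8		/* assumed pool count */
#define DEMO_MAC_SPOOF_MASK	0x000000FFu	/* one bit per pool */
#define DEMO_VLAN_SPOOF_MASK	0x0000FF00u

static uint32_t demo_enable_anti_spoof(uint32_t reg, int pf)
{
	reg |= DEMO_MAC_SPOOF_MASK | DEMO_VLAN_SPOOF_MASK;
	/* exempt the PF: clear its bit in both halves of the mask */
	reg &= ~((1u << pf) | (1u << (pf + DEMO_MAX_VFS)));
	return reg;
}

int main(void)
{
	printf("pf=0 -> 0x%08x\n", demo_enable_anti_spoof(0, 0));
	printf("pf=3 -> 0x%08x\n", demo_enable_anti_spoof(0, 3));
	return 0;
}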
- **/ -void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) -{ - u32 reg_val, reg_offset; - - switch (hw->mac.type) { - case e1000_82576: - reg_offset = E1000_DTXSWC; - break; - case e1000_i350: - case e1000_i354: - reg_offset = E1000_TXSWC; - break; - default: - return; - } - - reg_val = E1000_READ_REG(hw, reg_offset); - if (enable) { - reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - /* The PF can spoof - it has to in order to - * support emulation mode NICs - */ - reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); - } else { - reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - } - E1000_WRITE_REG(hw, reg_offset, reg_val); -} - -/** - * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback - * @hw: pointer to the hardware struct - * @enable: state to enter, either enabled or disabled - * - * enables/disables L2 switch loopback functionality. - **/ -void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) -{ - u32 dtxswc; - - switch (hw->mac.type) { - case e1000_82576: - dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); - if (enable) - dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; - else - dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; - E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); - break; - case e1000_i350: - case e1000_i354: - dtxswc = E1000_READ_REG(hw, E1000_TXSWC); - if (enable) - dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; - else - dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; - E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc); - break; - default: - /* Currently no other hardware supports loopback */ - break; - } - - -} - -/** - * e1000_vmdq_set_replication_pf - enable or disable vmdq replication - * @hw: pointer to the hardware struct - * @enable: state to enter, either enabled or disabled - * - * enables/disables replication of packets across multiple pools. - **/ -void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) -{ - u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL); - - if (enable) - vt_ctl |= E1000_VT_CTL_VM_REPL_EN; - else - vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; - - E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl); -} - -/** - * e1000_read_phy_reg_82580 - Read 82580 MDI control register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the MDI control register in the PHY at offset and stores the - * information read to data. - **/ -static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_read_phy_reg_82580"); - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - goto out; - - ret_val = e1000_read_phy_reg_mdic(hw, offset, data); - - hw->phy.ops.release(hw); - -out: - return ret_val; -} - -/** - * e1000_write_phy_reg_82580 - Write 82580 MDI control register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write to register at offset - * - * Writes data to MDI control register in the PHY at offset. 
- **/ -static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_write_phy_reg_82580"); - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - goto out; - - ret_val = e1000_write_phy_reg_mdic(hw, offset, data); - - hw->phy.ops.release(hw); - -out: - return ret_val; -} - -/** - * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits - * @hw: pointer to the HW structure - * - * This resets the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on - * the values found in the EEPROM. This addresses an issue in which these - * bits are not restored from EEPROM after reset. - **/ -static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u32 mdicnfg; - u16 nvm_data = 0; - - DEBUGFUNC("e1000_reset_mdicnfg_82580"); - - if (hw->mac.type != e1000_82580) - goto out; - if (!e1000_sgmii_active_82575(hw)) - goto out; - - ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + - NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, - &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - - mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); - if (nvm_data & NVM_WORD24_EXT_MDIO) - mdicnfg |= E1000_MDICNFG_EXT_MDIO; - if (nvm_data & NVM_WORD24_COM_MDIO) - mdicnfg |= E1000_MDICNFG_COM_MDIO; - E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); -out: - return ret_val; -} - -/** - * e1000_reset_hw_82580 - Reset hardware - * @hw: pointer to the HW structure - * - * This resets function or entire device (all ports, etc.) - * to a known state. - **/ -static s32 e1000_reset_hw_82580(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - /* BH SW mailbox bit in SW_FW_SYNC */ - u16 swmbsw_mask = E1000_SW_SYNCH_MB; - u32 ctrl; - bool global_device_reset = hw->dev_spec._82575.global_device_reset; - - DEBUGFUNC("e1000_reset_hw_82580"); - - hw->dev_spec._82575.global_device_reset = false; - - /* 82580 does not reliably do global_device_reset due to hw errata */ - if (hw->mac.type == e1000_82580) - global_device_reset = false; - - /* Get current control state. */ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - - /* - * Prevent the PCI-E bus from sticking if there is no TLP connection - * on the last TLP read/write transaction when MAC is reset. - */ - ret_val = e1000_disable_pcie_master_generic(hw); - if (ret_val) - DEBUGOUT("PCI-E Master disable polling has failed.\n"); - - DEBUGOUT("Masking off all interrupts\n"); - E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); - E1000_WRITE_REG(hw, E1000_RCTL, 0); - E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); - E1000_WRITE_FLUSH(hw); - - msec_delay(10); - - /* Determine whether or not a global dev reset is requested */ - if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw, - swmbsw_mask)) - global_device_reset = false; - - if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) & - E1000_STAT_DEV_RST_SET)) - ctrl |= E1000_CTRL_DEV_RST; - else - ctrl |= E1000_CTRL_RST; - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - E1000_WRITE_FLUSH(hw); - - /* Add delay to insure DEV_RST has time to complete */ - if (global_device_reset) - msec_delay(5); - - ret_val = e1000_get_auto_rd_done_generic(hw); - if (ret_val) { - /* - * When auto config read does not complete, do not - * return with an error. This can happen in situations - * where there is no eeprom and prevents getting link. 
- */ - DEBUGOUT("Auto Read Done did not complete\n"); - } - - /* clear global device reset status bit */ - E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET); - - /* Clear any pending interrupt events. */ - E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); - E1000_READ_REG(hw, E1000_ICR); - - ret_val = e1000_reset_mdicnfg_82580(hw); - if (ret_val) - DEBUGOUT("Could not reset MDICNFG based on EEPROM\n"); - - /* Install any alternate MAC address into RAR0 */ - ret_val = e1000_check_alt_mac_addr_generic(hw); - - /* Release semaphore */ - if (global_device_reset) - hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); - - return ret_val; -} - -/** - * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size - * @data: data received by reading RXPBS register - * - * The 82580 uses a table based approach for packet buffer allocation sizes. - * This function converts the retrieved value into the correct table value - * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 - * 0x0 36 72 144 1 2 4 8 16 - * 0x8 35 70 140 rsv rsv rsv rsv rsv - */ -u16 e1000_rxpbs_adjust_82580(u32 data) -{ - u16 ret_val = 0; - - if (data < E1000_82580_RXPBS_TABLE_SIZE) - ret_val = e1000_82580_rxpbs_table[data]; - - return ret_val; -} - -/** - * e1000_validate_nvm_checksum_with_offset - Validate EEPROM - * checksum - * @hw: pointer to the HW structure - * @offset: offset in words of the checksum protected region - * - * Calculates the EEPROM checksum by reading/adding each word of the EEPROM - * and then verifies that the sum of the EEPROM is equal to 0xBABA. - **/ -s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) -{ - s32 ret_val = E1000_SUCCESS; - u16 checksum = 0; - u16 i, nvm_data; - - DEBUGFUNC("e1000_validate_nvm_checksum_with_offset"); - - for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { - ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - checksum += nvm_data; - } - - if (checksum != (u16) NVM_SUM) { - DEBUGOUT("NVM Checksum Invalid\n"); - ret_val = -E1000_ERR_NVM; - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_update_nvm_checksum_with_offset - Update EEPROM - * checksum - * @hw: pointer to the HW structure - * @offset: offset in words of the checksum protected region - * - * Updates the EEPROM checksum by reading/adding each word of the EEPROM - * up to the checksum. Then calculates the EEPROM checksum and writes the - * value to the EEPROM. - **/ -s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) -{ - s32 ret_val; - u16 checksum = 0; - u16 i, nvm_data; - - DEBUGFUNC("e1000_update_nvm_checksum_with_offset"); - - for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { - ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error while updating checksum.\n"); - goto out; - } - checksum += nvm_data; - } - checksum = (u16) NVM_SUM - checksum; - ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, - &checksum); - if (ret_val) - DEBUGOUT("NVM Write Error while updating checksum.\n"); - -out: - return ret_val; -} - -/** - * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum - * @hw: pointer to the HW structure - * - * Calculates the EEPROM section checksum by reading/adding each word of - * the EEPROM and then verifies that the sum of the EEPROM is - * equal to 0xBABA. 
- **/ -static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u16 eeprom_regions_count = 1; - u16 j, nvm_data; - u16 nvm_offset; - - DEBUGFUNC("e1000_validate_nvm_checksum_82580"); - - ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - - if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { - /* if chekcsums compatibility bit is set validate checksums - * for all 4 ports. */ - eeprom_regions_count = 4; - } - - for (j = 0; j < eeprom_regions_count; j++) { - nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); - ret_val = e1000_validate_nvm_checksum_with_offset(hw, - nvm_offset); - if (ret_val != E1000_SUCCESS) - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_update_nvm_checksum_82580 - Update EEPROM checksum - * @hw: pointer to the HW structure - * - * Updates the EEPROM section checksums for all 4 ports by reading/adding - * each word of the EEPROM up to the checksum. Then calculates the EEPROM - * checksum and writes the value to the EEPROM. - **/ -static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw) -{ - s32 ret_val; - u16 j, nvm_data; - u16 nvm_offset; - - DEBUGFUNC("e1000_update_nvm_checksum_82580"); - - ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n"); - goto out; - } - - if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) { - /* set compatibility bit to validate checksums appropriately */ - nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; - ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, - &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n"); - goto out; - } - } - - for (j = 0; j < 4; j++) { - nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); - ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); - if (ret_val) - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum - * @hw: pointer to the HW structure - * - * Calculates the EEPROM section checksum by reading/adding each word of - * the EEPROM and then verifies that the sum of the EEPROM is - * equal to 0xBABA. - **/ -static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u16 j; - u16 nvm_offset; - - DEBUGFUNC("e1000_validate_nvm_checksum_i350"); - - for (j = 0; j < 4; j++) { - nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); - ret_val = e1000_validate_nvm_checksum_with_offset(hw, - nvm_offset); - if (ret_val != E1000_SUCCESS) - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_update_nvm_checksum_i350 - Update EEPROM checksum - * @hw: pointer to the HW structure - * - * Updates the EEPROM section checksums for all 4 ports by reading/adding - * each word of the EEPROM up to the checksum. Then calculates the EEPROM - * checksum and writes the value to the EEPROM. 
- **/ -static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u16 j; - u16 nvm_offset; - - DEBUGFUNC("e1000_update_nvm_checksum_i350"); - - for (j = 0; j < 4; j++) { - nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); - ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); - if (ret_val != E1000_SUCCESS) - goto out; - } - -out: - return ret_val; -} - -/** - * __e1000_access_emi_reg - Read/write EMI register - * @hw: pointer to the HW structure - * @addr: EMI address to program - * @data: pointer to value to read/write from/to the EMI address - * @read: boolean flag to indicate read or write - **/ -static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address, - u16 *data, bool read) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("__e1000_access_emi_reg"); - - ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); - if (ret_val) - return ret_val; - - if (read) - ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); - else - ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); - - return ret_val; -} - -/** - * e1000_read_emi_reg - Read Extended Management Interface register - * @hw: pointer to the HW structure - * @addr: EMI address to program - * @data: value to be read from the EMI address - **/ -s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) -{ - DEBUGFUNC("e1000_read_emi_reg"); - - return __e1000_access_emi_reg(hw, addr, data, true); -} - -/** - * e1000_set_eee_i350 - Enable/disable EEE support - * @hw: pointer to the HW structure - * - * Enable/disable EEE based on setting in dev_spec structure. - * - **/ -s32 e1000_set_eee_i350(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u32 ipcnfg, eeer; - - DEBUGFUNC("e1000_set_eee_i350"); - - if ((hw->mac.type < e1000_i350) || - (hw->phy.media_type != e1000_media_type_copper)) - goto out; - ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG); - eeer = E1000_READ_REG(hw, E1000_EEER); - - /* enable or disable per user setting */ - if (!(hw->dev_spec._82575.eee_disable)) { - u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU); - - ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); - eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | - E1000_EEER_LPI_FC); - - /* This bit should not be set in normal operation. */ - if (eee_su & E1000_EEE_SU_LPI_CLK_STP) - DEBUGOUT("LPI Clock Stop Bit should not be set!\n"); - } else { - ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); - eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | - E1000_EEER_LPI_FC); - } - E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg); - E1000_WRITE_REG(hw, E1000_EEER, eeer); - E1000_READ_REG(hw, E1000_IPCNFG); - E1000_READ_REG(hw, E1000_EEER); -out: - - return ret_val; -} - -/** - * e1000_set_eee_i354 - Enable/disable EEE support - * @hw: pointer to the HW structure - * - * Enable/disable EEE legacy mode based on setting in dev_spec structure. - * - **/ -s32 e1000_set_eee_i354(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u16 phy_data; - - DEBUGFUNC("e1000_set_eee_i354"); - - if ((hw->phy.media_type != e1000_media_type_copper) || - ((phy->id != M88E1543_E_PHY_ID))) - goto out; - - if (!hw->dev_spec._82575.eee_disable) { - /* Switch to PHY page 18. 
*/ - ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); - if (ret_val) - goto out; - - ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, - &phy_data); - if (ret_val) - goto out; - - phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; - ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, - phy_data); - if (ret_val) - goto out; - - /* Return the PHY to page 0. */ - ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); - if (ret_val) - goto out; - - /* Turn on EEE advertisement. */ - ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, - E1000_EEE_ADV_DEV_I354, - &phy_data); - if (ret_val) - goto out; - - phy_data |= E1000_EEE_ADV_100_SUPPORTED | - E1000_EEE_ADV_1000_SUPPORTED; - ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, - E1000_EEE_ADV_DEV_I354, - phy_data); - } else { - /* Turn off EEE advertisement. */ - ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, - E1000_EEE_ADV_DEV_I354, - &phy_data); - if (ret_val) - goto out; - - phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | - E1000_EEE_ADV_1000_SUPPORTED); - ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, - E1000_EEE_ADV_DEV_I354, - phy_data); - } - -out: - return ret_val; -} - -/** - * e1000_get_eee_status_i354 - Get EEE status - * @hw: pointer to the HW structure - * @status: EEE status - * - * Get EEE status by guessing based on whether Tx or Rx LPI indications have - * been received. - **/ -s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u16 phy_data; - - DEBUGFUNC("e1000_get_eee_status_i354"); - - /* Check if EEE is supported on this device. */ - if ((hw->phy.media_type != e1000_media_type_copper) || - ((phy->id != M88E1543_E_PHY_ID))) - goto out; - - ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, - E1000_PCS_STATUS_DEV_I354, - &phy_data); - if (ret_val) - goto out; - - *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | - E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false; - -out: - return ret_val; -} - -/* Due to a hw errata, if the host tries to configure the VFTA register - * while performing queries from the BMC or DMA, then the VFTA in some - * cases won't be written. - */ - -/** - * e1000_clear_vfta_i350 - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * Clears the register array which contains the VLAN filter table by - * setting all the values to 0. - **/ -void e1000_clear_vfta_i350(struct e1000_hw *hw) -{ - u32 offset; - int i; - - DEBUGFUNC("e1000_clear_vfta_350"); - - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); - - E1000_WRITE_FLUSH(hw); - } -} - -/** - * e1000_write_vfta_i350 - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: register offset in VLAN filter table - * @value: register value written to VLAN filter table - * - * Writes value at the given offset in the register array which stores - * the VLAN filter table. 
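The VLAN filter table that e1000_clear_vfta_i350() wipes above, and that e1000_write_vfta_i350() below programs ten times over because of the BMC/DMA errata just noted, is a plain bit array: each 32-bit entry covers 32 consecutive VLAN IDs. A small sketch of the usual igb index/bit computation; the 128-entry table size is an assumption here, since the E1000_VLAN_FILTER_TBL_SIZE value is not part of this hunk:

#include <stdint.h>

#define VLAN_FILTER_TBL_SIZE 128    /* assumed: 128 * 32 bits = 4096 VLAN IDs */

struct vfta_shadow {
    uint32_t entry[VLAN_FILTER_TBL_SIZE];   /* software copy of the table */
};

/* Mark a VLAN ID as allowed in the shadow table; the caller would then push
 * entry[index] to the hardware with the errata-hardened write_vfta helper. */
static void vfta_shadow_set(struct vfta_shadow *vfta, uint16_t vid)
{
    uint32_t index = (vid >> 5) & (VLAN_FILTER_TBL_SIZE - 1); /* which register  */
    uint32_t bit   = 1u << (vid & 0x1F);                      /* which bit in it */

    vfta->entry[index] |= bit;
}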
- **/ -void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) -{ - int i; - - DEBUGFUNC("e1000_write_vfta_350"); - - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); - - E1000_WRITE_FLUSH(hw); -} - - -/** - * e1000_set_i2c_bb - Enable I2C bit-bang - * @hw: pointer to the HW structure - * - * Enable I2C bit-bang interface - * - **/ -s32 e1000_set_i2c_bb(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u32 ctrl_ext, i2cparams; - - DEBUGFUNC("e1000_set_i2c_bb"); - - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - ctrl_ext |= E1000_CTRL_I2C_ENA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - E1000_WRITE_FLUSH(hw); - - i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS); - i2cparams |= E1000_I2CBB_EN; - i2cparams |= E1000_I2C_DATA_OE_N; - i2cparams |= E1000_I2C_CLK_OE_N; - E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams); - E1000_WRITE_FLUSH(hw); - - return ret_val; -} - -/** - * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to read - * @dev_addr: device address - * @data: value read - * - * Performs byte read operation over I2C interface at - * a specified device address. - **/ -s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data) -{ - s32 status = E1000_SUCCESS; - u32 max_retry = 10; - u32 retry = 1; - u16 swfw_mask = 0; - - bool nack = true; - - DEBUGFUNC("e1000_read_i2c_byte_generic"); - - swfw_mask = E1000_SWFW_PHY0_SM; - - do { - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) - != E1000_SUCCESS) { - status = E1000_ERR_SWFW_SYNC; - goto read_byte_out; - } - - e1000_i2c_start(hw); - - /* Device Address and write indication */ - status = e1000_clock_out_i2c_byte(hw, dev_addr); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_clock_out_i2c_byte(hw, byte_offset); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - e1000_i2c_start(hw); - - /* Device Address and read indication */ - status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1)); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_clock_in_i2c_byte(hw, data); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_clock_out_i2c_bit(hw, nack); - if (status != E1000_SUCCESS) - goto fail; - - e1000_i2c_stop(hw); - break; - -fail: - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - msec_delay(100); - e1000_i2c_bus_clear(hw); - retry++; - if (retry < max_retry) - DEBUGOUT("I2C byte read error - Retrying.\n"); - else - DEBUGOUT("I2C byte read error.\n"); - - } while (retry < max_retry); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - -read_byte_out: - - return status; -} - -/** - * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @dev_addr: device address - * @data: value to write - * - * Performs byte write operation over I2C interface at - * a specified device address. 
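The read helper above issues the textbook two-phase I2C read: START, device address with the write bit, register offset, repeated START, device address with the read bit, one data byte, NACK, STOP. The only arithmetic in the framing is the address byte itself; a tiny sketch of it, treating the argument as a 7-bit slave address, whereas the driver above already carries an 8-bit pre-shifted address and simply ORs in the read bit:

#include <stdint.h>

/* On the wire the first byte of each phase is the 7-bit slave address in
 * bits [7:1] with the R/W flag in bit 0 (0 = write, 1 = read). */
static inline uint8_t i2c_addr_byte(uint8_t addr7, int is_read)
{
    return (uint8_t)((addr7 << 1) | (is_read ? 1u : 0u));
}

/* Example: a 7-bit address of 0x7C yields 0xF8 for writes and 0xF9 for reads,
 * which is exactly the (dev_addr | 0x1) step in the removed read path. */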
- **/ -s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data) -{ - s32 status = E1000_SUCCESS; - u32 max_retry = 1; - u32 retry = 0; - u16 swfw_mask = 0; - - DEBUGFUNC("e1000_write_i2c_byte_generic"); - - swfw_mask = E1000_SWFW_PHY0_SM; - - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { - status = E1000_ERR_SWFW_SYNC; - goto write_byte_out; - } - - do { - e1000_i2c_start(hw); - - status = e1000_clock_out_i2c_byte(hw, dev_addr); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_clock_out_i2c_byte(hw, byte_offset); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_clock_out_i2c_byte(hw, data); - if (status != E1000_SUCCESS) - goto fail; - - status = e1000_get_i2c_ack(hw); - if (status != E1000_SUCCESS) - goto fail; - - e1000_i2c_stop(hw); - break; - -fail: - e1000_i2c_bus_clear(hw); - retry++; - if (retry < max_retry) - DEBUGOUT("I2C byte write error - Retrying.\n"); - else - DEBUGOUT("I2C byte write error.\n"); - } while (retry < max_retry); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - -write_byte_out: - - return status; -} - -/** - * e1000_i2c_start - Sets I2C start condition - * @hw: pointer to hardware structure - * - * Sets I2C start condition (High -> Low on SDA while SCL is High) - **/ -static void e1000_i2c_start(struct e1000_hw *hw) -{ - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - DEBUGFUNC("e1000_i2c_start"); - - /* Start condition must begin with data and clock high */ - e1000_set_i2c_data(hw, &i2cctl, 1); - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Setup time for start condition (4.7us) */ - usec_delay(E1000_I2C_T_SU_STA); - - e1000_set_i2c_data(hw, &i2cctl, 0); - - /* Hold time for start condition (4us) */ - usec_delay(E1000_I2C_T_HD_STA); - - e1000_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - usec_delay(E1000_I2C_T_LOW); - -} - -/** - * e1000_i2c_stop - Sets I2C stop condition - * @hw: pointer to hardware structure - * - * Sets I2C stop condition (Low -> High on SDA while SCL is High) - **/ -static void e1000_i2c_stop(struct e1000_hw *hw) -{ - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - DEBUGFUNC("e1000_i2c_stop"); - - /* Stop condition must begin with data low and clock high */ - e1000_set_i2c_data(hw, &i2cctl, 0); - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Setup time for stop condition (4us) */ - usec_delay(E1000_I2C_T_SU_STO); - - e1000_set_i2c_data(hw, &i2cctl, 1); - - /* bus free time between stop and start (4.7us)*/ - usec_delay(E1000_I2C_T_BUF); -} - -/** - * e1000_clock_in_i2c_byte - Clocks in one byte via I2C - * @hw: pointer to hardware structure - * @data: data byte to clock in - * - * Clocks in one byte data via I2C data/clock - **/ -static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data) -{ - s32 i; - bool bit = 0; - - DEBUGFUNC("e1000_clock_in_i2c_byte"); - - *data = 0; - for (i = 7; i >= 0; i--) { - e1000_clock_in_i2c_bit(hw, &bit); - *data |= bit << i; - } - - return E1000_SUCCESS; -} - -/** - * e1000_clock_out_i2c_byte - Clocks out one byte via I2C - * @hw: pointer to hardware structure - * @data: data byte clocked out - * - * Clocks out one byte data via I2C data/clock - **/ -static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data) -{ - s32 status = E1000_SUCCESS; - s32 i; - u32 i2cctl; - bool bit = 0; - - 
DEBUGFUNC("e1000_clock_out_i2c_byte"); - - for (i = 7; i >= 0; i--) { - bit = (data >> i) & 0x1; - status = e1000_clock_out_i2c_bit(hw, bit); - - if (status != E1000_SUCCESS) - break; - } - - /* Release SDA line (set high) */ - i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - i2cctl |= E1000_I2C_DATA_OE_N; - E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); - E1000_WRITE_FLUSH(hw); - - return status; -} - -/** - * e1000_get_i2c_ack - Polls for I2C ACK - * @hw: pointer to hardware structure - * - * Clocks in/out one bit via I2C data/clock - **/ -static s32 e1000_get_i2c_ack(struct e1000_hw *hw) -{ - s32 status = E1000_SUCCESS; - u32 i = 0; - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - u32 timeout = 10; - bool ack = true; - - DEBUGFUNC("e1000_get_i2c_ack"); - - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Minimum high period of clock is 4us */ - usec_delay(E1000_I2C_T_HIGH); - - /* Wait until SCL returns high */ - for (i = 0; i < timeout; i++) { - usec_delay(1); - i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - if (i2cctl & E1000_I2C_CLK_IN) - break; - } - if (!(i2cctl & E1000_I2C_CLK_IN)) - return E1000_ERR_I2C; - - ack = e1000_get_i2c_data(&i2cctl); - if (ack) { - DEBUGOUT("I2C ack was not received.\n"); - status = E1000_ERR_I2C; - } - - e1000_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - usec_delay(E1000_I2C_T_LOW); - - return status; -} - -/** - * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock - * @hw: pointer to hardware structure - * @data: read data value - * - * Clocks in one bit via I2C data/clock - **/ -static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data) -{ - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - DEBUGFUNC("e1000_clock_in_i2c_bit"); - - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Minimum high period of clock is 4us */ - usec_delay(E1000_I2C_T_HIGH); - - i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - *data = e1000_get_i2c_data(&i2cctl); - - e1000_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - usec_delay(E1000_I2C_T_LOW); - - return E1000_SUCCESS; -} - -/** - * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock - * @hw: pointer to hardware structure - * @data: data value to write - * - * Clocks out one bit via I2C data/clock - **/ -static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data) -{ - s32 status; - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - DEBUGFUNC("e1000_clock_out_i2c_bit"); - - status = e1000_set_i2c_data(hw, &i2cctl, data); - if (status == E1000_SUCCESS) { - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Minimum high period of clock is 4us */ - usec_delay(E1000_I2C_T_HIGH); - - e1000_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us. - * This also takes care of the data hold time. 
- */ - usec_delay(E1000_I2C_T_LOW); - } else { - status = E1000_ERR_I2C; - DEBUGOUT1("I2C data was not set to %X\n", data); - } - - return status; -} -/** - * e1000_raise_i2c_clk - Raises the I2C SCL clock - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Raises the I2C clock line '0'->'1' - **/ -static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) -{ - DEBUGFUNC("e1000_raise_i2c_clk"); - - *i2cctl |= E1000_I2C_CLK_OUT; - *i2cctl &= ~E1000_I2C_CLK_OE_N; - E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); - E1000_WRITE_FLUSH(hw); - - /* SCL rise time (1000ns) */ - usec_delay(E1000_I2C_T_RISE); -} - -/** - * e1000_lower_i2c_clk - Lowers the I2C SCL clock - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Lowers the I2C clock line '1'->'0' - **/ -static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) -{ - - DEBUGFUNC("e1000_lower_i2c_clk"); - - *i2cctl &= ~E1000_I2C_CLK_OUT; - *i2cctl &= ~E1000_I2C_CLK_OE_N; - E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); - E1000_WRITE_FLUSH(hw); - - /* SCL fall time (300ns) */ - usec_delay(E1000_I2C_T_FALL); -} - -/** - * e1000_set_i2c_data - Sets the I2C data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * @data: I2C data value (0 or 1) to set - * - * Sets the I2C data bit - **/ -static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data) -{ - s32 status = E1000_SUCCESS; - - DEBUGFUNC("e1000_set_i2c_data"); - - if (data) - *i2cctl |= E1000_I2C_DATA_OUT; - else - *i2cctl &= ~E1000_I2C_DATA_OUT; - - *i2cctl &= ~E1000_I2C_DATA_OE_N; - *i2cctl |= E1000_I2C_CLK_OE_N; - E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); - E1000_WRITE_FLUSH(hw); - - /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ - usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA); - - *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - if (data != e1000_get_i2c_data(i2cctl)) { - status = E1000_ERR_I2C; - DEBUGOUT1("Error - I2C data was not set to %X.\n", data); - } - - return status; -} - -/** - * e1000_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Returns the I2C data bit value - **/ -static bool e1000_get_i2c_data(u32 *i2cctl) -{ - bool data; - - DEBUGFUNC("e1000_get_i2c_data"); - - if (*i2cctl & E1000_I2C_DATA_IN) - data = 1; - else - data = 0; - - return data; -} - -/** - * e1000_i2c_bus_clear - Clears the I2C bus - * @hw: pointer to hardware structure - * - * Clears the I2C bus by sending nine clock pulses. - * Used when data line is stuck low. 
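One detail worth calling out in the set/raise/lower helpers above: I2C lines are open-drain, so a logical 1 is produced by releasing the line and letting the pull-up raise it, never by actively driving it high. That is why the code toggles the *_OE_N output-enable bits together with the data and clock output bits, and why the bus-clear routine below simply clocks with SDA released. A tiny sketch of the rule with a hypothetical line abstraction:

#include <stdbool.h>

struct od_line {
    void (*drive_low)(void);   /* enable the output driver, sink the line low */
    void (*release)(void);     /* tristate the driver, the pull-up makes a 1  */
    bool (*sample)(void);      /* read the actual line level                  */
};

static void od_set(const struct od_line *line, bool level)
{
    if (level)
        line->release();       /* logical 1: let the pull-up win      */
    else
        line->drive_low();     /* logical 0: actively sink the line   */
}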
- **/ -void e1000_i2c_bus_clear(struct e1000_hw *hw) -{ - u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - u32 i; - - DEBUGFUNC("e1000_i2c_bus_clear"); - - e1000_i2c_start(hw); - - e1000_set_i2c_data(hw, &i2cctl, 1); - - for (i = 0; i < 9; i++) { - e1000_raise_i2c_clk(hw, &i2cctl); - - /* Min high period of clock is 4us */ - usec_delay(E1000_I2C_T_HIGH); - - e1000_lower_i2c_clk(hw, &i2cctl); - - /* Min low period of clock is 4.7us*/ - usec_delay(E1000_I2C_T_LOW); - } - - e1000_i2c_start(hw); - - /* Put the i2c bus back to default state */ - e1000_i2c_stop(hw); -} - -static const u8 e1000_emc_temp_data[4] = { - E1000_EMC_INTERNAL_DATA, - E1000_EMC_DIODE1_DATA, - E1000_EMC_DIODE2_DATA, - E1000_EMC_DIODE3_DATA -}; -static const u8 e1000_emc_therm_limit[4] = { - E1000_EMC_INTERNAL_THERM_LIMIT, - E1000_EMC_DIODE1_THERM_LIMIT, - E1000_EMC_DIODE2_THERM_LIMIT, - E1000_EMC_DIODE3_THERM_LIMIT -}; - -/** - * e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data - * @hw: pointer to hardware structure - * - * Updates the temperatures in mac.thermal_sensor_data - **/ -s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw) -{ - s32 status = E1000_SUCCESS; - u16 ets_offset; - u16 ets_cfg; - u16 ets_sensor; - u8 num_sensors; - u8 sensor_index; - u8 sensor_location; - u8 i; - struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; - - DEBUGFUNC("e1000_get_thermal_sensor_data_generic"); - - if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) - return E1000_NOT_IMPLEMENTED; - - data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF); - - /* Return the internal sensor only if ETS is unsupported */ - e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); - if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) - return status; - - e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); - if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) - != NVM_ETS_TYPE_EMC) - return E1000_NOT_IMPLEMENTED; - - num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); - if (num_sensors > E1000_MAX_SENSORS) - num_sensors = E1000_MAX_SENSORS; - - for (i = 1; i < num_sensors; i++) { - e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); - sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> - NVM_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> - NVM_ETS_DATA_LOC_SHIFT); - - if (sensor_location != 0) - hw->phy.ops.read_i2c_byte(hw, - e1000_emc_temp_data[sensor_index], - E1000_I2C_THERMAL_SENSOR_ADDR, - &data->sensor[i].temp); - } - return status; -} - -/** - * e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds - * @hw: pointer to hardware structure - * - * Sets the thermal sensor thresholds according to the NVM map - * and save off the threshold and location values into mac.thermal_sensor_data - **/ -s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) -{ - s32 status = E1000_SUCCESS; - u16 ets_offset; - u16 ets_cfg; - u16 ets_sensor; - u8 low_thresh_delta; - u8 num_sensors; - u8 sensor_index; - u8 sensor_location; - u8 therm_limit; - u8 i; - struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; - - DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic"); - - if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) - return E1000_NOT_IMPLEMENTED; - - memset(data, 0, sizeof(struct e1000_thermal_sensor_data)); - - data->sensor[0].location = 0x1; - data->sensor[0].caution_thresh = - (E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF); - data->sensor[0].max_op_thresh = - (E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF); - 
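The thermal-sensor routines above and below recover several small fields (sensor index, diode location, thresholds) from single 16-bit NVM words using mask-and-shift pairs. A generic sketch of that decode; the field positions used here are purely illustrative and do not reproduce the real NVM_ETS_* layout:

#include <stdint.h>

static inline uint16_t field_get(uint16_t word, uint16_t mask, unsigned int shift)
{
    return (uint16_t)((word & mask) >> shift);
}

/* Example: pull a 4-bit "sensor index" out of bits [11:8] and a 4-bit
 * "sensor location" out of bits [15:12] of one sensor word. */
static void decode_sensor_word(uint16_t w, uint8_t *index, uint8_t *location)
{
    *index    = (uint8_t)field_get(w, 0x0F00, 8);
    *location = (uint8_t)field_get(w, 0xF000, 12);
}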
- /* Return the internal sensor only if ETS is unsupported */ - e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); - if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) - return status; - - e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); - if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) - != NVM_ETS_TYPE_EMC) - return E1000_NOT_IMPLEMENTED; - - low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> - NVM_ETS_LTHRES_DELTA_SHIFT); - num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); - - for (i = 1; i <= num_sensors; i++) { - e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); - sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> - NVM_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> - NVM_ETS_DATA_LOC_SHIFT); - therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; - - hw->phy.ops.write_i2c_byte(hw, - e1000_emc_therm_limit[sensor_index], - E1000_I2C_THERMAL_SENSOR_ADDR, - therm_limit); - - if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) { - data->sensor[i].location = sensor_location; - data->sensor[i].caution_thresh = therm_limit; - data->sensor[i].max_op_thresh = therm_limit - - low_thresh_delta; - } - } - return status; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h deleted file mode 100644 index 2e0dbb2f..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h +++ /dev/null @@ -1,494 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_82575_H_ -#define _E1000_82575_H_ - -#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ - (ID_LED_DEF1_DEF2 << 8) | \ - (ID_LED_DEF1_DEF2 << 4) | \ - (ID_LED_OFF1_ON2)) -/* - * Receive Address Register Count - * Number of high/low register pairs in the RAR. The RAR (Receive Address - * Registers) holds the directed and multicast addresses that we monitor. - * These entries are also used for MAC-based filtering. - */ -/* - * For 82576, there are an additional set of RARs that begin at an offset - * separate from the first set of RARs. 
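For context on the RAR description above: each receive-address entry is a register pair, with the low 32 bits of the MAC address in RAL and the top 16 bits plus an address-valid flag in RAH. A hedged sketch of the usual e1000 packing; the AV bit value below is the conventional 0x80000000 and is an assumption here, not taken from this header:

#include <stdint.h>

#define RAH_AV 0x80000000u              /* "address valid" flag (assumed value) */

/* Pack a 6-byte MAC address into the RAL/RAH register pair of one RAR entry. */
static void pack_rar(const uint8_t mac[6], uint32_t *ral, uint32_t *rah)
{
    *ral = (uint32_t)mac[0] |
           ((uint32_t)mac[1] << 8) |
           ((uint32_t)mac[2] << 16) |
           ((uint32_t)mac[3] << 24);
    *rah = (uint32_t)mac[4] |
           ((uint32_t)mac[5] << 8) |
           RAH_AV;                      /* mark the entry as in use */
}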
- */ -#define E1000_RAR_ENTRIES_82575 16 -#define E1000_RAR_ENTRIES_82576 24 -#define E1000_RAR_ENTRIES_82580 24 -#define E1000_RAR_ENTRIES_I350 32 -#define E1000_SW_SYNCH_MB 0x00000100 -#define E1000_STAT_DEV_RST_SET 0x00100000 -#define E1000_CTRL_DEV_RST 0x20000000 - -struct e1000_adv_data_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - union { - u32 data; - struct { - u32 datalen:16; /* Data buffer length */ - u32 rsvd:4; - u32 dtyp:4; /* Descriptor type */ - u32 dcmd:8; /* Descriptor command */ - } config; - } lower; - union { - u32 data; - struct { - u32 status:4; /* Descriptor status */ - u32 idx:4; - u32 popts:6; /* Packet Options */ - u32 paylen:18; /* Payload length */ - } options; - } upper; -}; - -#define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */ -#define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */ -#define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */ -#define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */ -#define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */ -#define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */ -#define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */ -#define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */ -#define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */ -#define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */ -#define E1000_ADV_DCMD_RS 0x8 /* Report Status */ -#define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */ -#define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */ -/* Extended Device Control */ -#define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */ - -struct e1000_adv_context_desc { - union { - u32 ip_config; - struct { - u32 iplen:9; - u32 maclen:7; - u32 vlan_tag:16; - } fields; - } ip_setup; - u32 seq_num; - union { - u64 l4_config; - struct { - u32 mkrloc:9; - u32 tucmd:11; - u32 dtyp:4; - u32 adv:8; - u32 rsvd:4; - u32 idx:4; - u32 l4len:8; - u32 mss:16; - } fields; - } l4_setup; -}; - -/* SRRCTL bit definitions */ -#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ -#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 -#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ -#define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000 -#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 -#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 -#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 -#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000 -#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 -#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 -#define E1000_SRRCTL_TIMESTAMP 0x40000000 -#define E1000_SRRCTL_DROP_EN 0x80000000 - -#define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F -#define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00 - -#define E1000_TX_HEAD_WB_ENABLE 0x1 -#define E1000_TX_SEQNUM_WB_ENABLE 0x2 - -#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 -#define E1000_MRQC_ENABLE_VMDQ 0x00000003 -#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 -#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 -#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 -#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 -#define E1000_MRQC_ENABLE_RSS_8Q 0x00000002 - -#define E1000_VMRCTL_MIRROR_PORT_SHIFT 8 -#define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \ - E1000_VMRCTL_MIRROR_PORT_SHIFT) -#define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0) -#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1) -#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2) - -#define E1000_EICR_TX_QUEUE ( \ - E1000_EICR_TX_QUEUE0 | \ - 
E1000_EICR_TX_QUEUE1 | \ - E1000_EICR_TX_QUEUE2 | \ - E1000_EICR_TX_QUEUE3) - -#define E1000_EICR_RX_QUEUE ( \ - E1000_EICR_RX_QUEUE0 | \ - E1000_EICR_RX_QUEUE1 | \ - E1000_EICR_RX_QUEUE2 | \ - E1000_EICR_RX_QUEUE3) - -#define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE -#define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE - -#define EIMS_ENABLE_MASK ( \ - E1000_EIMS_RX_QUEUE | \ - E1000_EIMS_TX_QUEUE | \ - E1000_EIMS_TCP_TIMER | \ - E1000_EIMS_OTHER) - -/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ -#define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ -#define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ -#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ -#define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ -#define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ -#define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ -#define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ -#define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ -#define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ -#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ - -/* Receive Descriptor - Advanced */ -union e1000_adv_rx_desc { - struct { - __le64 pkt_addr; /* Packet buffer address */ - __le64 hdr_addr; /* Header buffer address */ - } read; - struct { - struct { - union { - __le32 data; - struct { - __le16 pkt_info; /*RSS type, Pkt type*/ - /* Split Header, header buffer len */ - __le16 hdr_info; - } hs_rss; - } lo_dword; - union { - __le32 rss; /* RSS Hash */ - struct { - __le16 ip_id; /* IP id */ - __le16 csum; /* Packet Checksum */ - } csum_ip; - } hi_dword; - } lower; - struct { - __le32 status_error; /* ext status/error */ - __le16 length; /* Packet length */ - __le16 vlan; /* VLAN tag */ - } upper; - } wb; /* writeback */ -}; - -#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F -#define E1000_RXDADV_RSSTYPE_SHIFT 12 -#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 -#define E1000_RXDADV_HDRBUFLEN_SHIFT 5 -#define E1000_RXDADV_SPLITHEADER_EN 0x00001000 -#define E1000_RXDADV_SPH 0x8000 -#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ -#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ -#define E1000_RXDADV_ERR_HBO 0x00800000 - -/* RSS Hash results */ -#define E1000_RXDADV_RSSTYPE_NONE 0x00000000 -#define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 -#define E1000_RXDADV_RSSTYPE_IPV4 0x00000002 -#define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 -#define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004 -#define E1000_RXDADV_RSSTYPE_IPV6 0x00000005 -#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 -#define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 -#define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 -#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 - -/* RSS Packet Types as indicated in the receive descriptor */ -#define E1000_RXDADV_PKTTYPE_NONE 0x00000000 -#define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */ -#define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */ -#define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */ -#define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */ -#define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ -#define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ -#define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ -#define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ - -#define 
E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ -#define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ -#define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ -#define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ -#define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ -#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ - -/* LinkSec results */ -/* Security Processing bit Indication */ -#define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000 -#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 -#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 -#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 -#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 - -#define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000 -#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 -#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 -#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 -#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000 - -/* Transmit Descriptor - Advanced */ -union e1000_adv_tx_desc { - struct { - __le64 buffer_addr; /* Address of descriptor's data buf */ - __le32 cmd_type_len; - __le32 olinfo_status; - } read; - struct { - __le64 rsvd; /* Reserved */ - __le32 nxtseq_seed; - __le32 status; - } wb; -}; - -/* Adv Transmit Descriptor Config Masks */ -#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ -#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ -#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ -#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ -#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ -#define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ -#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ -#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ -#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ -#define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */ -#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */ -#define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */ -#define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ -#define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ -#define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ -#define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ -/* 1st & Last TSO-full iSCSI PDU*/ -#define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800 -#define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ -#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ - -/* Context descriptors */ -struct e1000_adv_tx_context_desc { - __le32 vlan_macip_lens; - __le32 seqnum_seed; - __le32 type_tucmd_mlhl; - __le32 mss_l4len_idx; -}; - -#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ -#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ -#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ -#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ -#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ -#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ -#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ -#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ -/* IPSec Encrypt Enable for ESP */ -#define 
E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 -/* Req requires Markers and CRC */ -#define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000 -#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ -#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ -/* Adv ctxt IPSec SA IDX mask */ -#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF -/* Adv ctxt IPSec ESP len mask */ -#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF - -/* Additional Transmit Descriptor Control definitions */ -#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ -#define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */ -/* Tx Queue Arbitration Priority 0=low, 1=high */ -#define E1000_TXDCTL_PRIORITY 0x08000000 - -/* Additional Receive Descriptor Control definitions */ -#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ -#define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */ - -/* Direct Cache Access (DCA) definitions */ -#define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ -#define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ - -#define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ -#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ - -#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ -#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ -#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */ -#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */ -#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */ - -#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ -#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ -#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ -#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ -#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ - -#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ -#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ -#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */ -#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */ - -/* Additional interrupt register bit definitions */ -#define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */ -#define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ -#define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ - -/* ETQF register bit definitions */ -#define E1000_ETQF_FILTER_ENABLE (1 << 26) -#define E1000_ETQF_IMM_INT (1 << 29) -#define E1000_ETQF_1588 (1 << 30) -#define E1000_ETQF_QUEUE_ENABLE (1 << 31) -/* - * ETQF filter list: one static filter per filter consumer. This is - * to avoid filter collisions later. Add new filters - * here!! 
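The advanced Tx descriptor layout and the E1000_ADVTXD_* flags defined above combine in a fairly fixed way on the transmit path: cmd_type_len carries the buffer length plus the descriptor-type and command bits, while olinfo_status carries the payload length shifted into the PAYLEN field. A sketch of that composition for a simple single-buffer packet, reusing the flag values from this header; the overall recipe follows the usual igb transmit path and is an illustration, not code from this patch:

#include <stdint.h>

#define ADVTXD_DTYP_DATA    0x00300000u
#define ADVTXD_DCMD_EOP     0x01000000u
#define ADVTXD_DCMD_IFCS    0x02000000u
#define ADVTXD_DCMD_RS      0x08000000u
#define ADVTXD_DCMD_DEXT    0x20000000u
#define ADVTXD_PAYLEN_SHIFT 14

struct adv_tx_desc_read {               /* mirrors the "read" half of e1000_adv_tx_desc */
    uint64_t buffer_addr;
    uint32_t cmd_type_len;
    uint32_t olinfo_status;
};

static void fill_tx_desc(struct adv_tx_desc_read *d, uint64_t dma, uint16_t len)
{
    d->buffer_addr   = dma;
    d->cmd_type_len  = len | ADVTXD_DTYP_DATA | ADVTXD_DCMD_DEXT |
                       ADVTXD_DCMD_IFCS | ADVTXD_DCMD_EOP | ADVTXD_DCMD_RS;
    d->olinfo_status = (uint32_t)len << ADVTXD_PAYLEN_SHIFT;
}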
- * - * Current filters: - * EAPOL 802.1x (0x888e): Filter 0 - */ -#define E1000_ETQF_FILTER_EAPOL 0 - -#define E1000_FTQF_VF_BP 0x00008000 -#define E1000_FTQF_1588_TIME_STAMP 0x08000000 -#define E1000_FTQF_MASK 0xF0000000 -#define E1000_FTQF_MASK_PROTO_BP 0x10000000 -#define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000 -#define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000 -#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 - -#define E1000_NVM_APME_82575 0x0400 -#define MAX_NUM_VFS 7 - -#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */ -#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */ -#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ -#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 -#define E1000_DTXSWC_LLE_SHIFT 16 -#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ - -/* Easy defines for setting default pool, would normally be left a zero */ -#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 -#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) - -/* Other useful VMD_CTL register defines */ -#define E1000_VT_CTL_IGNORE_MAC (1 << 28) -#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) -#define E1000_VT_CTL_VM_REPL_EN (1 << 30) - -/* Per VM Offload register setup */ -#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ -#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ -#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ -#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ -#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ -#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ -#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ -#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ -#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ -#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ - -#define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */ -#define E1000_VMOLR_UPE 0x20000000 /* Unicast promisuous enable */ -#define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */ -#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ -#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ - -#define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */ -#define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */ - -#define E1000_VLVF_ARRAY_SIZE 32 -#define E1000_VLVF_VLANID_MASK 0x00000FFF -#define E1000_VLVF_POOLSEL_SHIFT 12 -#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) -#define E1000_VLVF_LVLAN 0x00100000 -#define E1000_VLVF_VLANID_ENABLE 0x80000000 - -#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ -#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ - -#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ - -#define E1000_IOVCTL 0x05BBC -#define E1000_IOVCTL_REUSE_VFQ 0x00000001 - -#define E1000_RPLOLR_STRVLAN 0x40000000 -#define E1000_RPLOLR_STRCRC 0x80000000 - -#define E1000_TCTL_EXT_COLD 0x000FFC00 -#define E1000_TCTL_EXT_COLD_SHIFT 10 - -#define E1000_DTXCTL_8023LL 0x0004 -#define E1000_DTXCTL_VLAN_ADDED 0x0008 -#define E1000_DTXCTL_OOS_ENABLE 0x0010 -#define E1000_DTXCTL_MDP_EN 0x0020 -#define E1000_DTXCTL_SPOOF_INT 0x0040 - -#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) - -#define ALL_QUEUES 0xFFFF - -/* Rx packet buffer size defines */ -#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F -void 
e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable); -void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf); -void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable); -s32 e1000_init_nvm_params_82575(struct e1000_hw *hw); - -u16 e1000_rxpbs_adjust_82580(u32 data); -s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); -s32 e1000_set_eee_i350(struct e1000_hw *); -s32 e1000_set_eee_i354(struct e1000_hw *); -s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *); -#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 -#define E1000_EMC_INTERNAL_DATA 0x00 -#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20 -#define E1000_EMC_DIODE1_DATA 0x01 -#define E1000_EMC_DIODE1_THERM_LIMIT 0x19 -#define E1000_EMC_DIODE2_DATA 0x23 -#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A -#define E1000_EMC_DIODE3_DATA 0x2A -#define E1000_EMC_DIODE3_THERM_LIMIT 0x30 - -s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw); -s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw); - -/* I2C SDA and SCL timing parameters for standard mode */ -#define E1000_I2C_T_HD_STA 4 -#define E1000_I2C_T_LOW 5 -#define E1000_I2C_T_HIGH 4 -#define E1000_I2C_T_SU_STA 5 -#define E1000_I2C_T_HD_DATA 5 -#define E1000_I2C_T_SU_DATA 1 -#define E1000_I2C_T_RISE 1 -#define E1000_I2C_T_FALL 1 -#define E1000_I2C_T_SU_STO 4 -#define E1000_I2C_T_BUF 5 - -s32 e1000_set_i2c_bb(struct e1000_hw *hw); -s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data); -s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data); -void e1000_i2c_bus_clear(struct e1000_hw *hw); -#endif /* _E1000_82575_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c deleted file mode 100644 index 3e54e50e..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c +++ /dev/null @@ -1,1144 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - -/** - * e1000_init_mac_params - Initialize MAC function pointers - * @hw: pointer to the HW structure - * - * This function initializes the function pointers for the MAC - * set of functions. Called by drivers or by e1000_setup_init_funcs. - **/ -s32 e1000_init_mac_params(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - if (hw->mac.ops.init_params) { - ret_val = hw->mac.ops.init_params(hw); - if (ret_val) { - DEBUGOUT("MAC Initialization Error\n"); - goto out; - } - } else { - DEBUGOUT("mac.init_mac_params was NULL\n"); - ret_val = -E1000_ERR_CONFIG; - } - -out: - return ret_val; -} - -/** - * e1000_init_nvm_params - Initialize NVM function pointers - * @hw: pointer to the HW structure - * - * This function initializes the function pointers for the NVM - * set of functions. Called by drivers or by e1000_setup_init_funcs. 
- **/ -s32 e1000_init_nvm_params(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - if (hw->nvm.ops.init_params) { - ret_val = hw->nvm.ops.init_params(hw); - if (ret_val) { - DEBUGOUT("NVM Initialization Error\n"); - goto out; - } - } else { - DEBUGOUT("nvm.init_nvm_params was NULL\n"); - ret_val = -E1000_ERR_CONFIG; - } - -out: - return ret_val; -} - -/** - * e1000_init_phy_params - Initialize PHY function pointers - * @hw: pointer to the HW structure - * - * This function initializes the function pointers for the PHY - * set of functions. Called by drivers or by e1000_setup_init_funcs. - **/ -s32 e1000_init_phy_params(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - if (hw->phy.ops.init_params) { - ret_val = hw->phy.ops.init_params(hw); - if (ret_val) { - DEBUGOUT("PHY Initialization Error\n"); - goto out; - } - } else { - DEBUGOUT("phy.init_phy_params was NULL\n"); - ret_val = -E1000_ERR_CONFIG; - } - -out: - return ret_val; -} - -/** - * e1000_init_mbx_params - Initialize mailbox function pointers - * @hw: pointer to the HW structure - * - * This function initializes the function pointers for the PHY - * set of functions. Called by drivers or by e1000_setup_init_funcs. - **/ -s32 e1000_init_mbx_params(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - - if (hw->mbx.ops.init_params) { - ret_val = hw->mbx.ops.init_params(hw); - if (ret_val) { - DEBUGOUT("Mailbox Initialization Error\n"); - goto out; - } - } else { - DEBUGOUT("mbx.init_mbx_params was NULL\n"); - ret_val = -E1000_ERR_CONFIG; - } - -out: - return ret_val; -} - -/** - * e1000_set_mac_type - Sets MAC type - * @hw: pointer to the HW structure - * - * This function sets the mac type of the adapter based on the - * device ID stored in the hw structure. - * MUST BE FIRST FUNCTION CALLED (explicitly or through - * e1000_setup_init_funcs()). 
- **/ -s32 e1000_set_mac_type(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_set_mac_type"); - - switch (hw->device_id) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - mac->type = e1000_82575; - break; - case E1000_DEV_ID_82576: - case E1000_DEV_ID_82576_FIBER: - case E1000_DEV_ID_82576_SERDES: - case E1000_DEV_ID_82576_QUAD_COPPER: - case E1000_DEV_ID_82576_QUAD_COPPER_ET2: - case E1000_DEV_ID_82576_NS: - case E1000_DEV_ID_82576_NS_SERDES: - case E1000_DEV_ID_82576_SERDES_QUAD: - mac->type = e1000_82576; - break; - case E1000_DEV_ID_82580_COPPER: - case E1000_DEV_ID_82580_FIBER: - case E1000_DEV_ID_82580_SERDES: - case E1000_DEV_ID_82580_SGMII: - case E1000_DEV_ID_82580_COPPER_DUAL: - case E1000_DEV_ID_82580_QUAD_FIBER: - case E1000_DEV_ID_DH89XXCC_SGMII: - case E1000_DEV_ID_DH89XXCC_SERDES: - case E1000_DEV_ID_DH89XXCC_BACKPLANE: - case E1000_DEV_ID_DH89XXCC_SFP: - mac->type = e1000_82580; - break; - case E1000_DEV_ID_I350_COPPER: - case E1000_DEV_ID_I350_FIBER: - case E1000_DEV_ID_I350_SERDES: - case E1000_DEV_ID_I350_SGMII: - case E1000_DEV_ID_I350_DA4: - mac->type = e1000_i350; - break; - case E1000_DEV_ID_I210_COPPER_FLASHLESS: - case E1000_DEV_ID_I210_SERDES_FLASHLESS: - case E1000_DEV_ID_I210_COPPER: - case E1000_DEV_ID_I210_COPPER_OEM1: - case E1000_DEV_ID_I210_COPPER_IT: - case E1000_DEV_ID_I210_FIBER: - case E1000_DEV_ID_I210_SERDES: - case E1000_DEV_ID_I210_SGMII: - mac->type = e1000_i210; - break; - case E1000_DEV_ID_I211_COPPER: - mac->type = e1000_i211; - break; - - case E1000_DEV_ID_I354_BACKPLANE_1GBPS: - case E1000_DEV_ID_I354_SGMII: - case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: - mac->type = e1000_i354; - break; - default: - /* Should never have loaded on this device */ - ret_val = -E1000_ERR_MAC_INIT; - break; - } - - return ret_val; -} - -/** - * e1000_setup_init_funcs - Initializes function pointers - * @hw: pointer to the HW structure - * @init_device: true will initialize the rest of the function pointers - * getting the device ready for use. false will only set - * MAC type and the function pointers for the other init - * functions. Passing false will not generate any hardware - * reads or writes. - * - * This function must be called by a driver in order to use the rest - * of the 'shared' code files. Called by drivers only. - **/ -s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) -{ - s32 ret_val; - - /* Can't do much good without knowing the MAC type. */ - ret_val = e1000_set_mac_type(hw); - if (ret_val) { - DEBUGOUT("ERROR: MAC type could not be set properly.\n"); - goto out; - } - - if (!hw->hw_addr) { - DEBUGOUT("ERROR: Registers not mapped\n"); - ret_val = -E1000_ERR_CONFIG; - goto out; - } - - /* - * Init function pointers to generic implementations. We do this first - * allowing a driver module to override it afterward. - */ - e1000_init_mac_ops_generic(hw); - e1000_init_phy_ops_generic(hw); - e1000_init_nvm_ops_generic(hw); - e1000_init_mbx_ops_generic(hw); - - /* - * Set up the init function pointers. These are functions within the - * adapter family file that sets up function pointers for the rest of - * the functions in that family. 
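The sequence described in the comment above, install generic implementations first and then let the family-specific init override the entries it cares about, is the classic ops-table pattern. A self-contained sketch of the idea; the structure and function names here are illustrative, not the real e1000 mac/phy/nvm operation tables:

#include <stdio.h>

struct mac_ops {
    int  (*reset_hw)(void);
    int  (*init_hw)(void);
    void (*clear_vfta)(void);
};

static int  generic_reset_hw(void)   { puts("generic reset");      return 0; }
static int  generic_init_hw(void)    { puts("generic init");       return 0; }
static void generic_clear_vfta(void) { puts("generic vfta clear"); }

static int  i350_reset_hw(void)      { puts("i350-specific reset"); return 0; }

static void init_ops_generic(struct mac_ops *ops)
{
    ops->reset_hw   = generic_reset_hw;
    ops->init_hw    = generic_init_hw;
    ops->clear_vfta = generic_clear_vfta;
}

static void init_ops_i350(struct mac_ops *ops)
{
    init_ops_generic(ops);          /* start from the generic table ...        */
    ops->reset_hw = i350_reset_hw;  /* ... then override family-specific entries */
}

int main(void)
{
    struct mac_ops ops;

    init_ops_i350(&ops);
    ops.reset_hw();                 /* "i350-specific reset"                */
    ops.init_hw();                  /* falls back to the generic version    */
    return 0;
}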
- */ - switch (hw->mac.type) { - case e1000_82575: - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - e1000_init_function_pointers_82575(hw); - break; - case e1000_i210: - case e1000_i211: - e1000_init_function_pointers_i210(hw); - break; - default: - DEBUGOUT("Hardware not supported\n"); - ret_val = -E1000_ERR_CONFIG; - break; - } - - /* - * Initialize the rest of the function pointers. These require some - * register reads/writes in some cases. - */ - if (!(ret_val) && init_device) { - ret_val = e1000_init_mac_params(hw); - if (ret_val) - goto out; - - ret_val = e1000_init_nvm_params(hw); - if (ret_val) - goto out; - - ret_val = e1000_init_phy_params(hw); - if (ret_val) - goto out; - - ret_val = e1000_init_mbx_params(hw); - if (ret_val) - goto out; - } - -out: - return ret_val; -} - -/** - * e1000_get_bus_info - Obtain bus information for adapter - * @hw: pointer to the HW structure - * - * This will obtain information about the HW bus for which the - * adapter is attached and stores it in the hw structure. This is a - * function pointer entry point called by drivers. - **/ -s32 e1000_get_bus_info(struct e1000_hw *hw) -{ - if (hw->mac.ops.get_bus_info) - return hw->mac.ops.get_bus_info(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_clear_vfta - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * This clears the VLAN filter table on the adapter. This is a function - * pointer entry point called by drivers. - **/ -void e1000_clear_vfta(struct e1000_hw *hw) -{ - if (hw->mac.ops.clear_vfta) - hw->mac.ops.clear_vfta(hw); -} - -/** - * e1000_write_vfta - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: the 32-bit offset in which to write the value to. - * @value: the 32-bit value to write at location offset. - * - * This writes a 32-bit value to a 32-bit offset in the VLAN filter - * table. This is a function pointer entry point called by drivers. - **/ -void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) -{ - if (hw->mac.ops.write_vfta) - hw->mac.ops.write_vfta(hw, offset, value); -} - -/** - * e1000_update_mc_addr_list - Update Multicast addresses - * @hw: pointer to the HW structure - * @mc_addr_list: array of multicast addresses to program - * @mc_addr_count: number of multicast addresses to program - * - * Updates the Multicast Table Array. - * The caller must have a packed mc_addr_list of multicast addresses. - **/ -void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count) -{ - if (hw->mac.ops.update_mc_addr_list) - hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, - mc_addr_count); -} - -/** - * e1000_force_mac_fc - Force MAC flow control - * @hw: pointer to the HW structure - * - * Force the MAC's flow control settings. Currently no func pointer exists - * and all implementations are handled in the generic version of this - * function. - **/ -s32 e1000_force_mac_fc(struct e1000_hw *hw) -{ - return e1000_force_mac_fc_generic(hw); -} - -/** - * e1000_check_for_link - Check/Store link connection - * @hw: pointer to the HW structure - * - * This checks the link condition of the adapter and stores the - * results in the hw->mac structure. This is a function pointer entry - * point called by drivers. 
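Every public entry point in this file follows the same guard: call through the function pointer if the family installed one, otherwise return a benign default for optional operations or a configuration error for operations that must exist, as e1000_check_for_link() above does. A minimal sketch of that dispatch idiom; the error value is a placeholder, not the real -E1000_ERR_CONFIG:

#define SUCCESS      0
#define ERR_CONFIG (-3)              /* placeholder for -E1000_ERR_CONFIG */

struct hw {
    int (*check_for_link)(struct hw *hw);   /* may be unset if unsupported */
    int (*setup_led)(struct hw *hw);
};

/* Mandatory operation: report a configuration error when no handler exists. */
static int check_for_link(struct hw *hw)
{
    if (hw->check_for_link)
        return hw->check_for_link(hw);
    return ERR_CONFIG;
}

/* Optional operation: silently succeed when the family does not provide it. */
static int setup_led(struct hw *hw)
{
    if (hw->setup_led)
        return hw->setup_led(hw);
    return SUCCESS;
}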
- **/ -s32 e1000_check_for_link(struct e1000_hw *hw) -{ - if (hw->mac.ops.check_for_link) - return hw->mac.ops.check_for_link(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_check_mng_mode - Check management mode - * @hw: pointer to the HW structure - * - * This checks if the adapter has manageability enabled. - * This is a function pointer entry point called by drivers. - **/ -bool e1000_check_mng_mode(struct e1000_hw *hw) -{ - if (hw->mac.ops.check_mng_mode) - return hw->mac.ops.check_mng_mode(hw); - - return false; -} - -/** - * e1000_mng_write_dhcp_info - Writes DHCP info to host interface - * @hw: pointer to the HW structure - * @buffer: pointer to the host interface - * @length: size of the buffer - * - * Writes the DHCP information to the host interface. - **/ -s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) -{ - return e1000_mng_write_dhcp_info_generic(hw, buffer, length); -} - -/** - * e1000_reset_hw - Reset hardware - * @hw: pointer to the HW structure - * - * This resets the hardware into a known state. This is a function pointer - * entry point called by drivers. - **/ -s32 e1000_reset_hw(struct e1000_hw *hw) -{ - if (hw->mac.ops.reset_hw) - return hw->mac.ops.reset_hw(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_init_hw - Initialize hardware - * @hw: pointer to the HW structure - * - * This inits the hardware readying it for operation. This is a function - * pointer entry point called by drivers. - **/ -s32 e1000_init_hw(struct e1000_hw *hw) -{ - if (hw->mac.ops.init_hw) - return hw->mac.ops.init_hw(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_setup_link - Configures link and flow control - * @hw: pointer to the HW structure - * - * This configures link and flow control settings for the adapter. This - * is a function pointer entry point called by drivers. While modules can - * also call this, they probably call their own version of this function. - **/ -s32 e1000_setup_link(struct e1000_hw *hw) -{ - if (hw->mac.ops.setup_link) - return hw->mac.ops.setup_link(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_get_speed_and_duplex - Returns current speed and duplex - * @hw: pointer to the HW structure - * @speed: pointer to a 16-bit value to store the speed - * @duplex: pointer to a 16-bit value to store the duplex. - * - * This returns the speed and duplex of the adapter in the two 'out' - * variables passed in. This is a function pointer entry point called - * by drivers. - **/ -s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) -{ - if (hw->mac.ops.get_link_up_info) - return hw->mac.ops.get_link_up_info(hw, speed, duplex); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_setup_led - Configures SW controllable LED - * @hw: pointer to the HW structure - * - * This prepares the SW controllable LED for use and saves the current state - * of the LED so it can be later restored. This is a function pointer entry - * point called by drivers. - **/ -s32 e1000_setup_led(struct e1000_hw *hw) -{ - if (hw->mac.ops.setup_led) - return hw->mac.ops.setup_led(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_cleanup_led - Restores SW controllable LED - * @hw: pointer to the HW structure - * - * This restores the SW controllable LED to the value saved off by - * e1000_setup_led. This is a function pointer entry point called by drivers. 
- **/ -s32 e1000_cleanup_led(struct e1000_hw *hw) -{ - if (hw->mac.ops.cleanup_led) - return hw->mac.ops.cleanup_led(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_blink_led - Blink SW controllable LED - * @hw: pointer to the HW structure - * - * This starts the adapter LED blinking. Request the LED to be setup first - * and cleaned up after. This is a function pointer entry point called by - * drivers. - **/ -s32 e1000_blink_led(struct e1000_hw *hw) -{ - if (hw->mac.ops.blink_led) - return hw->mac.ops.blink_led(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_id_led_init - store LED configurations in SW - * @hw: pointer to the HW structure - * - * Initializes the LED config in SW. This is a function pointer entry point - * called by drivers. - **/ -s32 e1000_id_led_init(struct e1000_hw *hw) -{ - if (hw->mac.ops.id_led_init) - return hw->mac.ops.id_led_init(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_led_on - Turn on SW controllable LED - * @hw: pointer to the HW structure - * - * Turns the SW defined LED on. This is a function pointer entry point - * called by drivers. - **/ -s32 e1000_led_on(struct e1000_hw *hw) -{ - if (hw->mac.ops.led_on) - return hw->mac.ops.led_on(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_led_off - Turn off SW controllable LED - * @hw: pointer to the HW structure - * - * Turns the SW defined LED off. This is a function pointer entry point - * called by drivers. - **/ -s32 e1000_led_off(struct e1000_hw *hw) -{ - if (hw->mac.ops.led_off) - return hw->mac.ops.led_off(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_reset_adaptive - Reset adaptive IFS - * @hw: pointer to the HW structure - * - * Resets the adaptive IFS. Currently no func pointer exists and all - * implementations are handled in the generic version of this function. - **/ -void e1000_reset_adaptive(struct e1000_hw *hw) -{ - e1000_reset_adaptive_generic(hw); -} - -/** - * e1000_update_adaptive - Update adaptive IFS - * @hw: pointer to the HW structure - * - * Updates adapter IFS. Currently no func pointer exists and all - * implementations are handled in the generic version of this function. - **/ -void e1000_update_adaptive(struct e1000_hw *hw) -{ - e1000_update_adaptive_generic(hw); -} - -/** - * e1000_disable_pcie_master - Disable PCI-Express master access - * @hw: pointer to the HW structure - * - * Disables PCI-Express master access and verifies there are no pending - * requests. Currently no func pointer exists and all implementations are - * handled in the generic version of this function. - **/ -s32 e1000_disable_pcie_master(struct e1000_hw *hw) -{ - return e1000_disable_pcie_master_generic(hw); -} - -/** - * e1000_config_collision_dist - Configure collision distance - * @hw: pointer to the HW structure - * - * Configures the collision distance to the default value and is used - * during link setup. - **/ -void e1000_config_collision_dist(struct e1000_hw *hw) -{ - if (hw->mac.ops.config_collision_dist) - hw->mac.ops.config_collision_dist(hw); -} - -/** - * e1000_rar_set - Sets a receive address register - * @hw: pointer to the HW structure - * @addr: address to set the RAR to - * @index: the RAR to set - * - * Sets a Receive Address Register (RAR) to the specified address. - **/ -void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) -{ - if (hw->mac.ops.rar_set) - hw->mac.ops.rar_set(hw, addr, index); -} - -/** - * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state - * @hw: pointer to the HW structure - * - * Ensures that the MDI/MDIX SW state is valid. 
- **/ -s32 e1000_validate_mdi_setting(struct e1000_hw *hw) -{ - if (hw->mac.ops.validate_mdi_setting) - return hw->mac.ops.validate_mdi_setting(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_hash_mc_addr - Determines address location in multicast table - * @hw: pointer to the HW structure - * @mc_addr: Multicast address to hash. - * - * This hashes an address to determine its location in the multicast - * table. Currently no func pointer exists and all implementations - * are handled in the generic version of this function. - **/ -u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) -{ - return e1000_hash_mc_addr_generic(hw, mc_addr); -} - -/** - * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX - * @hw: pointer to the HW structure - * - * Enables packet filtering on transmit packets if manageability is enabled - * and host interface is enabled. - * Currently no func pointer exists and all implementations are handled in the - * generic version of this function. - **/ -bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) -{ - return e1000_enable_tx_pkt_filtering_generic(hw); -} - -/** - * e1000_mng_host_if_write - Writes to the manageability host interface - * @hw: pointer to the HW structure - * @buffer: pointer to the host interface buffer - * @length: size of the buffer - * @offset: location in the buffer to write to - * @sum: sum of the data (not checksum) - * - * This function writes the buffer content at the offset given on the host if. - * It also does alignment considerations to do the writes in most efficient - * way. Also fills up the sum of the buffer in *buffer parameter. - **/ -s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, - u16 offset, u8 *sum) -{ - return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum); -} - -/** - * e1000_mng_write_cmd_header - Writes manageability command header - * @hw: pointer to the HW structure - * @hdr: pointer to the host interface command header - * - * Writes the command header after does the checksum calculation. - **/ -s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, - struct e1000_host_mng_command_header *hdr) -{ - return e1000_mng_write_cmd_header_generic(hw, hdr); -} - -/** - * e1000_mng_enable_host_if - Checks host interface is enabled - * @hw: pointer to the HW structure - * - * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND - * - * This function checks whether the HOST IF is enabled for command operation - * and also checks whether the previous command is completed. It busy waits - * in case of previous command is not completed. - **/ -s32 e1000_mng_enable_host_if(struct e1000_hw *hw) -{ - return e1000_mng_enable_host_if_generic(hw); -} - -/** - * e1000_check_reset_block - Verifies PHY can be reset - * @hw: pointer to the HW structure - * - * Checks if the PHY is in a state that can be reset or if manageability - * has it tied up. This is a function pointer entry point called by drivers. - **/ -s32 e1000_check_reset_block(struct e1000_hw *hw) -{ - if (hw->phy.ops.check_reset_block) - return hw->phy.ops.check_reset_block(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_read_phy_reg - Reads PHY register - * @hw: pointer to the HW structure - * @offset: the register to read - * @data: the buffer to store the 16-bit read. - * - * Reads the PHY register and returns the value in data. - * This is a function pointer entry point called by drivers. 
- **/ -s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) -{ - if (hw->phy.ops.read_reg) - return hw->phy.ops.read_reg(hw, offset, data); - - return E1000_SUCCESS; -} - -/** - * e1000_write_phy_reg - Writes PHY register - * @hw: pointer to the HW structure - * @offset: the register to write - * @data: the value to write. - * - * Writes the PHY register at offset with the value in data. - * This is a function pointer entry point called by drivers. - **/ -s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) -{ - if (hw->phy.ops.write_reg) - return hw->phy.ops.write_reg(hw, offset, data); - - return E1000_SUCCESS; -} - -/** - * e1000_release_phy - Generic release PHY - * @hw: pointer to the HW structure - * - * Return if silicon family does not require a semaphore when accessing the - * PHY. - **/ -void e1000_release_phy(struct e1000_hw *hw) -{ - if (hw->phy.ops.release) - hw->phy.ops.release(hw); -} - -/** - * e1000_acquire_phy - Generic acquire PHY - * @hw: pointer to the HW structure - * - * Return success if silicon family does not require a semaphore when - * accessing the PHY. - **/ -s32 e1000_acquire_phy(struct e1000_hw *hw) -{ - if (hw->phy.ops.acquire) - return hw->phy.ops.acquire(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_read_kmrn_reg - Reads register using Kumeran interface - * @hw: pointer to the HW structure - * @offset: the register to read - * @data: the location to store the 16-bit value read. - * - * Reads a register out of the Kumeran interface. Currently no func pointer - * exists and all implementations are handled in the generic version of - * this function. - **/ -s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) -{ - return e1000_read_kmrn_reg_generic(hw, offset, data); -} - -/** - * e1000_write_kmrn_reg - Writes register using Kumeran interface - * @hw: pointer to the HW structure - * @offset: the register to write - * @data: the value to write. - * - * Writes a register to the Kumeran interface. Currently no func pointer - * exists and all implementations are handled in the generic version of - * this function. - **/ -s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) -{ - return e1000_write_kmrn_reg_generic(hw, offset, data); -} - -/** - * e1000_get_cable_length - Retrieves cable length estimation - * @hw: pointer to the HW structure - * - * This function estimates the cable length and stores them in - * hw->phy.min_length and hw->phy.max_length. This is a function pointer - * entry point called by drivers. - **/ -s32 e1000_get_cable_length(struct e1000_hw *hw) -{ - if (hw->phy.ops.get_cable_length) - return hw->phy.ops.get_cable_length(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_get_phy_info - Retrieves PHY information from registers - * @hw: pointer to the HW structure - * - * This function gets some information from various PHY registers and - * populates hw->phy values with it. This is a function pointer entry - * point called by drivers. - **/ -s32 e1000_get_phy_info(struct e1000_hw *hw) -{ - if (hw->phy.ops.get_info) - return hw->phy.ops.get_info(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_phy_hw_reset - Hard PHY reset - * @hw: pointer to the HW structure - * - * Performs a hard PHY reset. This is a function pointer entry point called - * by drivers. 
- **/ -s32 e1000_phy_hw_reset(struct e1000_hw *hw) -{ - if (hw->phy.ops.reset) - return hw->phy.ops.reset(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_phy_commit - Soft PHY reset - * @hw: pointer to the HW structure - * - * Performs a soft PHY reset on those that apply. This is a function pointer - * entry point called by drivers. - **/ -s32 e1000_phy_commit(struct e1000_hw *hw) -{ - if (hw->phy.ops.commit) - return hw->phy.ops.commit(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_set_d0_lplu_state - Sets low power link up state for D0 - * @hw: pointer to the HW structure - * @active: boolean used to enable/disable lplu - * - * Success returns 0, Failure returns 1 - * - * The low power link up (lplu) state is set to the power management level D0 - * and SmartSpeed is disabled when active is true, else clear lplu for D0 - * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU - * is used during Dx states where the power conservation is most important. - * During driver activity, SmartSpeed should be enabled so performance is - * maintained. This is a function pointer entry point called by drivers. - **/ -s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) -{ - if (hw->phy.ops.set_d0_lplu_state) - return hw->phy.ops.set_d0_lplu_state(hw, active); - - return E1000_SUCCESS; -} - -/** - * e1000_set_d3_lplu_state - Sets low power link up state for D3 - * @hw: pointer to the HW structure - * @active: boolean used to enable/disable lplu - * - * Success returns 0, Failure returns 1 - * - * The low power link up (lplu) state is set to the power management level D3 - * and SmartSpeed is disabled when active is true, else clear lplu for D3 - * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU - * is used during Dx states where the power conservation is most important. - * During driver activity, SmartSpeed should be enabled so performance is - * maintained. This is a function pointer entry point called by drivers. - **/ -s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) -{ - if (hw->phy.ops.set_d3_lplu_state) - return hw->phy.ops.set_d3_lplu_state(hw, active); - - return E1000_SUCCESS; -} - -/** - * e1000_read_mac_addr - Reads MAC address - * @hw: pointer to the HW structure - * - * Reads the MAC address out of the adapter and stores it in the HW structure. - * Currently no func pointer exists and all implementations are handled in the - * generic version of this function. - **/ -s32 e1000_read_mac_addr(struct e1000_hw *hw) -{ - if (hw->mac.ops.read_mac_addr) - return hw->mac.ops.read_mac_addr(hw); - - return e1000_read_mac_addr_generic(hw); -} - -/** - * e1000_read_pba_string - Read device part number string - * @hw: pointer to the HW structure - * @pba_num: pointer to device part number - * @pba_num_size: size of part number buffer - * - * Reads the product board assembly (PBA) number from the EEPROM and stores - * the value in pba_num. - * Currently no func pointer exists and all implementations are handled in the - * generic version of this function. - **/ -s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) -{ - return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); -} - -/** - * e1000_read_pba_length - Read device part number string length - * @hw: pointer to the HW structure - * @pba_num_size: size of part number buffer - * - * Reads the product board assembly (PBA) number length from the EEPROM and - * stores the value in pba_num. 
- * Currently no func pointer exists and all implementations are handled in the - * generic version of this function. - **/ -s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) -{ - return e1000_read_pba_length_generic(hw, pba_num_size); -} - -/** - * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum - * @hw: pointer to the HW structure - * - * Validates the NVM checksum is correct. This is a function pointer entry - * point called by drivers. - **/ -s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) -{ - if (hw->nvm.ops.validate) - return hw->nvm.ops.validate(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum - * @hw: pointer to the HW structure - * - * Updates the NVM checksum. Currently no func pointer exists and all - * implementations are handled in the generic version of this function. - **/ -s32 e1000_update_nvm_checksum(struct e1000_hw *hw) -{ - if (hw->nvm.ops.update) - return hw->nvm.ops.update(hw); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_reload_nvm - Reloads EEPROM - * @hw: pointer to the HW structure - * - * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the - * extended control register. - **/ -void e1000_reload_nvm(struct e1000_hw *hw) -{ - if (hw->nvm.ops.reload) - hw->nvm.ops.reload(hw); -} - -/** - * e1000_read_nvm - Reads NVM (EEPROM) - * @hw: pointer to the HW structure - * @offset: the word offset to read - * @words: number of 16-bit words to read - * @data: pointer to the properly sized buffer for the data. - * - * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function - * pointer entry point called by drivers. - **/ -s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) -{ - if (hw->nvm.ops.read) - return hw->nvm.ops.read(hw, offset, words, data); - - return -E1000_ERR_CONFIG; -} - -/** - * e1000_write_nvm - Writes to NVM (EEPROM) - * @hw: pointer to the HW structure - * @offset: the word offset to read - * @words: number of 16-bit words to write - * @data: pointer to the properly sized buffer for the data. - * - * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function - * pointer entry point called by drivers. - **/ -s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) -{ - if (hw->nvm.ops.write) - return hw->nvm.ops.write(hw, offset, words, data); - - return E1000_SUCCESS; -} - -/** - * e1000_write_8bit_ctrl_reg - Writes 8bit Control register - * @hw: pointer to the HW structure - * @reg: 32bit register offset - * @offset: the register to write - * @data: the value to write. - * - * Writes the PHY register at offset with the value in data. - * This is a function pointer entry point called by drivers. - **/ -s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, - u8 data) -{ - return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); -} - -/** - * e1000_power_up_phy - Restores link in case of PHY power down - * @hw: pointer to the HW structure - * - * The phy may be powered down to save power, to turn off link when the - * driver is unloaded, or wake on lan is not enabled (among others). - **/ -void e1000_power_up_phy(struct e1000_hw *hw) -{ - if (hw->phy.ops.power_up) - hw->phy.ops.power_up(hw); - - e1000_setup_link(hw); -} - -/** - * e1000_power_down_phy - Power down PHY - * @hw: pointer to the HW structure - * - * The phy may be powered down to save power, to turn off link when the - * driver is unloaded, or wake on lan is not enabled (among others). 
- **/ -void e1000_power_down_phy(struct e1000_hw *hw) -{ - if (hw->phy.ops.power_down) - hw->phy.ops.power_down(hw); -} - -/** - * e1000_power_up_fiber_serdes_link - Power up serdes link - * @hw: pointer to the HW structure - * - * Power on the optics and PCS. - **/ -void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) -{ - if (hw->mac.ops.power_up_serdes) - hw->mac.ops.power_up_serdes(hw); -} - -/** - * e1000_shutdown_fiber_serdes_link - Remove link during power down - * @hw: pointer to the HW structure - * - * Shutdown the optics and PCS on driver unload. - **/ -void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) -{ - if (hw->mac.ops.shutdown_serdes) - hw->mac.ops.shutdown_serdes(hw); -} - -/** - * e1000_get_thermal_sensor_data - Gathers thermal sensor data - * @hw: pointer to hardware structure - * - * Updates the temperatures in mac.thermal_sensor_data - **/ -s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw) -{ - if (hw->mac.ops.get_thermal_sensor_data) - return hw->mac.ops.get_thermal_sensor_data(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds - * @hw: pointer to hardware structure - * - * Sets the thermal sensor thresholds according to the NVM map - **/ -s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw) -{ - if (hw->mac.ops.init_thermal_sensor_thresh) - return hw->mac.ops.init_thermal_sensor_thresh(hw); - - return E1000_SUCCESS; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h deleted file mode 100644 index 0bc00acd..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_API_H_ -#define _E1000_API_H_ - -#include "e1000_hw.h" - -extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); -extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw); -extern void e1000_init_function_pointers_vf(struct e1000_hw *hw); -extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw); -extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw); -extern void e1000_init_function_pointers_i210(struct e1000_hw *hw); - -s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr); -s32 e1000_set_mac_type(struct e1000_hw *hw); -s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); -s32 e1000_init_mac_params(struct e1000_hw *hw); -s32 e1000_init_nvm_params(struct e1000_hw *hw); -s32 e1000_init_phy_params(struct e1000_hw *hw); -s32 e1000_init_mbx_params(struct e1000_hw *hw); -s32 e1000_get_bus_info(struct e1000_hw *hw); -void e1000_clear_vfta(struct e1000_hw *hw); -void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); -s32 e1000_force_mac_fc(struct e1000_hw *hw); -s32 e1000_check_for_link(struct e1000_hw *hw); -s32 e1000_reset_hw(struct e1000_hw *hw); -s32 e1000_init_hw(struct e1000_hw *hw); -s32 e1000_setup_link(struct e1000_hw *hw); -s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex); -s32 e1000_disable_pcie_master(struct e1000_hw *hw); -void e1000_config_collision_dist(struct e1000_hw *hw); -void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); -u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); -void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count); -s32 e1000_setup_led(struct e1000_hw *hw); -s32 e1000_cleanup_led(struct e1000_hw *hw); -s32 e1000_check_reset_block(struct e1000_hw *hw); -s32 e1000_blink_led(struct e1000_hw *hw); -s32 e1000_led_on(struct e1000_hw *hw); -s32 e1000_led_off(struct e1000_hw *hw); -s32 e1000_id_led_init(struct e1000_hw *hw); -void e1000_reset_adaptive(struct e1000_hw *hw); -void e1000_update_adaptive(struct e1000_hw *hw); -s32 e1000_get_cable_length(struct e1000_hw *hw); -s32 e1000_validate_mdi_setting(struct e1000_hw *hw); -s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, - u8 data); -s32 e1000_get_phy_info(struct e1000_hw *hw); -void e1000_release_phy(struct e1000_hw *hw); -s32 e1000_acquire_phy(struct e1000_hw *hw); -s32 e1000_phy_hw_reset(struct e1000_hw *hw); -s32 e1000_phy_commit(struct e1000_hw *hw); -void e1000_power_up_phy(struct e1000_hw *hw); -void e1000_power_down_phy(struct e1000_hw *hw); -s32 e1000_read_mac_addr(struct e1000_hw *hw); -s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size); -s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size); -void e1000_reload_nvm(struct e1000_hw *hw); -s32 e1000_update_nvm_checksum(struct e1000_hw *hw); -s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); -s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); -s32 
e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); -bool e1000_check_mng_mode(struct e1000_hw *hw); -bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); -s32 e1000_mng_enable_host_if(struct e1000_hw *hw); -s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, - u16 offset, u8 *sum); -s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, - struct e1000_host_mng_command_header *hdr); -s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); -s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw); -s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw); - - - -/* - * TBI_ACCEPT macro definition: - * - * This macro requires: - * adapter = a pointer to struct e1000_hw - * status = the 8 bit status field of the Rx descriptor with EOP set - * error = the 8 bit error field of the Rx descriptor with EOP set - * length = the sum of all the length fields of the Rx descriptors that - * make up the current frame - * last_byte = the last byte of the frame DMAed by the hardware - * max_frame_length = the maximum frame length we want to accept. - * min_frame_length = the minimum frame length we want to accept. - * - * This macro is a conditional that should be used in the interrupt - * handler's Rx processing routine when RxErrors have been detected. - * - * Typical use: - * ... - * if (TBI_ACCEPT) { - * accept_frame = true; - * e1000_tbi_adjust_stats(adapter, MacAddress); - * frame_length--; - * } else { - * accept_frame = false; - * } - * ... - */ - -/* The carrier extension symbol, as received by the NIC. */ -#define CARRIER_EXTENSION 0x0F - -#define TBI_ACCEPT(a, status, errors, length, last_byte, \ - min_frame_size, max_frame_size) \ - (e1000_tbi_sbp_enabled_82543(a) && \ - (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ - ((last_byte) == CARRIER_EXTENSION) && \ - (((status) & E1000_RXD_STAT_VP) ? \ - (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \ - ((length) <= (max_frame_size + 1))) : \ - (((length) > min_frame_size) && \ - ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1))))) - -#ifndef E1000_MAX -#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b)) -#endif -#ifndef E1000_DIVIDE_ROUND_UP -#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */ -#endif -#endif /* _E1000_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h deleted file mode 100644 index b39aaf80..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h +++ /dev/null @@ -1,1365 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_DEFINES_H_ -#define _E1000_DEFINES_H_ - -/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ -#define REQ_TX_DESCRIPTOR_MULTIPLE 8 -#define REQ_RX_DESCRIPTOR_MULTIPLE 8 - -/* Definitions for power management and wakeup registers */ -/* Wake Up Control */ -#define E1000_WUC_APME 0x00000001 /* APM Enable */ -#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ -#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ -#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ -#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ - -/* Wake Up Filter Control */ -#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ -#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ -#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ -#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ -#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ -#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ -#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ -#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ - -/* Wake Up Status */ -#define E1000_WUS_LNKC E1000_WUFC_LNKC -#define E1000_WUS_MAG E1000_WUFC_MAG -#define E1000_WUS_EX E1000_WUFC_EX -#define E1000_WUS_MC E1000_WUFC_MC -#define E1000_WUS_BC E1000_WUFC_BC - -/* Extended Device Control */ -#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */ -#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */ -#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */ -#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ -#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */ -#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ -/* Physical Func Reset Done Indication */ -#define E1000_CTRL_EXT_PFRSTD 0x00004000 -#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ -#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ -#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */ -#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 -/* Offset of the link mode field in Ctrl Ext register */ -#define E1000_CTRL_EXT_LINK_MODE_OFFSET 22 -#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 -#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 -#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 -#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 -#define E1000_CTRL_EXT_EIAME 0x01000000 -#define E1000_CTRL_EXT_IRCA 0x00000001 -#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ -#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ -#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ -#define E1000_I2CCMD_REG_ADDR_SHIFT 16 -#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 -#define E1000_I2CCMD_OPCODE_READ 0x08000000 -#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 -#define E1000_I2CCMD_READY 0x20000000 -#define E1000_I2CCMD_ERROR 0x80000000 -#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) -#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) -#define E1000_MAX_SGMII_PHY_REG_ADDR 255 -#define E1000_I2CCMD_PHY_TIMEOUT 200 -#define E1000_IVAR_VALID 0x80 -#define E1000_GPIE_NSICR 0x00000001 -#define E1000_GPIE_MSIX_MODE 0x00000010 -#define E1000_GPIE_EIAME 0x40000000 
-#define E1000_GPIE_PBA 0x80000000 - -/* Receive Descriptor bit definitions */ -#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ -#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ -#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ -#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ -#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ -#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ -#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ -#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ -#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ -#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ -#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ -#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ -#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ -#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ -#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ -#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ -#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ -#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ -#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ - -#define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */ -#define E1000_RXDEXT_STATERR_LB 0x00040000 -#define E1000_RXDEXT_STATERR_CE 0x01000000 -#define E1000_RXDEXT_STATERR_SE 0x02000000 -#define E1000_RXDEXT_STATERR_SEQ 0x04000000 -#define E1000_RXDEXT_STATERR_CXE 0x10000000 -#define E1000_RXDEXT_STATERR_TCPE 0x20000000 -#define E1000_RXDEXT_STATERR_IPE 0x40000000 -#define E1000_RXDEXT_STATERR_RXE 0x80000000 - -/* mask to determine if packets should be dropped due to frame errors */ -#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ - E1000_RXD_ERR_CE | \ - E1000_RXD_ERR_SE | \ - E1000_RXD_ERR_SEQ | \ - E1000_RXD_ERR_CXE | \ - E1000_RXD_ERR_RXE) - -/* Same mask, but for extended and packet split descriptors */ -#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ - E1000_RXDEXT_STATERR_CE | \ - E1000_RXDEXT_STATERR_SE | \ - E1000_RXDEXT_STATERR_SEQ | \ - E1000_RXDEXT_STATERR_CXE | \ - E1000_RXDEXT_STATERR_RXE) - -#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 -#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 -#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 -#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 -#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 -#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 - -#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 - -/* Management Control */ -#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ -#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ -#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ -#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ -#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ -/* Enable MAC address filtering */ -#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 -/* Enable MNG packets to host memory */ -#define E1000_MANC_EN_MNG2HOST 0x00200000 - -#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ -#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ -#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ -#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ - -/* Receive Control */ -#define E1000_RCTL_RST 0x00000001 /* Software reset */ -#define E1000_RCTL_EN 0x00000002 /* enable */ -#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ -#define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */ -#define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */ 
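As an aside (not part of this patch): a minimal sketch of how the Rx descriptor status/error bits removed above are typically consumed in an Rx cleanup path; the helper name and its u8 parameters are assumed for illustration only, while the bit masks are exactly the removed defines.

/* Sketch only: accept a completed frame unless it carries one of the
 * hard frame errors covered by E1000_RXD_ERR_FRAME_ERR_MASK. */
static bool rx_desc_frame_ok(u8 status, u8 errors)
{
	if (!(status & E1000_RXD_STAT_DD))	/* descriptor not written back yet */
		return false;
	if (!(status & E1000_RXD_STAT_EOP))	/* only judge the last buffer of a frame */
		return false;
	return !(errors & E1000_RXD_ERR_FRAME_ERR_MASK);
}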
-#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ -#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ -#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ -#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ -#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ -#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ -#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ -#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ -#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ -/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ -#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ -#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ -#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ -#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ -/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ -#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ -#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ -#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ -#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ -#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ -#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ -#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ -#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ -#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ -#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ - -/* Use byte values for the following shift parameters - * Usage: - * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & - * E1000_PSRCTL_BSIZE0_MASK) | - * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & - * E1000_PSRCTL_BSIZE1_MASK) | - * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & - * E1000_PSRCTL_BSIZE2_MASK) | - * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; - * E1000_PSRCTL_BSIZE3_MASK)) - * where value0 = [128..16256], default=256 - * value1 = [1024..64512], default=4096 - * value2 = [0..64512], default=4096 - * value3 = [0..64512], default=0 - */ - -#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F -#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 -#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 -#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 - -#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ -#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ -#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ -#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ - -/* SWFW_SYNC Definitions */ -#define E1000_SWFW_EEP_SM 0x01 -#define E1000_SWFW_PHY0_SM 0x02 -#define E1000_SWFW_PHY1_SM 0x04 -#define E1000_SWFW_CSR_SM 0x08 -#define E1000_SWFW_PHY2_SM 0x20 -#define E1000_SWFW_PHY3_SM 0x40 -#define E1000_SWFW_SW_MNG_SM 0x400 - -/* Device Control */ -#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ -#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ -#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */ -#define E1000_CTRL_LRST 0x00000008 /* Link reset. 
0=normal,1=reset */ -#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ -#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ -#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ -#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ -#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ -#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ -#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ -#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ -#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ -#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ -#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ -#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ -#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ -#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ -#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ -#define E1000_CTRL_RST 0x04000000 /* Global reset */ -#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ -#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ -#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ -#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ -#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ - - -#define E1000_CONNSW_ENRGSRC 0x4 -#define E1000_CONNSW_PHYSD 0x400 -#define E1000_CONNSW_PHY_PDN 0x800 -#define E1000_CONNSW_SERDESD 0x200 -#define E1000_CONNSW_AUTOSENSE_CONF 0x2 -#define E1000_CONNSW_AUTOSENSE_EN 0x1 -#define E1000_PCS_CFG_PCS_EN 8 -#define E1000_PCS_LCTL_FLV_LINK_UP 1 -#define E1000_PCS_LCTL_FSV_10 0 -#define E1000_PCS_LCTL_FSV_100 2 -#define E1000_PCS_LCTL_FSV_1000 4 -#define E1000_PCS_LCTL_FDV_FULL 8 -#define E1000_PCS_LCTL_FSD 0x10 -#define E1000_PCS_LCTL_FORCE_LINK 0x20 -#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 -#define E1000_PCS_LCTL_AN_ENABLE 0x10000 -#define E1000_PCS_LCTL_AN_RESTART 0x20000 -#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 -#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 - -#define E1000_PCS_LSTS_LINK_OK 1 -#define E1000_PCS_LSTS_SPEED_100 2 -#define E1000_PCS_LSTS_SPEED_1000 4 -#define E1000_PCS_LSTS_DUPLEX_FULL 8 -#define E1000_PCS_LSTS_SYNK_OK 0x10 -#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 - -/* Device Status */ -#define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */ -#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ -#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ -#define E1000_STATUS_FUNC_SHIFT 2 -#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ -#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ -#define E1000_STATUS_SPEED_MASK 0x000000C0 -#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ -#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ -#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ -#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */ -#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ -#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */ -#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ -#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ - -#define SPEED_10 10 -#define SPEED_100 100 -#define SPEED_1000 1000 -#define SPEED_2500 2500 -#define HALF_DUPLEX 1 -#define FULL_DUPLEX 2 - - -#define ADVERTISE_10_HALF 0x0001 -#define ADVERTISE_10_FULL 0x0002 -#define ADVERTISE_100_HALF 0x0004 -#define ADVERTISE_100_FULL 0x0008 -#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ 
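As an aside (not part of this patch): a minimal sketch of how the Device Status speed/duplex bits above map onto the SPEED_* and *_DUPLEX constants; the helper name is assumed for illustration, and the logic mirrors the generic get-speed-and-duplex handling of these fields.

/* Sketch only: translate a raw STATUS register value into the
 * SPEED_* and *_DUPLEX constants defined above. */
static void decode_status_speed_duplex(u32 status, u16 *speed, u16 *duplex)
{
	if ((status & E1000_STATUS_SPEED_MASK) == E1000_STATUS_SPEED_1000)
		*speed = SPEED_1000;
	else if ((status & E1000_STATUS_SPEED_MASK) == E1000_STATUS_SPEED_100)
		*speed = SPEED_100;
	else
		*speed = SPEED_10;

	*duplex = (status & E1000_STATUS_FD) ? FULL_DUPLEX : HALF_DUPLEX;
}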
-#define ADVERTISE_1000_FULL 0x0020 - -/* 1000/H is not supported, nor spec-compliant. */ -#define E1000_ALL_SPEED_DUPLEX ( \ - ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ - ADVERTISE_100_FULL | ADVERTISE_1000_FULL) -#define E1000_ALL_NOT_GIG ( \ - ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ - ADVERTISE_100_FULL) -#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) -#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) -#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) - -#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX - -/* LED Control */ -#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F -#define E1000_LEDCTL_LED0_MODE_SHIFT 0 -#define E1000_LEDCTL_LED0_IVRT 0x00000040 -#define E1000_LEDCTL_LED0_BLINK 0x00000080 - -#define E1000_LEDCTL_MODE_LED_ON 0xE -#define E1000_LEDCTL_MODE_LED_OFF 0xF - -/* Transmit Descriptor bit definitions */ -#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ -#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ -#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ -#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ -#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ -#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ -#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ -#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ -#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ -#define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ -#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ -#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ -#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ -#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ -#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ -#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ -#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ -#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ -#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ -#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ -#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ - -/* Transmit Control */ -#define E1000_TCTL_EN 0x00000002 /* enable Tx */ -#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ -#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ -#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ -#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ -#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ - -/* Transmit Arbitration Count */ -#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ - -/* SerDes Control */ -#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 -#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410 - -/* Receive Checksum Control */ -#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ -#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ -#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ -#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ -#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ - -/* Header split receive */ -#define E1000_RFCTL_NFSW_DIS 0x00000040 -#define E1000_RFCTL_NFSR_DIS 0x00000080 -#define E1000_RFCTL_ACK_DIS 0x00001000 -#define E1000_RFCTL_EXTEN 0x00008000 -#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 -#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 -#define E1000_RFCTL_LEF 
0x00040000 - -/* Collision related configuration parameters */ -#define E1000_COLLISION_THRESHOLD 15 -#define E1000_CT_SHIFT 4 -#define E1000_COLLISION_DISTANCE 63 -#define E1000_COLD_SHIFT 12 - -/* Default values for the transmit IPG register */ -#define DEFAULT_82543_TIPG_IPGT_FIBER 9 -#define DEFAULT_82543_TIPG_IPGT_COPPER 8 - -#define E1000_TIPG_IPGT_MASK 0x000003FF - -#define DEFAULT_82543_TIPG_IPGR1 8 -#define E1000_TIPG_IPGR1_SHIFT 10 - -#define DEFAULT_82543_TIPG_IPGR2 6 -#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 -#define E1000_TIPG_IPGR2_SHIFT 20 - -/* Ethertype field values */ -#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ - -#define ETHERNET_FCS_SIZE 4 -#define MAX_JUMBO_FRAME_SIZE 0x3F00 - -/* Extended Configuration Control and Size */ -#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 -#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 -#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 -#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 -#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 -#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 -#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 -#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 -#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 - -#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 -#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 -#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 -#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 - -#define E1000_KABGTXD_BGSQLBIAS 0x00050000 - -/* PBA constants */ -#define E1000_PBA_8K 0x0008 /* 8KB */ -#define E1000_PBA_10K 0x000A /* 10KB */ -#define E1000_PBA_12K 0x000C /* 12KB */ -#define E1000_PBA_14K 0x000E /* 14KB */ -#define E1000_PBA_16K 0x0010 /* 16KB */ -#define E1000_PBA_18K 0x0012 -#define E1000_PBA_20K 0x0014 -#define E1000_PBA_22K 0x0016 -#define E1000_PBA_24K 0x0018 -#define E1000_PBA_26K 0x001A -#define E1000_PBA_30K 0x001E -#define E1000_PBA_32K 0x0020 -#define E1000_PBA_34K 0x0022 -#define E1000_PBA_35K 0x0023 -#define E1000_PBA_38K 0x0026 -#define E1000_PBA_40K 0x0028 -#define E1000_PBA_48K 0x0030 /* 48KB */ -#define E1000_PBA_64K 0x0040 /* 64KB */ - -#define E1000_PBA_RXA_MASK 0xFFFF - -#define E1000_PBS_16K E1000_PBA_16K - -#define IFS_MAX 80 -#define IFS_MIN 40 -#define IFS_RATIO 4 -#define IFS_STEP 10 -#define MIN_NUM_XMITS 1000 - -/* SW Semaphore Register */ -#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ -#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ -#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ - -#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ - -/* Interrupt Cause Read */ -#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ -#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ -#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ -#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ -#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. 
threshold (0) */ -#define E1000_ICR_RXO 0x00000040 /* Rx overrun */ -#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ -#define E1000_ICR_VMMB 0x00000100 /* VM MB event */ -#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ -#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ -#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ -#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ -#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ -#define E1000_ICR_TXD_LOW 0x00008000 -#define E1000_ICR_MNG 0x00040000 /* Manageability event */ -#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ -#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ -/* If this bit asserted, the driver should claim the interrupt */ -#define E1000_ICR_INT_ASSERTED 0x80000000 -#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ -#define E1000_ICR_FER 0x00400000 /* Fatal Error */ - -#define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/ -#define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */ - - -/* Extended Interrupt Cause Read */ -#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ -#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ -#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ -#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ -#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ -#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ -#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ -#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ -#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ -#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ -/* TCP Timer */ -#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ -#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ -#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ -#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ - -/* This defines the bits that are set in the Interrupt Mask - * Set/Read Register. Each bit is documented below: - * o RXT0 = Receiver Timer Interrupt (ring 0) - * o TXDW = Transmit Descriptor Written Back - * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) - * o RXSEQ = Receive Sequence Error - * o LSC = Link Status Change - */ -#define IMS_ENABLE_MASK ( \ - E1000_IMS_RXT0 | \ - E1000_IMS_TXDW | \ - E1000_IMS_RXDMT0 | \ - E1000_IMS_RXSEQ | \ - E1000_IMS_LSC) - -/* Interrupt Mask Set */ -#define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */ -#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ -#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ -#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ -#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ -#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. 
threshold */ -#define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */ -#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ -#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW -#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ -#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ -#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ -#define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */ - -#define E1000_IMS_THS E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/ -#define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */ -/* Extended Interrupt Mask Set */ -#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ -#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ -#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ -#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ -#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ -#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ -#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ -#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ -#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ -#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ - -/* Interrupt Cause Set */ -#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ -#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ -#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ - -/* Extended Interrupt Cause Set */ -#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ -#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ -#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ -#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ -#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ -#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ -#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ -#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ -#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ -#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ - -#define E1000_EITR_ITR_INT_MASK 0x0000FFFF -/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ -#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ -#define E1000_EITR_INTERVAL 0x00007FFC - -/* Transmit Descriptor Control */ -#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ -#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ -#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ -#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ -#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ -#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ -/* Enable the counting of descriptors still to be processed. 
*/ -#define E1000_TXDCTL_COUNT_DESC 0x00400000 - -/* Flow Control Constants */ -#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 -#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 -#define FLOW_CONTROL_TYPE 0x8808 - -/* 802.1q VLAN Packet Size */ -#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ -#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ - -/* Receive Address - * Number of high/low register pairs in the RAR. The RAR (Receive Address - * Registers) holds the directed and multicast addresses that we monitor. - * Technically, we have 16 spots. However, we reserve one of these spots - * (RAR[15]) for our directed address used by controllers with - * manageability enabled, allowing us room for 15 multicast addresses. - */ -#define E1000_RAR_ENTRIES 15 -#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ -#define E1000_RAL_MAC_ADDR_LEN 4 -#define E1000_RAH_MAC_ADDR_LEN 2 -#define E1000_RAH_QUEUE_MASK_82575 0x000C0000 -#define E1000_RAH_POOL_1 0x00040000 - -/* Error Codes */ -#define E1000_SUCCESS 0 -#define E1000_ERR_NVM 1 -#define E1000_ERR_PHY 2 -#define E1000_ERR_CONFIG 3 -#define E1000_ERR_PARAM 4 -#define E1000_ERR_MAC_INIT 5 -#define E1000_ERR_PHY_TYPE 6 -#define E1000_ERR_RESET 9 -#define E1000_ERR_MASTER_REQUESTS_PENDING 10 -#define E1000_ERR_HOST_INTERFACE_COMMAND 11 -#define E1000_BLK_PHY_RESET 12 -#define E1000_ERR_SWFW_SYNC 13 -#define E1000_NOT_IMPLEMENTED 14 -#define E1000_ERR_MBX 15 -#define E1000_ERR_INVALID_ARGUMENT 16 -#define E1000_ERR_NO_SPACE 17 -#define E1000_ERR_NVM_PBA_SECTION 18 -#define E1000_ERR_I2C 19 -#define E1000_ERR_INVM_VALUE_NOT_FOUND 20 - -/* Loop limit on how long we wait for auto-negotiation to complete */ -#define FIBER_LINK_UP_LIMIT 50 -#define COPPER_LINK_UP_LIMIT 10 -#define PHY_AUTO_NEG_LIMIT 45 -#define PHY_FORCE_LIMIT 20 -/* Number of 100 microseconds we wait for PCI Express master disable */ -#define MASTER_DISABLE_TIMEOUT 800 -/* Number of milliseconds we wait for PHY configuration done after MAC reset */ -#define PHY_CFG_TIMEOUT 100 -/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ -#define MDIO_OWNERSHIP_TIMEOUT 10 -/* Number of milliseconds for NVM auto read done after MAC reset. 
*/ -#define AUTO_READ_DONE_TIMEOUT 10 - -/* Flow Control */ -#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ -#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ -#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ - -/* Transmit Configuration Word */ -#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ -#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ -#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ -#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ -#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ - -/* Receive Configuration Word */ -#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ -#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ -#define E1000_RXCW_C 0x20000000 /* Receive config */ -#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ - -#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ -#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ - -#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ -#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ -#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 -#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 -#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 -#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 -#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A -#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ -#define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ - -#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF -#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 -#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 -#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 -#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 -#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 - -#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 -#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 -#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 -#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 -#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 -#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 -#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 -#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 -#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 -#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 -#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 - -#define E1000_TIMINCA_16NS_SHIFT 24 -#define E1000_TIMINCA_INCPERIOD_SHIFT 24 -#define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF - -#define E1000_TSICR_TXTS 0x00000002 -#define E1000_TSIM_TXTS 0x00000002 -/* TUPLE Filtering Configuration */ -#define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */ -#define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */ -#define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */ -/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */ -#define E1000_TTQF_PROTOCOL_TCP 0x0 -/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ -#define E1000_TTQF_PROTOCOL_UDP 0x1 -/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ -#define E1000_TTQF_PROTOCOL_SCTP 0x2 -#define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */ -#define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shfit */ -#define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */ -#define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */ -#define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* 
IMIR Reg Clear Mask */ -#define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ -#define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ -#define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ - -#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ -#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ -#define E1000_MDICNFG_PHY_MASK 0x03E00000 -#define E1000_MDICNFG_PHY_SHIFT 21 - -#define E1000_MEDIA_PORT_COPPER 1 -#define E1000_MEDIA_PORT_OTHER 2 -#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 -#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 -#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ -#define E1000_M88E1112_MAC_CTRL_1 0x10 -#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ -#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 -#define E1000_M88E1112_PAGE_ADDR 0x16 -#define E1000_M88E1112_STATUS 0x01 - -#define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */ -#define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */ -#define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */ -#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ -#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */ - -/* I350 EEE defines */ -#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */ -#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */ -#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */ -#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */ -#define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */ -/* EEE status */ -#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ -#define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */ -#define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */ -#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ -#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ -#define E1000_M88E1543_EEE_CTRL_1 0x0 -#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ -#define E1000_EEE_ADV_DEV_I354 7 -#define E1000_EEE_ADV_ADDR_I354 60 -#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ -#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ -#define E1000_PCS_STATUS_DEV_I354 3 -#define E1000_PCS_STATUS_ADDR_I354 1 -#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 -#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 -#define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */ -#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ -#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ -/* PCI Express Control */ -#define E1000_GCR_RXD_NO_SNOOP 0x00000001 -#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 -#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 -#define E1000_GCR_TXD_NO_SNOOP 0x00000008 -#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 -#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 -#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 -#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 -#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 -#define E1000_GCR_CAP_VER2 0x00040000 - -#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ - E1000_GCR_RXDSCW_NO_SNOOP | \ - E1000_GCR_RXDSCR_NO_SNOOP | \ - E1000_GCR_TXD_NO_SNOOP | \ - E1000_GCR_TXDSCW_NO_SNOOP | \ - E1000_GCR_TXDSCR_NO_SNOOP) - -#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ - -/* mPHY address control and data registers */ -#define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */ 
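As an aside (not part of this patch): a minimal sketch of the mask-and-shift pattern implied by the MDICNFG fields above, where the 5-bit PHY address occupies bits 25:21; the helper name is assumed and 'mdicnfg' stands for a raw register read.

/* Sketch only: pull the externally managed PHY address out of a raw
 * MDICNFG read using the mask/shift defined above. */
static u32 mdicnfg_phy_addr(u32 mdicnfg)
{
	return (mdicnfg & E1000_MDICNFG_PHY_MASK) >> E1000_MDICNFG_PHY_SHIFT;
}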
-#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 -#define E1000_MPHY_DATA 0x0E10 /* Data Register */ - -/* AFE CSR Offset for PCS CLK */ -#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 -/* Override for near end digital loopback. */ -#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 - -/* PHY Control Register */ -#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ -#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ -#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ -#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ -#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ -#define MII_CR_POWER_DOWN 0x0800 /* Power down */ -#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ -#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ -#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ -#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ -#define MII_CR_SPEED_1000 0x0040 -#define MII_CR_SPEED_100 0x2000 -#define MII_CR_SPEED_10 0x0000 - -/* PHY Status Register */ -#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ -#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ -#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ -#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ -#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ -#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ -#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ -#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ -#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ -#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ -#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ -#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ -#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ -#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ -#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ - -/* Autoneg Advertisement Register */ -#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ -#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ -#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ -#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ -#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ -#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ -#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ -#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ -#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ -#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ - -/* Link Partner Ability Register (Base Page) */ -#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ -#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */ -#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */ -#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */ -#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ -#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ -#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ -#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */ -#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */ -#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */ -#define NWAY_LPAR_NEXT_PAGE 
0x8000 /* Next Page ability supported */ - -/* Autoneg Expansion Register */ -#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ -#define NWAY_ER_PAGE_RXD 0x0002 /* LP 10T Half Dplx Capable */ -#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP 10T Full Dplx Capable */ -#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP 100TX Half Dplx Capable */ -#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP 100TX Full Dplx Capable */ - -/* 1000BASE-T Control Register */ -#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ -#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ -#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ -/* 1=Repeater/switch device port 0=DTE device */ -#define CR_1000T_REPEATER_DTE 0x0400 -/* 1=Configure PHY as Master 0=Configure PHY as Slave */ -#define CR_1000T_MS_VALUE 0x0800 -/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ -#define CR_1000T_MS_ENABLE 0x1000 -#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ -#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ -#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ -#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ -#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ - -/* 1000BASE-T Status Register */ -#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */ -#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */ -#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ -#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ -#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ -#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ -#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */ -#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ - -#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 - -/* PHY 1000 MII Register/Bit Definitions */ -/* PHY Registers defined by IEEE */ -#define PHY_CONTROL 0x00 /* Control Register */ -#define PHY_STATUS 0x01 /* Status Register */ -#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ -#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ -#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ -#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ -#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ -#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ -#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ -#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ -#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ - -#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ - -/* NVM Control */ -#define E1000_EECD_SK 0x00000001 /* NVM Clock */ -#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ -#define E1000_EECD_DI 0x00000004 /* NVM Data In */ -#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ -#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ -#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ -#define E1000_EECD_PRES 0x00000100 /* NVM Present */ -#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ -#define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */ -#define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */ -#define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */ -#define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */ -/* NVM Addressing bits based 
on type 0=small, 1=large */ -#define E1000_EECD_ADDR_BITS 0x00000400 -#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ -#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ -#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ -#define E1000_EECD_SIZE_EX_SHIFT 11 -#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ -#define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */ -#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ -#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) -#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ -#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */ -#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ -#define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */ -#define E1000_FLUDONE_ATTEMPTS 20000 -#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ -#define E1000_I210_FIFO_SEL_RX 0x00 -#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) -#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) -#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 -#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 - -#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ -/* Secure FLASH mode requires removing MSb */ -#define E1000_I210_FW_PTR_MASK 0x7FFF -/* Firmware code revision field word offset*/ -#define E1000_I210_FW_VER_OFFSET 328 - -#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */ -#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ -#define E1000_NVM_RW_REG_START 1 /* Start operation */ -#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ -#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ -#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ -#define E1000_FLASH_UPDATES 2000 - -/* NVM Word Offsets */ -#define NVM_COMPAT 0x0003 -#define NVM_ID_LED_SETTINGS 0x0004 -#define NVM_VERSION 0x0005 -#define E1000_I210_NVM_FW_MODULE_PTR 0x0010 -#define E1000_I350_NVM_FW_MODULE_PTR 0x0051 -#define NVM_FUTURE_INIT_WORD1 0x0019 -#define NVM_ETRACK_WORD 0x0042 -#define NVM_ETRACK_HIWORD 0x0043 -#define NVM_COMB_VER_OFF 0x0083 -#define NVM_COMB_VER_PTR 0x003d - -/* NVM version defines */ -#define NVM_MAJOR_MASK 0xF000 -#define NVM_MINOR_MASK 0x0FF0 -#define NVM_IMAGE_ID_MASK 0x000F -#define NVM_COMB_VER_MASK 0x00FF -#define NVM_MAJOR_SHIFT 12 -#define NVM_MINOR_SHIFT 4 -#define NVM_COMB_VER_SHFT 8 -#define NVM_VER_INVALID 0xFFFF -#define NVM_ETRACK_SHIFT 16 -#define NVM_ETRACK_VALID 0x8000 -#define NVM_NEW_DEC_MASK 0x0F00 -#define NVM_HEX_CONV 16 -#define NVM_HEX_TENS 10 - -/* FW version defines */ -/* Offset of "Loader patch ptr" in Firmware Header */ -#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01 -/* Patch generation hour & minutes */ -#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04 -/* Patch generation month & day */ -#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05 -/* Patch generation year */ -#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06 -/* Patch major & minor numbers */ -#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07 - -#define NVM_MAC_ADDR 0x0000 -#define NVM_SUB_DEV_ID 0x000B -#define NVM_SUB_VEN_ID 0x000C -#define NVM_DEV_ID 0x000D -#define NVM_VEN_ID 0x000E -#define NVM_INIT_CTRL_2 0x000F -#define NVM_INIT_CTRL_4 0x0013 -#define NVM_LED_1_CFG 0x001C -#define NVM_LED_0_2_CFG 0x001F - -#define NVM_COMPAT_VALID_CSUM 0x0001 -#define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040 - -#define NVM_ETS_CFG 0x003E 
-#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0 -#define NVM_ETS_LTHRES_DELTA_SHIFT 6 -#define NVM_ETS_TYPE_MASK 0x0038 -#define NVM_ETS_TYPE_SHIFT 3 -#define NVM_ETS_TYPE_EMC 0x000 -#define NVM_ETS_NUM_SENSORS_MASK 0x0007 -#define NVM_ETS_DATA_LOC_MASK 0x3C00 -#define NVM_ETS_DATA_LOC_SHIFT 10 -#define NVM_ETS_DATA_INDEX_MASK 0x0300 -#define NVM_ETS_DATA_INDEX_SHIFT 8 -#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF -#define NVM_INIT_CONTROL2_REG 0x000F -#define NVM_INIT_CONTROL3_PORT_B 0x0014 -#define NVM_INIT_3GIO_3 0x001A -#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 -#define NVM_INIT_CONTROL3_PORT_A 0x0024 -#define NVM_CFG 0x0012 -#define NVM_ALT_MAC_ADDR_PTR 0x0037 -#define NVM_CHECKSUM_REG 0x003F -#define NVM_COMPATIBILITY_REG_3 0x0003 -#define NVM_COMPATIBILITY_BIT_MASK 0x8000 - -#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ -#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ -#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ -#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ - -#define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0) - -/* Mask bits for fields in Word 0x24 of the NVM */ -#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ -#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */ -/* Offset of Link Mode bits for 82575/82576 */ -#define NVM_WORD24_LNK_MODE_OFFSET 8 -/* Offset of Link Mode bits for 82580 up */ -#define NVM_WORD24_82580_LNK_MODE_OFFSET 4 - - -/* Mask bits for fields in Word 0x0f of the NVM */ -#define NVM_WORD0F_PAUSE_MASK 0x3000 -#define NVM_WORD0F_PAUSE 0x1000 -#define NVM_WORD0F_ASM_DIR 0x2000 - -/* Mask bits for fields in Word 0x1a of the NVM */ -#define NVM_WORD1A_ASPM_MASK 0x000C - -/* Mask bits for fields in Word 0x03 of the EEPROM */ -#define NVM_COMPAT_LOM 0x0800 - -/* length of string needed to store PBA number */ -#define E1000_PBANUM_LENGTH 11 - -/* For checksumming, the sum of all words in the NVM should equal 0xBABA. 
*/ -#define NVM_SUM 0xBABA - -/* PBA (printed board assembly) number words */ -#define NVM_PBA_OFFSET_0 8 -#define NVM_PBA_OFFSET_1 9 -#define NVM_PBA_PTR_GUARD 0xFAFA -#define NVM_RESERVED_WORD 0xFFFF -#define NVM_WORD_SIZE_BASE_SHIFT 6 - -/* NVM Commands - SPI */ -#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ -#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ -#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ -#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ -#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ -#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ - -/* SPI NVM Status Register */ -#define NVM_STATUS_RDY_SPI 0x01 - -/* Word definitions for ID LED Settings */ -#define ID_LED_RESERVED_0000 0x0000 -#define ID_LED_RESERVED_FFFF 0xFFFF -#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ - (ID_LED_OFF1_OFF2 << 8) | \ - (ID_LED_DEF1_DEF2 << 4) | \ - (ID_LED_DEF1_DEF2)) -#define ID_LED_DEF1_DEF2 0x1 -#define ID_LED_DEF1_ON2 0x2 -#define ID_LED_DEF1_OFF2 0x3 -#define ID_LED_ON1_DEF2 0x4 -#define ID_LED_ON1_ON2 0x5 -#define ID_LED_ON1_OFF2 0x6 -#define ID_LED_OFF1_DEF2 0x7 -#define ID_LED_OFF1_ON2 0x8 -#define ID_LED_OFF1_OFF2 0x9 - -#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF -#define IGP_ACTIVITY_LED_ENABLE 0x0300 -#define IGP_LED3_MODE 0x07000000 - -/* PCI/PCI-X/PCI-EX Config space */ -#define PCI_HEADER_TYPE_REGISTER 0x0E -#define PCIE_LINK_STATUS 0x12 -#define PCIE_DEVICE_CONTROL2 0x28 - -#define PCI_HEADER_TYPE_MULTIFUNC 0x80 -#define PCIE_LINK_WIDTH_MASK 0x3F0 -#define PCIE_LINK_WIDTH_SHIFT 4 -#define PCIE_LINK_SPEED_MASK 0x0F -#define PCIE_LINK_SPEED_2500 0x01 -#define PCIE_LINK_SPEED_5000 0x02 -#define PCIE_DEVICE_CONTROL2_16ms 0x0005 - -#ifndef ETH_ADDR_LEN -#define ETH_ADDR_LEN 6 -#endif - -#define PHY_REVISION_MASK 0xFFFFFFF0 -#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ -#define MAX_PHY_MULTI_PAGE_REG 0xF - -/* Bit definitions for valid PHY IDs. 
- * I = Integrated - * E = External - */ -#define M88E1000_E_PHY_ID 0x01410C50 -#define M88E1000_I_PHY_ID 0x01410C30 -#define M88E1011_I_PHY_ID 0x01410C20 -#define IGP01E1000_I_PHY_ID 0x02A80380 -#define M88E1111_I_PHY_ID 0x01410CC0 -#define M88E1543_E_PHY_ID 0x01410EA0 -#define M88E1112_E_PHY_ID 0x01410C90 -#define I347AT4_E_PHY_ID 0x01410DC0 -#define M88E1340M_E_PHY_ID 0x01410DF0 -#define GG82563_E_PHY_ID 0x01410CA0 -#define IGP03E1000_E_PHY_ID 0x02A80390 -#define IFE_E_PHY_ID 0x02A80330 -#define IFE_PLUS_E_PHY_ID 0x02A80320 -#define IFE_C_E_PHY_ID 0x02A80310 -#define I82580_I_PHY_ID 0x015403A0 -#define I350_I_PHY_ID 0x015403B0 -#define I210_I_PHY_ID 0x01410C00 -#define IGP04E1000_E_PHY_ID 0x02A80391 -#define M88_VENDOR 0x0141 - -/* M88E1000 Specific Registers */ -#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */ -#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */ -#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */ -#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ - -#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */ -#define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */ - -/* M88E1000 PHY Specific Control Register */ -#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */ -/* MDI Crossover Mode bits 6:5 Manual MDI configuration */ -#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 -#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ -/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ -#define M88E1000_PSCR_AUTO_X_1000T 0x0040 -/* Auto crossover enabled all speeds */ -#define M88E1000_PSCR_AUTO_X_MODE 0x0060 -#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */ - -/* M88E1000 PHY Specific Status Register */ -#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ -#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ -#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ -/* 0 = <50M - * 1 = 50-80M - * 2 = 80-110M - * 3 = 110-140M - * 4 = >140M - */ -#define M88E1000_PSSR_CABLE_LENGTH 0x0380 -#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ -#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ -#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ -#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ - -#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 - -/* Number of times we will attempt to autonegotiate before downshifting if we - * are the master - */ -#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 -#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 -/* Number of times we will attempt to autonegotiate before downshifting if we - * are the slave - */ -#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 -#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 -#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ - -/* Intel I347AT4 Registers */ -#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ -#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ -#define I347AT4_PAGE_SELECT 0x16 - -/* I347AT4 Extended PHY Specific Control Register */ - -/* Number of times we will attempt to autonegotiate before downshifting if we - * are the master - */ -#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 -#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 -#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 -#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 -#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 -#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 -#define I347AT4_PSCR_DOWNSHIFT_5X 
0x4000 -#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 -#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 -#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 - -/* I347AT4 PHY Cable Diagnostics Control */ -#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ - -/* M88E1112 only registers */ -#define M88E1112_VCT_DSP_DISTANCE 0x001A - -/* M88EC018 Rev 2 specific DownShift settings */ -#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 -#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 - -/* Bits... - * 15-5: page - * 4-0: register offset - */ -#define GG82563_PAGE_SHIFT 5 -#define GG82563_REG(page, reg) \ - (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) -#define GG82563_MIN_ALT_REG 30 - -/* GG82563 Specific Registers */ -#define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */ -#define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */ -#define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */ -#define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */ - -/* MAC Specific Control Register */ -#define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21) - -#define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */ - -/* Page 193 - Port Control Registers */ -/* Kumeran Mode Control */ -#define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16) -#define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */ - -/* Page 194 - KMRN Registers */ -#define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */ - -/* MDI Control */ -#define E1000_MDIC_REG_MASK 0x001F0000 -#define E1000_MDIC_REG_SHIFT 16 -#define E1000_MDIC_PHY_MASK 0x03E00000 -#define E1000_MDIC_PHY_SHIFT 21 -#define E1000_MDIC_OP_WRITE 0x04000000 -#define E1000_MDIC_OP_READ 0x08000000 -#define E1000_MDIC_READY 0x10000000 -#define E1000_MDIC_ERROR 0x40000000 -#define E1000_MDIC_DEST 0x80000000 - -/* SerDes Control */ -#define E1000_GEN_CTL_READY 0x80000000 -#define E1000_GEN_CTL_ADDRESS_SHIFT 8 -#define E1000_GEN_POLL_TIMEOUT 640 - -/* LinkSec register fields */ -#define E1000_LSECTXCAP_SUM_MASK 0x00FF0000 -#define E1000_LSECTXCAP_SUM_SHIFT 16 -#define E1000_LSECRXCAP_SUM_MASK 0x00FF0000 -#define E1000_LSECRXCAP_SUM_SHIFT 16 - -#define E1000_LSECTXCTRL_EN_MASK 0x00000003 -#define E1000_LSECTXCTRL_DISABLE 0x0 -#define E1000_LSECTXCTRL_AUTH 0x1 -#define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2 -#define E1000_LSECTXCTRL_AISCI 0x00000020 -#define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 -#define E1000_LSECTXCTRL_RSV_MASK 0x000000D8 - -#define E1000_LSECRXCTRL_EN_MASK 0x0000000C -#define E1000_LSECRXCTRL_EN_SHIFT 2 -#define E1000_LSECRXCTRL_DISABLE 0x0 -#define E1000_LSECRXCTRL_CHECK 0x1 -#define E1000_LSECRXCTRL_STRICT 0x2 -#define E1000_LSECRXCTRL_DROP 0x3 -#define E1000_LSECRXCTRL_PLSH 0x00000040 -#define E1000_LSECRXCTRL_RP 0x00000080 -#define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33 - -/* Tx Rate-Scheduler Config fields */ -#define E1000_RTTBCNRC_RS_ENA 0x80000000 -#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF -#define E1000_RTTBCNRC_RF_INT_SHIFT 14 -#define E1000_RTTBCNRC_RF_INT_MASK \ - (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) - -/* DMA Coalescing register fields */ -/* DMA Coalescing Watchdog Timer */ -#define E1000_DMACR_DMACWT_MASK 0x00003FFF -/* DMA Coalescing Rx Threshold */ -#define E1000_DMACR_DMACTHR_MASK 0x00FF0000 -#define E1000_DMACR_DMACTHR_SHIFT 16 -/* Lx when no PCIe transactions */ -#define E1000_DMACR_DMAC_LX_MASK 0x30000000 -#define E1000_DMACR_DMAC_LX_SHIFT 28 -#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing 
*/ -/* DMA Coalescing BMC-to-OS Watchdog Enable */ -#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 - -/* DMA Coalescing Transmit Threshold */ -#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF - -#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ - -/* Rx Traffic Rate Threshold */ -#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF -/* Rx packet rate in current window */ -#define E1000_DMCRTRH_LRPRCW 0x80000000 - -/* DMA Coal Rx Traffic Current Count */ -#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF - -/* Flow ctrl Rx Threshold High val */ -#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 -#define E1000_FCRTC_RTH_COAL_SHIFT 4 -/* Lx power decision based on DMA coal */ -#define E1000_PCIEMISC_LX_DECISION 0x00000080 - -#define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ -#define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */ -#define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */ - -/* Proxy Filter Control */ -#define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */ -#define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */ -#define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */ -#define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */ -#define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */ -#define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */ -#define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */ -#define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */ -#define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */ -/* Proxy Status */ -#define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */ - -/* Firmware Status */ -#define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */ -/* VF Control */ -#define E1000_VTCTRL_RST 0x04000000 /* Reset VF */ - -#define E1000_STATUS_LAN_ID_MASK 0x00000000C /* Mask for Lan ID field */ -/* Lan ID bit field offset in status register */ -#define E1000_STATUS_LAN_ID_OFFSET 2 -#define E1000_VFTA_ENTRIES 128 -#ifndef E1000_UNUSEDARG -#define E1000_UNUSEDARG -#endif /* E1000_UNUSEDARG */ -#endif /* _E1000_DEFINES_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h deleted file mode 100644 index ed43ef5a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h +++ /dev/null @@ -1,778 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_HW_H_ -#define _E1000_HW_H_ - -#include "e1000_osdep.h" -#include "e1000_regs.h" -#include "e1000_defines.h" - -struct e1000_hw; - -#define E1000_DEV_ID_82576 0x10C9 -#define E1000_DEV_ID_82576_FIBER 0x10E6 -#define E1000_DEV_ID_82576_SERDES 0x10E7 -#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 -#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 -#define E1000_DEV_ID_82576_NS 0x150A -#define E1000_DEV_ID_82576_NS_SERDES 0x1518 -#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D -#define E1000_DEV_ID_82575EB_COPPER 0x10A7 -#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 -#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 -#define E1000_DEV_ID_82580_COPPER 0x150E -#define E1000_DEV_ID_82580_FIBER 0x150F -#define E1000_DEV_ID_82580_SERDES 0x1510 -#define E1000_DEV_ID_82580_SGMII 0x1511 -#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 -#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 -#define E1000_DEV_ID_I350_COPPER 0x1521 -#define E1000_DEV_ID_I350_FIBER 0x1522 -#define E1000_DEV_ID_I350_SERDES 0x1523 -#define E1000_DEV_ID_I350_SGMII 0x1524 -#define E1000_DEV_ID_I350_DA4 0x1546 -#define E1000_DEV_ID_I210_COPPER 0x1533 -#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 -#define E1000_DEV_ID_I210_COPPER_IT 0x1535 -#define E1000_DEV_ID_I210_FIBER 0x1536 -#define E1000_DEV_ID_I210_SERDES 0x1537 -#define E1000_DEV_ID_I210_SGMII 0x1538 -#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B -#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C -#define E1000_DEV_ID_I211_COPPER 0x1539 -#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 -#define E1000_DEV_ID_I354_SGMII 0x1F41 -#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 -#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 -#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A -#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C -#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 - -#define E1000_REVISION_0 0 -#define E1000_REVISION_1 1 -#define E1000_REVISION_2 2 -#define E1000_REVISION_3 3 -#define E1000_REVISION_4 4 - -#define E1000_FUNC_0 0 -#define E1000_FUNC_1 1 -#define E1000_FUNC_2 2 -#define E1000_FUNC_3 3 - -#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 -#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 -#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 -#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 - -enum e1000_mac_type { - e1000_undefined = 0, - e1000_82575, - e1000_82576, - e1000_82580, - e1000_i350, - e1000_i354, - e1000_i210, - e1000_i211, - e1000_num_macs /* List is 1-based, so subtract 1 for true count. 
*/ -}; - -enum e1000_media_type { - e1000_media_type_unknown = 0, - e1000_media_type_copper = 1, - e1000_media_type_fiber = 2, - e1000_media_type_internal_serdes = 3, - e1000_num_media_types -}; - -enum e1000_nvm_type { - e1000_nvm_unknown = 0, - e1000_nvm_none, - e1000_nvm_eeprom_spi, - e1000_nvm_flash_hw, - e1000_nvm_invm, - e1000_nvm_flash_sw -}; - -enum e1000_nvm_override { - e1000_nvm_override_none = 0, - e1000_nvm_override_spi_small, - e1000_nvm_override_spi_large, -}; - -enum e1000_phy_type { - e1000_phy_unknown = 0, - e1000_phy_none, - e1000_phy_m88, - e1000_phy_igp, - e1000_phy_igp_2, - e1000_phy_gg82563, - e1000_phy_igp_3, - e1000_phy_ife, - e1000_phy_82580, - e1000_phy_vf, - e1000_phy_i210, -}; - -enum e1000_bus_type { - e1000_bus_type_unknown = 0, - e1000_bus_type_pci, - e1000_bus_type_pcix, - e1000_bus_type_pci_express, - e1000_bus_type_reserved -}; - -enum e1000_bus_speed { - e1000_bus_speed_unknown = 0, - e1000_bus_speed_33, - e1000_bus_speed_66, - e1000_bus_speed_100, - e1000_bus_speed_120, - e1000_bus_speed_133, - e1000_bus_speed_2500, - e1000_bus_speed_5000, - e1000_bus_speed_reserved -}; - -enum e1000_bus_width { - e1000_bus_width_unknown = 0, - e1000_bus_width_pcie_x1, - e1000_bus_width_pcie_x2, - e1000_bus_width_pcie_x4 = 4, - e1000_bus_width_pcie_x8 = 8, - e1000_bus_width_32, - e1000_bus_width_64, - e1000_bus_width_reserved -}; - -enum e1000_1000t_rx_status { - e1000_1000t_rx_status_not_ok = 0, - e1000_1000t_rx_status_ok, - e1000_1000t_rx_status_undefined = 0xFF -}; - -enum e1000_rev_polarity { - e1000_rev_polarity_normal = 0, - e1000_rev_polarity_reversed, - e1000_rev_polarity_undefined = 0xFF -}; - -enum e1000_fc_mode { - e1000_fc_none = 0, - e1000_fc_rx_pause, - e1000_fc_tx_pause, - e1000_fc_full, - e1000_fc_default = 0xFF -}; - -enum e1000_ms_type { - e1000_ms_hw_default = 0, - e1000_ms_force_master, - e1000_ms_force_slave, - e1000_ms_auto -}; - -enum e1000_smart_speed { - e1000_smart_speed_default = 0, - e1000_smart_speed_on, - e1000_smart_speed_off -}; - -enum e1000_serdes_link_state { - e1000_serdes_link_down = 0, - e1000_serdes_link_autoneg_progress, - e1000_serdes_link_autoneg_complete, - e1000_serdes_link_forced_up -}; - -#ifndef __le16 -#define __le16 u16 -#endif -#ifndef __le32 -#define __le32 u32 -#endif -#ifndef __le64 -#define __le64 u64 -#endif -/* Receive Descriptor */ -struct e1000_rx_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - __le16 length; /* Length of data DMAed into data buffer */ - __le16 csum; /* Packet checksum */ - u8 status; /* Descriptor status */ - u8 errors; /* Descriptor Errors */ - __le16 special; -}; - -/* Receive Descriptor - Extended */ -union e1000_rx_desc_extended { - struct { - __le64 buffer_addr; - __le64 reserved; - } read; - struct { - struct { - __le32 mrq; /* Multiple Rx Queues */ - union { - __le32 rss; /* RSS Hash */ - struct { - __le16 ip_id; /* IP id */ - __le16 csum; /* Packet Checksum */ - } csum_ip; - } hi_dword; - } lower; - struct { - __le32 status_error; /* ext status/error */ - __le16 length; - __le16 vlan; /* VLAN tag */ - } upper; - } wb; /* writeback */ -}; - -#define MAX_PS_BUFFERS 4 - -/* Number of packet split data buffers (not including the header buffer) */ -#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) - -/* Receive Descriptor - Packet Split */ -union e1000_rx_desc_packet_split { - struct { - /* one buffer for protocol header(s), three data buffers */ - __le64 buffer_addr[MAX_PS_BUFFERS]; - } read; - struct { - struct { - __le32 mrq; /* Multiple Rx Queues */ - union { - 
__le32 rss; /* RSS Hash */ - struct { - __le16 ip_id; /* IP id */ - __le16 csum; /* Packet Checksum */ - } csum_ip; - } hi_dword; - } lower; - struct { - __le32 status_error; /* ext status/error */ - __le16 length0; /* length of buffer 0 */ - __le16 vlan; /* VLAN tag */ - } middle; - struct { - __le16 header_status; - /* length of buffers 1-3 */ - __le16 length[PS_PAGE_BUFFERS]; - } upper; - __le64 reserved; - } wb; /* writeback */ -}; - -/* Transmit Descriptor */ -struct e1000_tx_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - union { - __le32 data; - struct { - __le16 length; /* Data buffer length */ - u8 cso; /* Checksum offset */ - u8 cmd; /* Descriptor control */ - } flags; - } lower; - union { - __le32 data; - struct { - u8 status; /* Descriptor status */ - u8 css; /* Checksum start */ - __le16 special; - } fields; - } upper; -}; - -/* Offload Context Descriptor */ -struct e1000_context_desc { - union { - __le32 ip_config; - struct { - u8 ipcss; /* IP checksum start */ - u8 ipcso; /* IP checksum offset */ - __le16 ipcse; /* IP checksum end */ - } ip_fields; - } lower_setup; - union { - __le32 tcp_config; - struct { - u8 tucss; /* TCP checksum start */ - u8 tucso; /* TCP checksum offset */ - __le16 tucse; /* TCP checksum end */ - } tcp_fields; - } upper_setup; - __le32 cmd_and_length; - union { - __le32 data; - struct { - u8 status; /* Descriptor status */ - u8 hdr_len; /* Header length */ - __le16 mss; /* Maximum segment size */ - } fields; - } tcp_seg_setup; -}; - -/* Offload data descriptor */ -struct e1000_data_desc { - __le64 buffer_addr; /* Address of the descriptor's buffer address */ - union { - __le32 data; - struct { - __le16 length; /* Data buffer length */ - u8 typ_len_ext; - u8 cmd; - } flags; - } lower; - union { - __le32 data; - struct { - u8 status; /* Descriptor status */ - u8 popts; /* Packet Options */ - __le16 special; - } fields; - } upper; -}; - -/* Statistics counters collected by the MAC */ -struct e1000_hw_stats { - u64 crcerrs; - u64 algnerrc; - u64 symerrs; - u64 rxerrc; - u64 mpc; - u64 scc; - u64 ecol; - u64 mcc; - u64 latecol; - u64 colc; - u64 dc; - u64 tncrs; - u64 sec; - u64 cexterr; - u64 rlec; - u64 xonrxc; - u64 xontxc; - u64 xoffrxc; - u64 xofftxc; - u64 fcruc; - u64 prc64; - u64 prc127; - u64 prc255; - u64 prc511; - u64 prc1023; - u64 prc1522; - u64 gprc; - u64 bprc; - u64 mprc; - u64 gptc; - u64 gorc; - u64 gotc; - u64 rnbc; - u64 ruc; - u64 rfc; - u64 roc; - u64 rjc; - u64 mgprc; - u64 mgpdc; - u64 mgptc; - u64 tor; - u64 tot; - u64 tpr; - u64 tpt; - u64 ptc64; - u64 ptc127; - u64 ptc255; - u64 ptc511; - u64 ptc1023; - u64 ptc1522; - u64 mptc; - u64 bptc; - u64 tsctc; - u64 tsctfc; - u64 iac; - u64 icrxptc; - u64 icrxatc; - u64 ictxptc; - u64 ictxatc; - u64 ictxqec; - u64 ictxqmtc; - u64 icrxdmtc; - u64 icrxoc; - u64 cbtmpc; - u64 htdpmc; - u64 cbrdpc; - u64 cbrmpc; - u64 rpthc; - u64 hgptc; - u64 htcbdpc; - u64 hgorc; - u64 hgotc; - u64 lenerrs; - u64 scvpc; - u64 hrmpc; - u64 doosync; - u64 o2bgptc; - u64 o2bspc; - u64 b2ospc; - u64 b2ogprc; -}; - - -struct e1000_phy_stats { - u32 idle_errors; - u32 receive_errors; -}; - -struct e1000_host_mng_dhcp_cookie { - u32 signature; - u8 status; - u8 reserved0; - u16 vlan_id; - u32 reserved1; - u16 reserved2; - u8 reserved3; - u8 checksum; -}; - -/* Host Interface "Rev 1" */ -struct e1000_host_command_header { - u8 command_id; - u8 command_length; - u8 command_options; - u8 checksum; -}; - -#define E1000_HI_MAX_DATA_LENGTH 252 -struct e1000_host_command_info { - struct 
e1000_host_command_header command_header; - u8 command_data[E1000_HI_MAX_DATA_LENGTH]; -}; - -/* Host Interface "Rev 2" */ -struct e1000_host_mng_command_header { - u8 command_id; - u8 checksum; - u16 reserved1; - u16 reserved2; - u16 command_length; -}; - -#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 -struct e1000_host_mng_command_info { - struct e1000_host_mng_command_header command_header; - u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; -}; - -#include "e1000_mac.h" -#include "e1000_phy.h" -#include "e1000_nvm.h" -#include "e1000_manage.h" -#include "e1000_mbx.h" - -/* Function pointers for the MAC. */ -struct e1000_mac_operations { - s32 (*init_params)(struct e1000_hw *); - s32 (*id_led_init)(struct e1000_hw *); - s32 (*blink_led)(struct e1000_hw *); - bool (*check_mng_mode)(struct e1000_hw *); - s32 (*check_for_link)(struct e1000_hw *); - s32 (*cleanup_led)(struct e1000_hw *); - void (*clear_hw_cntrs)(struct e1000_hw *); - void (*clear_vfta)(struct e1000_hw *); - s32 (*get_bus_info)(struct e1000_hw *); - void (*set_lan_id)(struct e1000_hw *); - s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); - s32 (*led_on)(struct e1000_hw *); - s32 (*led_off)(struct e1000_hw *); - void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); - s32 (*reset_hw)(struct e1000_hw *); - s32 (*init_hw)(struct e1000_hw *); - void (*shutdown_serdes)(struct e1000_hw *); - void (*power_up_serdes)(struct e1000_hw *); - s32 (*setup_link)(struct e1000_hw *); - s32 (*setup_physical_interface)(struct e1000_hw *); - s32 (*setup_led)(struct e1000_hw *); - void (*write_vfta)(struct e1000_hw *, u32, u32); - void (*config_collision_dist)(struct e1000_hw *); - void (*rar_set)(struct e1000_hw *, u8*, u32); - s32 (*read_mac_addr)(struct e1000_hw *); - s32 (*validate_mdi_setting)(struct e1000_hw *); - s32 (*get_thermal_sensor_data)(struct e1000_hw *); - s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); - s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); - void (*release_swfw_sync)(struct e1000_hw *, u16); -}; - -/* When to use various PHY register access functions: - * - * Func Caller - * Function Does Does When to use - * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * X_reg L,P,A n/a for simple PHY reg accesses - * X_reg_locked P,A L for multiple accesses of different regs - * on different pages - * X_reg_page A L,P for multiple accesses of different regs - * on the same page - * - * Where X=[read|write], L=locking, P=sets page, A=register access - * - */ -struct e1000_phy_operations { - s32 (*init_params)(struct e1000_hw *); - s32 (*acquire)(struct e1000_hw *); - s32 (*check_polarity)(struct e1000_hw *); - s32 (*check_reset_block)(struct e1000_hw *); - s32 (*commit)(struct e1000_hw *); - s32 (*force_speed_duplex)(struct e1000_hw *); - s32 (*get_cfg_done)(struct e1000_hw *hw); - s32 (*get_cable_length)(struct e1000_hw *); - s32 (*get_info)(struct e1000_hw *); - s32 (*set_page)(struct e1000_hw *, u16); - s32 (*read_reg)(struct e1000_hw *, u32, u16 *); - s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); - s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); - void (*release)(struct e1000_hw *); - s32 (*reset)(struct e1000_hw *); - s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); - s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); - s32 (*write_reg)(struct e1000_hw *, u32, u16); - s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); - s32 (*write_reg_page)(struct e1000_hw *, u32, u16); - void (*power_up)(struct e1000_hw *); - void (*power_down)(struct e1000_hw *); - s32 (*read_i2c_byte)(struct 
e1000_hw *, u8, u8, u8 *); - s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); -}; - -/* Function pointers for the NVM. */ -struct e1000_nvm_operations { - s32 (*init_params)(struct e1000_hw *); - s32 (*acquire)(struct e1000_hw *); - s32 (*read)(struct e1000_hw *, u16, u16, u16 *); - void (*release)(struct e1000_hw *); - void (*reload)(struct e1000_hw *); - s32 (*update)(struct e1000_hw *); - s32 (*valid_led_default)(struct e1000_hw *, u16 *); - s32 (*validate)(struct e1000_hw *); - s32 (*write)(struct e1000_hw *, u16, u16, u16 *); -}; - -#define E1000_MAX_SENSORS 3 - -struct e1000_thermal_diode_data { - u8 location; - u8 temp; - u8 caution_thresh; - u8 max_op_thresh; -}; - -struct e1000_thermal_sensor_data { - struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS]; -}; - -struct e1000_mac_info { - struct e1000_mac_operations ops; - u8 addr[ETH_ADDR_LEN]; - u8 perm_addr[ETH_ADDR_LEN]; - - enum e1000_mac_type type; - - u32 collision_delta; - u32 ledctl_default; - u32 ledctl_mode1; - u32 ledctl_mode2; - u32 mc_filter_type; - u32 tx_packet_delta; - u32 txcw; - - u16 current_ifs_val; - u16 ifs_max_val; - u16 ifs_min_val; - u16 ifs_ratio; - u16 ifs_step_size; - u16 mta_reg_count; - u16 uta_reg_count; - - /* Maximum size of the MTA register table in all supported adapters */ - #define MAX_MTA_REG 128 - u32 mta_shadow[MAX_MTA_REG]; - u16 rar_entry_count; - - u8 forced_speed_duplex; - - bool adaptive_ifs; - bool has_fwsm; - bool arc_subsystem_valid; - bool asf_firmware_present; - bool autoneg; - bool autoneg_failed; - bool get_link_status; - bool in_ifs_mode; - enum e1000_serdes_link_state serdes_link_state; - bool serdes_has_link; - bool tx_pkt_filtering; - struct e1000_thermal_sensor_data thermal_sensor_data; -}; - -struct e1000_phy_info { - struct e1000_phy_operations ops; - enum e1000_phy_type type; - - enum e1000_1000t_rx_status local_rx; - enum e1000_1000t_rx_status remote_rx; - enum e1000_ms_type ms_type; - enum e1000_ms_type original_ms_type; - enum e1000_rev_polarity cable_polarity; - enum e1000_smart_speed smart_speed; - - u32 addr; - u32 id; - u32 reset_delay_us; /* in usec */ - u32 revision; - - enum e1000_media_type media_type; - - u16 autoneg_advertised; - u16 autoneg_mask; - u16 cable_length; - u16 max_cable_length; - u16 min_cable_length; - - u8 mdix; - - bool disable_polarity_correction; - bool is_mdix; - bool polarity_correction; - bool reset_disable; - bool speed_downgraded; - bool autoneg_wait_to_complete; -}; - -struct e1000_nvm_info { - struct e1000_nvm_operations ops; - enum e1000_nvm_type type; - enum e1000_nvm_override override; - - u32 flash_bank_size; - u32 flash_base_addr; - - u16 word_size; - u16 delay_usec; - u16 address_bits; - u16 opcode_bits; - u16 page_size; -}; - -struct e1000_bus_info { - enum e1000_bus_type type; - enum e1000_bus_speed speed; - enum e1000_bus_width width; - - u16 func; - u16 pci_cmd_word; -}; - -struct e1000_fc_info { - u32 high_water; /* Flow control high-water mark */ - u32 low_water; /* Flow control low-water mark */ - u16 pause_time; /* Flow control pause timer */ - u16 refresh_time; /* Flow control refresh timer */ - bool send_xon; /* Flow control send XON */ - bool strict_ieee; /* Strict IEEE mode */ - enum e1000_fc_mode current_mode; /* FC mode in effect */ - enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ -}; - -struct e1000_mbx_operations { - s32 (*init_params)(struct e1000_hw *hw); - s32 (*read)(struct e1000_hw *, u32 *, u16, u16); - s32 (*write)(struct e1000_hw *, u32 *, u16, u16); - s32 
(*read_posted)(struct e1000_hw *, u32 *, u16, u16); - s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); - s32 (*check_for_msg)(struct e1000_hw *, u16); - s32 (*check_for_ack)(struct e1000_hw *, u16); - s32 (*check_for_rst)(struct e1000_hw *, u16); -}; - -struct e1000_mbx_stats { - u32 msgs_tx; - u32 msgs_rx; - - u32 acks; - u32 reqs; - u32 rsts; -}; - -struct e1000_mbx_info { - struct e1000_mbx_operations ops; - struct e1000_mbx_stats stats; - u32 timeout; - u32 usec_delay; - u16 size; -}; - -struct e1000_dev_spec_82575 { - bool sgmii_active; - bool global_device_reset; - bool eee_disable; - bool module_plugged; - bool clear_semaphore_once; - u32 mtu; - struct sfp_e1000_flags eth_flags; - u8 media_port; - bool media_changed; -}; - -struct e1000_dev_spec_vf { - u32 vf_number; - u32 v2p_mailbox; -}; - -struct e1000_hw { - void *back; - - u8 __iomem *hw_addr; - u8 __iomem *flash_address; - unsigned long io_base; - - struct e1000_mac_info mac; - struct e1000_fc_info fc; - struct e1000_phy_info phy; - struct e1000_nvm_info nvm; - struct e1000_bus_info bus; - struct e1000_mbx_info mbx; - struct e1000_host_mng_dhcp_cookie mng_cookie; - - union { - struct e1000_dev_spec_82575 _82575; - struct e1000_dev_spec_vf vf; - } dev_spec; - - u16 device_id; - u16 subsystem_vendor_id; - u16 subsystem_device_id; - u16 vendor_id; - - u8 revision_id; -}; - -#include "e1000_82575.h" -#include "e1000_i210.h" - -/* These functions must be implemented by drivers */ -s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); -s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c deleted file mode 100644 index a4fabc3a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c +++ /dev/null @@ -1,894 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - - -static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); -static void e1000_release_nvm_i210(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); -static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); -static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); -static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data); - -/** - * e1000_acquire_nvm_i210 - Request for access to EEPROM - * @hw: pointer to the HW structure - * - * Acquire the necessary semaphores for exclusive access to the EEPROM. - * Set the EEPROM access request bit and wait for EEPROM access grant bit. - * Return successful if access grant bit set, else clear the request for - * EEPROM access and return -E1000_ERR_NVM (-1). 
- **/ -static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_acquire_nvm_i210"); - - ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); - - return ret_val; -} - -/** - * e1000_release_nvm_i210 - Release exclusive access to EEPROM - * @hw: pointer to the HW structure - * - * Stop any current commands to the EEPROM and clear the EEPROM request bit, - * then release the semaphores acquired. - **/ -static void e1000_release_nvm_i210(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_release_nvm_i210"); - - e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ - - DEBUGFUNC("e1000_acquire_swfw_sync_i210"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_i210(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_i210 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_i210"); - - while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); -} - -/** - * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_i210"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - /* In rare circumstances, the SW semaphore may already be held - * unintentionally. Clear the semaphore once before giving up. 
- */ - if (hw->dev_spec._82575.clear_semaphore_once) { - hw->dev_spec._82575.clear_semaphore_once = false; - e1000_put_hw_semaphore_generic(hw); - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - } - } - - /* If we do not have the semaphore here, we have to give up. */ - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register - * @hw: pointer to the HW structure - * @offset: offset of word in the Shadow Ram to read - * @words: number of words to read - * @data: word read from the Shadow Ram - * - * Reads a 16 bit word from the Shadow Ram using the EERD register. - * Uses necessary synchronization semaphores. - **/ -s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) -{ - s32 status = E1000_SUCCESS; - u16 i, count; - - DEBUGFUNC("e1000_read_nvm_srrd_i210"); - - /* We cannot hold synchronization semaphores for too long, - * because of forceful takeover procedure. However it is more efficient - * to read in bursts than synchronizing access for each word. */ - for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { - count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? - E1000_EERD_EEWR_MAX_COUNT : (words - i); - if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - status = e1000_read_nvm_eerd(hw, offset, count, - data + i); - hw->nvm.ops.release(hw); - } else { - status = E1000_ERR_SWFW_SYNC; - } - - if (status != E1000_SUCCESS) - break; - } - - return status; -} - -/** - * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR - * @hw: pointer to the HW structure - * @offset: offset within the Shadow RAM to be written to - * @words: number of words to write - * @data: 16 bit word(s) to be written to the Shadow RAM - * - * Writes data to Shadow RAM at offset using EEWR register. - * - * If e1000_update_nvm_checksum is not called after this function , the - * data will not be committed to FLASH and also Shadow RAM will most likely - * contain an invalid checksum. - * - * If error code is returned, data and Shadow RAM may be inconsistent - buffer - * partially written. - **/ -s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) -{ - s32 status = E1000_SUCCESS; - u16 i, count; - - DEBUGFUNC("e1000_write_nvm_srwr_i210"); - - /* We cannot hold synchronization semaphores for too long, - * because of forceful takeover procedure. However it is more efficient - * to write in bursts than synchronizing access for each word. */ - for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { - count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? 
- E1000_EERD_EEWR_MAX_COUNT : (words - i); - if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - status = e1000_write_nvm_srwr(hw, offset, count, - data + i); - hw->nvm.ops.release(hw); - } else { - status = E1000_ERR_SWFW_SYNC; - } - - if (status != E1000_SUCCESS) - break; - } - - return status; -} - -/** - * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR - * @hw: pointer to the HW structure - * @offset: offset within the Shadow Ram to be written to - * @words: number of words to write - * @data: 16 bit word(s) to be written to the Shadow Ram - * - * Writes data to Shadow Ram at offset using EEWR register. - * - * If e1000_update_nvm_checksum is not called after this function , the - * Shadow Ram will most likely contain an invalid checksum. - **/ -static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 i, k, eewr = 0; - u32 attempts = 100000; - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_write_nvm_srwr"); - - /* - * A check for invalid values: offset too large, too many words, - * too many words for the offset, and not enough words. - */ - if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || - (words == 0)) { - DEBUGOUT("nvm parameter(s) out of bounds\n"); - ret_val = -E1000_ERR_NVM; - goto out; - } - - for (i = 0; i < words; i++) { - eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | - (data[i] << E1000_NVM_RW_REG_DATA) | - E1000_NVM_RW_REG_START; - - E1000_WRITE_REG(hw, E1000_SRWR, eewr); - - for (k = 0; k < attempts; k++) { - if (E1000_NVM_RW_REG_DONE & - E1000_READ_REG(hw, E1000_SRWR)) { - ret_val = E1000_SUCCESS; - break; - } - usec_delay(5); - } - - if (ret_val != E1000_SUCCESS) { - DEBUGOUT("Shadow RAM write EEWR timed out\n"); - break; - } - } - -out: - return ret_val; -} - -/** e1000_read_invm_word_i210 - Reads OTP - * @hw: pointer to the HW structure - * @address: the word address (aka eeprom offset) to read - * @data: pointer to the data read - * - * Reads 16-bit words from the OTP. Return error when the word is not - * stored in OTP. - **/ -static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) -{ - s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; - u32 invm_dword; - u16 i; - u8 record_type, word_address; - - DEBUGFUNC("e1000_read_invm_word_i210"); - - for (i = 0; i < E1000_INVM_SIZE; i++) { - invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); - /* Get record type */ - record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); - if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) - break; - if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) - i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; - if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) - i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; - if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { - word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); - if (word_address == address) { - *data = INVM_DWORD_TO_WORD_DATA(invm_dword); - DEBUGOUT2("Read INVM Word 0x%02x = %x", - address, *data); - status = E1000_SUCCESS; - break; - } - } - } - if (status != E1000_SUCCESS) - DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address); - return status; -} - -/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211 - * @hw: pointer to the HW structure - * @address: the word address (aka eeprom offset) to read - * @data: pointer to the data read - * - * Wrapper function to return data formerly found in the NVM. 
- **/ -static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset, - u16 E1000_UNUSEDARG words, u16 *data) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_read_invm_i210"); - - /* Only the MAC addr is required to be present in the iNVM */ - switch (offset) { - case NVM_MAC_ADDR: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]); - ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1, - &data[1]); - ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2, - &data[2]); - if (ret_val != E1000_SUCCESS) - DEBUGOUT("MAC Addr not found in iNVM\n"); - break; - case NVM_INIT_CTRL_2: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); - if (ret_val != E1000_SUCCESS) { - *data = NVM_INIT_CTRL_2_DEFAULT_I211; - ret_val = E1000_SUCCESS; - } - break; - case NVM_INIT_CTRL_4: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); - if (ret_val != E1000_SUCCESS) { - *data = NVM_INIT_CTRL_4_DEFAULT_I211; - ret_val = E1000_SUCCESS; - } - break; - case NVM_LED_1_CFG: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); - if (ret_val != E1000_SUCCESS) { - *data = NVM_LED_1_CFG_DEFAULT_I211; - ret_val = E1000_SUCCESS; - } - break; - case NVM_LED_0_2_CFG: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); - if (ret_val != E1000_SUCCESS) { - *data = NVM_LED_0_2_CFG_DEFAULT_I211; - ret_val = E1000_SUCCESS; - } - break; - case NVM_ID_LED_SETTINGS: - ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); - if (ret_val != E1000_SUCCESS) { - *data = ID_LED_RESERVED_FFFF; - ret_val = E1000_SUCCESS; - } - break; - case NVM_SUB_DEV_ID: - *data = hw->subsystem_device_id; - break; - case NVM_SUB_VEN_ID: - *data = hw->subsystem_vendor_id; - break; - case NVM_DEV_ID: - *data = hw->device_id; - break; - case NVM_VEN_ID: - *data = hw->vendor_id; - break; - default: - DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset); - *data = NVM_RESERVED_WORD; - break; - } - return ret_val; -} - -/** - * e1000_read_invm_version - Reads iNVM version and image type - * @hw: pointer to the HW structure - * @invm_ver: version structure for the version read - * - * Reads iNVM version and image type. 
- **/ -s32 e1000_read_invm_version(struct e1000_hw *hw, - struct e1000_fw_version *invm_ver) -{ - u32 *record = NULL; - u32 *next_record = NULL; - u32 i = 0; - u32 invm_dword = 0; - u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / - E1000_INVM_RECORD_SIZE_IN_BYTES); - u32 buffer[E1000_INVM_SIZE]; - s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; - u16 version = 0; - - DEBUGFUNC("e1000_read_invm_version"); - - /* Read iNVM memory */ - for (i = 0; i < E1000_INVM_SIZE; i++) { - invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); - buffer[i] = invm_dword; - } - - /* Read version number */ - for (i = 1; i < invm_blocks; i++) { - record = &buffer[invm_blocks - i]; - next_record = &buffer[invm_blocks - i + 1]; - - /* Check if we have first version location used */ - if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { - version = 0; - status = E1000_SUCCESS; - break; - } - /* Check if we have second version location used */ - else if ((i == 1) && - ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; - status = E1000_SUCCESS; - break; - } - /* - * Check if we have odd version location - * used and it is the last one used - */ - else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && - ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && - (i != 1))) { - version = (*next_record & E1000_INVM_VER_FIELD_TWO) - >> 13; - status = E1000_SUCCESS; - break; - } - /* - * Check if we have even version location - * used and it is the last one used - */ - else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && - ((*record & 0x3) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; - status = E1000_SUCCESS; - break; - } - } - - if (status == E1000_SUCCESS) { - invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) - >> E1000_INVM_MAJOR_SHIFT; - invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; - } - /* Read Image Type */ - for (i = 1; i < invm_blocks; i++) { - record = &buffer[invm_blocks - i]; - next_record = &buffer[invm_blocks - i + 1]; - - /* Check if we have image type in first location used */ - if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { - invm_ver->invm_img_type = 0; - status = E1000_SUCCESS; - break; - } - /* Check if we have image type in first location used */ - else if ((((*record & 0x3) == 0) && - ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || - ((((*record & 0x3) != 0) && (i != 1)))) { - invm_ver->invm_img_type = - (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; - status = E1000_SUCCESS; - break; - } - } - return status; -} - -/** - * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum - * @hw: pointer to the HW structure - * - * Calculates the EEPROM checksum by reading/adding each word of the EEPROM - * and then verifies that the sum of the EEPROM is equal to 0xBABA. - **/ -s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw) -{ - s32 status = E1000_SUCCESS; - s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); - - DEBUGFUNC("e1000_validate_nvm_checksum_i210"); - - if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - - /* - * Replace the read function with semaphore grabbing with - * the one that skips this for a while. - * We have semaphore taken already here. - */ - read_op_ptr = hw->nvm.ops.read; - hw->nvm.ops.read = e1000_read_nvm_eerd; - - status = e1000_validate_nvm_checksum_generic(hw); - - /* Revert original read operation. 
*/ - hw->nvm.ops.read = read_op_ptr; - - hw->nvm.ops.release(hw); - } else { - status = E1000_ERR_SWFW_SYNC; - } - - return status; -} - - -/** - * e1000_update_nvm_checksum_i210 - Update EEPROM checksum - * @hw: pointer to the HW structure - * - * Updates the EEPROM checksum by reading/adding each word of the EEPROM - * up to the checksum. Then calculates the EEPROM checksum and writes the - * value to the EEPROM. Next commit EEPROM data onto the Flash. - **/ -s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u16 checksum = 0; - u16 i, nvm_data; - - DEBUGFUNC("e1000_update_nvm_checksum_i210"); - - /* - * Read the first word from the EEPROM. If this times out or fails, do - * not continue or we could be in for a very long wait while every - * EEPROM read fails - */ - ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data); - if (ret_val != E1000_SUCCESS) { - DEBUGOUT("EEPROM read failed\n"); - goto out; - } - - if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - /* - * Do not use hw->nvm.ops.write, hw->nvm.ops.read - * because we do not want to take the synchronization - * semaphores twice here. - */ - - for (i = 0; i < NVM_CHECKSUM_REG; i++) { - ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data); - if (ret_val) { - hw->nvm.ops.release(hw); - DEBUGOUT("NVM Read Error while updating checksum.\n"); - goto out; - } - checksum += nvm_data; - } - checksum = (u16) NVM_SUM - checksum; - ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, - &checksum); - if (ret_val != E1000_SUCCESS) { - hw->nvm.ops.release(hw); - DEBUGOUT("NVM Write Error while updating checksum.\n"); - goto out; - } - - hw->nvm.ops.release(hw); - - ret_val = e1000_update_flash_i210(hw); - } else { - ret_val = E1000_ERR_SWFW_SYNC; - } -out: - return ret_val; -} - -/** - * e1000_get_flash_presence_i210 - Check if flash device is detected. - * @hw: pointer to the HW structure - * - **/ -bool e1000_get_flash_presence_i210(struct e1000_hw *hw) -{ - u32 eec = 0; - bool ret_val = false; - - DEBUGFUNC("e1000_get_flash_presence_i210"); - - eec = E1000_READ_REG(hw, E1000_EECD); - - if (eec & E1000_EECD_FLASH_DETECTED_I210) - ret_val = true; - - return ret_val; -} - -/** - * e1000_update_flash_i210 - Commit EEPROM to the flash - * @hw: pointer to the HW structure - * - **/ -s32 e1000_update_flash_i210(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u32 flup; - - DEBUGFUNC("e1000_update_flash_i210"); - - ret_val = e1000_pool_flash_update_done_i210(hw); - if (ret_val == -E1000_ERR_NVM) { - DEBUGOUT("Flash update time out\n"); - goto out; - } - - flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210; - E1000_WRITE_REG(hw, E1000_EECD, flup); - - ret_val = e1000_pool_flash_update_done_i210(hw); - if (ret_val == E1000_SUCCESS) - DEBUGOUT("Flash update complete\n"); - else - DEBUGOUT("Flash update time out\n"); - -out: - return ret_val; -} - -/** - * e1000_pool_flash_update_done_i210 - Pool FLUDONE status. 
- * @hw: pointer to the HW structure - * - **/ -s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw) -{ - s32 ret_val = -E1000_ERR_NVM; - u32 i, reg; - - DEBUGFUNC("e1000_pool_flash_update_done_i210"); - - for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { - reg = E1000_READ_REG(hw, E1000_EECD); - if (reg & E1000_EECD_FLUDONE_I210) { - ret_val = E1000_SUCCESS; - break; - } - usec_delay(5); - } - - return ret_val; -} - -/** - * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers - * @hw: pointer to the HW structure - * - * Initialize the i210/i211 NVM parameters and function pointers. - **/ -static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - struct e1000_nvm_info *nvm = &hw->nvm; - - DEBUGFUNC("e1000_init_nvm_params_i210"); - - ret_val = e1000_init_nvm_params_82575(hw); - nvm->ops.acquire = e1000_acquire_nvm_i210; - nvm->ops.release = e1000_release_nvm_i210; - nvm->ops.valid_led_default = e1000_valid_led_default_i210; - if (e1000_get_flash_presence_i210(hw)) { - hw->nvm.type = e1000_nvm_flash_hw; - nvm->ops.read = e1000_read_nvm_srrd_i210; - nvm->ops.write = e1000_write_nvm_srwr_i210; - nvm->ops.validate = e1000_validate_nvm_checksum_i210; - nvm->ops.update = e1000_update_nvm_checksum_i210; - } else { - hw->nvm.type = e1000_nvm_invm; - nvm->ops.read = e1000_read_invm_i210; - nvm->ops.write = e1000_null_write_nvm; - nvm->ops.validate = e1000_null_ops_generic; - nvm->ops.update = e1000_null_ops_generic; - } - return ret_val; -} - -/** - * e1000_init_function_pointers_i210 - Init func ptrs. - * @hw: pointer to the HW structure - * - * Called to initialize all function pointers and parameters. - **/ -void e1000_init_function_pointers_i210(struct e1000_hw *hw) -{ - e1000_init_function_pointers_82575(hw); - hw->nvm.ops.init_params = e1000_init_nvm_params_i210; - - return; -} - -/** - * e1000_valid_led_default_i210 - Verify a valid default LED config - * @hw: pointer to the HW structure - * @data: pointer to the NVM (EEPROM) - * - * Read the EEPROM for the current default LED configuration. If the - * LED configuration is not valid, set to a valid LED configuration. 
- **/ -static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_valid_led_default_i210"); - - ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - - if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { - switch (hw->phy.media_type) { - case e1000_media_type_internal_serdes: - *data = ID_LED_DEFAULT_I210_SERDES; - break; - case e1000_media_type_copper: - default: - *data = ID_LED_DEFAULT_I210; - break; - } - } -out: - return ret_val; -} - -/** - * __e1000_access_xmdio_reg - Read/write XMDIO register - * @hw: pointer to the HW structure - * @address: XMDIO address to program - * @dev_addr: device address to program - * @data: pointer to value to read/write from/to the XMDIO address - * @read: boolean flag to indicate read or write - **/ -static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address, - u8 dev_addr, u16 *data, bool read) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("__e1000_access_xmdio_reg"); - - ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); - if (ret_val) - return ret_val; - - ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); - if (ret_val) - return ret_val; - - ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | - dev_addr); - if (ret_val) - return ret_val; - - if (read) - ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); - else - ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); - if (ret_val) - return ret_val; - - /* Recalibrate the device back to 0 */ - ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); - if (ret_val) - return ret_val; - - return ret_val; -} - -/** - * e1000_read_xmdio_reg - Read XMDIO register - * @hw: pointer to the HW structure - * @addr: XMDIO address to program - * @dev_addr: device address to program - * @data: value to be read from the EMI address - **/ -s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) -{ - DEBUGFUNC("e1000_read_xmdio_reg"); - - return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true); -} - -/** - * e1000_write_xmdio_reg - Write XMDIO register - * @hw: pointer to the HW structure - * @addr: XMDIO address to program - * @dev_addr: device address to program - * @data: value to be written to the XMDIO address - **/ -s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) -{ - DEBUGFUNC("e1000_read_xmdio_reg"); - - return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false); -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h deleted file mode 100644 index 9df7c203..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_I210_H_ -#define _E1000_I210_H_ - -bool e1000_get_flash_presence_i210(struct e1000_hw *hw); -s32 e1000_update_flash_i210(struct e1000_hw *hw); -s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw); -s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw); -s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, - u16 words, u16 *data); -s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, - u16 words, u16 *data); -s32 e1000_read_invm_version(struct e1000_hw *hw, - struct e1000_fw_version *invm_ver); -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, - u16 *data); -s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, - u16 data); - -#define E1000_STM_OPCODE 0xDB00 -#define E1000_EEPROM_FLASH_SIZE_WORD 0x11 - -#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ - (u8)((invm_dword) & 0x7) -#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ - (u8)(((invm_dword) & 0x0000FE00) >> 9) -#define INVM_DWORD_TO_WORD_DATA(invm_dword) \ - (u16)(((invm_dword) & 0xFFFF0000) >> 16) - -enum E1000_INVM_STRUCTURE_TYPE { - E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, - E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, - E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, - E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, - E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, - E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, -}; - -#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 -#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 -#define E1000_INVM_ULT_BYTES_SIZE 8 -#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 -#define E1000_INVM_VER_FIELD_ONE 0x1FF8 -#define E1000_INVM_VER_FIELD_TWO 0x7FE000 -#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 - -#define E1000_INVM_MAJOR_MASK 0x3F0 -#define E1000_INVM_MINOR_MASK 0xF -#define E1000_INVM_MAJOR_SHIFT 4 - -#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ - (ID_LED_DEF1_DEF2 << 4) | \ - (ID_LED_OFF1_OFF2)) -#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ - (ID_LED_DEF1_DEF2 << 4) | \ - (ID_LED_OFF1_ON2)) - -/* NVM offset defaults for I211 devices */ -#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 -#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 -#define NVM_LED_1_CFG_DEFAULT_I211 0x0184 -#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c deleted file mode 100644 index 13a42267..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c +++ /dev/null @@ -1,2081 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - -static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); -static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); -static void e1000_config_collision_dist_generic(struct e1000_hw *hw); -static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); - -/** - * e1000_init_mac_ops_generic - Initialize MAC function pointers - * @hw: pointer to the HW structure - * - * Setups up the function pointers to no-op functions - **/ -void e1000_init_mac_ops_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - DEBUGFUNC("e1000_init_mac_ops_generic"); - - /* General Setup */ - mac->ops.init_params = e1000_null_ops_generic; - mac->ops.init_hw = e1000_null_ops_generic; - mac->ops.reset_hw = e1000_null_ops_generic; - mac->ops.setup_physical_interface = e1000_null_ops_generic; - mac->ops.get_bus_info = e1000_null_ops_generic; - mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; - mac->ops.read_mac_addr = e1000_read_mac_addr_generic; - mac->ops.config_collision_dist = e1000_config_collision_dist_generic; - mac->ops.clear_hw_cntrs = e1000_null_mac_generic; - /* LED */ - mac->ops.cleanup_led = e1000_null_ops_generic; - mac->ops.setup_led = e1000_null_ops_generic; - mac->ops.blink_led = e1000_null_ops_generic; - mac->ops.led_on = e1000_null_ops_generic; - mac->ops.led_off = e1000_null_ops_generic; - /* LINK */ - mac->ops.setup_link = e1000_null_ops_generic; - mac->ops.get_link_up_info = e1000_null_link_info; - mac->ops.check_for_link = e1000_null_ops_generic; - /* Management */ - mac->ops.check_mng_mode = e1000_null_mng_mode; - /* VLAN, MC, etc. */ - mac->ops.update_mc_addr_list = e1000_null_update_mc; - mac->ops.clear_vfta = e1000_null_mac_generic; - mac->ops.write_vfta = e1000_null_write_vfta; - mac->ops.rar_set = e1000_rar_set_generic; - mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic; -} - -/** - * e1000_null_ops_generic - No-op function, returns 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_null_ops_generic"); - return E1000_SUCCESS; -} - -/** - * e1000_null_mac_generic - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_null_mac_generic"); - return; -} - -/** - * e1000_null_link_info - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d) -{ - DEBUGFUNC("e1000_null_link_info"); - return E1000_SUCCESS; -} - -/** - * e1000_null_mng_mode - No-op function, return false - * @hw: pointer to the HW structure - **/ -bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_null_mng_mode"); - return false; -} - -/** - * e1000_null_update_mc - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw, - u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) -{ - DEBUGFUNC("e1000_null_update_mc"); - return; -} - -/** - * e1000_null_write_vfta - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw, - u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b) -{ - DEBUGFUNC("e1000_null_write_vfta"); - return; 
-} - -/** - * e1000_null_rar_set - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw, - u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) -{ - DEBUGFUNC("e1000_null_rar_set"); - return; -} - -/** - * e1000_get_bus_info_pcie_generic - Get PCIe bus information - * @hw: pointer to the HW structure - * - * Determines and stores the system bus information for a particular - * network interface. The following bus information is determined and stored: - * bus speed, bus width, type (PCIe), and PCIe function. - **/ -s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - struct e1000_bus_info *bus = &hw->bus; - s32 ret_val; - u16 pcie_link_status; - - DEBUGFUNC("e1000_get_bus_info_pcie_generic"); - - bus->type = e1000_bus_type_pci_express; - - ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, - &pcie_link_status); - if (ret_val) { - bus->width = e1000_bus_width_unknown; - bus->speed = e1000_bus_speed_unknown; - } else { - switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { - case PCIE_LINK_SPEED_2500: - bus->speed = e1000_bus_speed_2500; - break; - case PCIE_LINK_SPEED_5000: - bus->speed = e1000_bus_speed_5000; - break; - default: - bus->speed = e1000_bus_speed_unknown; - break; - } - - bus->width = (enum e1000_bus_width)((pcie_link_status & - PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); - } - - mac->ops.set_lan_id(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices - * - * @hw: pointer to the HW structure - * - * Determines the LAN function id by reading memory-mapped registers - * and swaps the port value if requested. - **/ -static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) -{ - struct e1000_bus_info *bus = &hw->bus; - u32 reg; - - /* The status register reports the correct function number - * for the device regardless of function swap state. - */ - reg = E1000_READ_REG(hw, E1000_STATUS); - bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; -} - -/** - * e1000_set_lan_id_single_port - Set LAN id for a single port device - * @hw: pointer to the HW structure - * - * Sets the LAN function id to zero for a single port device. - **/ -void e1000_set_lan_id_single_port(struct e1000_hw *hw) -{ - struct e1000_bus_info *bus = &hw->bus; - - bus->func = 0; -} - -/** - * e1000_clear_vfta_generic - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * Clears the register array which contains the VLAN filter table by - * setting all the values to 0. - **/ -void e1000_clear_vfta_generic(struct e1000_hw *hw) -{ - u32 offset; - - DEBUGFUNC("e1000_clear_vfta_generic"); - - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); - E1000_WRITE_FLUSH(hw); - } -} - -/** - * e1000_write_vfta_generic - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: register offset in VLAN filter table - * @value: register value written to VLAN filter table - * - * Writes value at the given offset in the register array which stores - * the VLAN filter table. 
- **/ -void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) -{ - DEBUGFUNC("e1000_write_vfta_generic"); - - E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_init_rx_addrs_generic - Initialize receive address's - * @hw: pointer to the HW structure - * @rar_count: receive address registers - * - * Setup the receive address registers by setting the base receive address - * register to the devices MAC address and clearing all the other receive - * address registers to 0. - **/ -void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) -{ - u32 i; - u8 mac_addr[ETH_ADDR_LEN] = {0}; - - DEBUGFUNC("e1000_init_rx_addrs_generic"); - - /* Setup the receive address */ - DEBUGOUT("Programming MAC Address into RAR[0]\n"); - - hw->mac.ops.rar_set(hw, hw->mac.addr, 0); - - /* Zero out the other (rar_entry_count - 1) receive addresses */ - DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); - for (i = 1; i < rar_count; i++) - hw->mac.ops.rar_set(hw, mac_addr, i); -} - -/** - * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr - * @hw: pointer to the HW structure - * - * Checks the nvm for an alternate MAC address. An alternate MAC address - * can be setup by pre-boot software and must be treated like a permanent - * address and must override the actual permanent MAC address. If an - * alternate MAC address is found it is programmed into RAR0, replacing - * the permanent address that was installed into RAR0 by the Si on reset. - * This function will return SUCCESS unless it encounters an error while - * reading the EEPROM. - **/ -s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) -{ - u32 i; - s32 ret_val; - u16 offset, nvm_alt_mac_addr_offset, nvm_data; - u8 alt_mac_addr[ETH_ADDR_LEN]; - - DEBUGFUNC("e1000_check_alt_mac_addr_generic"); - - ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data); - if (ret_val) - return ret_val; - - - /* Alternate MAC address is handled by the option ROM for 82580 - * and newer. SW support not required. - */ - if (hw->mac.type >= e1000_82580) - return E1000_SUCCESS; - - ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, - &nvm_alt_mac_addr_offset); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - if ((nvm_alt_mac_addr_offset == 0xFFFF) || - (nvm_alt_mac_addr_offset == 0x0000)) - /* There is no Alternate MAC Address */ - return E1000_SUCCESS; - - if (hw->bus.func == E1000_FUNC_1) - nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; - if (hw->bus.func == E1000_FUNC_2) - nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; - - if (hw->bus.func == E1000_FUNC_3) - nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; - for (i = 0; i < ETH_ADDR_LEN; i += 2) { - offset = nvm_alt_mac_addr_offset + (i >> 1); - ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - alt_mac_addr[i] = (u8)(nvm_data & 0xFF); - alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); - } - - /* if multicast bit is set, the alternate address will not be used */ - if (alt_mac_addr[0] & 0x01) { - DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); - return E1000_SUCCESS; - } - - /* We have a valid alternate MAC address, and we want to treat it the - * same as the normal permanent MAC address stored by the HW into the - * RAR. Do this by mapping this address into RAR0. 
- */ - hw->mac.ops.rar_set(hw, alt_mac_addr, 0); - - return E1000_SUCCESS; -} - -/** - * e1000_rar_set_generic - Set receive address register - * @hw: pointer to the HW structure - * @addr: pointer to the receive address - * @index: receive address array register - * - * Sets the receive address array register at index to the address passed - * in by addr. - **/ -static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) -{ - u32 rar_low, rar_high; - - DEBUGFUNC("e1000_rar_set_generic"); - - /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian - */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); - - /* If MAC address zero, no need to set the AV bit */ - if (rar_low || rar_high) - rar_high |= E1000_RAH_AV; - - /* Some bridges will combine consecutive 32-bit writes into - * a single burst write, which will malfunction on some parts. - * The flushes avoid this. - */ - E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); - E1000_WRITE_FLUSH(hw); - E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_hash_mc_addr_generic - Generate a multicast hash value - * @hw: pointer to the HW structure - * @mc_addr: pointer to a multicast address - * - * Generates a multicast address hash value which is used to determine - * the multicast filter table array address and new table value. - **/ -u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) -{ - u32 hash_value, hash_mask; - u8 bit_shift = 0; - - DEBUGFUNC("e1000_hash_mc_addr_generic"); - - /* Register count multiplied by bits per register */ - hash_mask = (hw->mac.mta_reg_count * 32) - 1; - - /* For a mc_filter_type of 0, bit_shift is the number of left-shifts - * where 0xFF would still fall within the hash mask. - */ - while (hash_mask >> bit_shift != 0xFF) - bit_shift++; - - /* The portion of the address that is used for the hash table - * is determined by the mc_filter_type setting. - * The algorithm is such that there is a total of 8 bits of shifting. - * The bit_shift for a mc_filter_type of 0 represents the number of - * left-shifts where the MSB of mc_addr[5] would still fall within - * the hash_mask. Case 0 does this exactly. Since there are a total - * of 8 bits of shifting, then mc_addr[4] will shift right the - * remaining number of bits. Thus 8 - bit_shift. The rest of the - * cases are a variation of this algorithm...essentially raising the - * number of bits to shift mc_addr[5] left, while still keeping the - * 8-bit shifting total. - * - * For example, given the following Destination MAC Address and an - * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), - * we can see that the bit_shift for case 0 is 4. These are the hash - * values resulting from each mc_filter_type... 
- * [0] [1] [2] [3] [4] [5] - * 01 AA 00 12 34 56 - * LSB MSB - * - * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 - * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 - * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 - * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 - */ - switch (hw->mac.mc_filter_type) { - default: - case 0: - break; - case 1: - bit_shift += 1; - break; - case 2: - bit_shift += 2; - break; - case 3: - bit_shift += 4; - break; - } - - hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | - (((u16) mc_addr[5]) << bit_shift))); - - return hash_value; -} - -/** - * e1000_update_mc_addr_list_generic - Update Multicast addresses - * @hw: pointer to the HW structure - * @mc_addr_list: array of multicast addresses to program - * @mc_addr_count: number of multicast addresses to program - * - * Updates entire Multicast Table Array. - * The caller must have a packed mc_addr_list of multicast addresses. - **/ -void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count) -{ - u32 hash_value, hash_bit, hash_reg; - int i; - - DEBUGFUNC("e1000_update_mc_addr_list_generic"); - - /* clear mta_shadow */ - memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); - - /* update mta_shadow from mc_addr_list */ - for (i = 0; (u32) i < mc_addr_count; i++) { - hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); - - hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); - hash_bit = hash_value & 0x1F; - - hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); - mc_addr_list += (ETH_ADDR_LEN); - } - - /* replace the entire MTA table */ - for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) - E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters - * @hw: pointer to the HW structure - * - * Clears the base hardware counters by reading the counter registers. 
- **/ -void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); - - E1000_READ_REG(hw, E1000_CRCERRS); - E1000_READ_REG(hw, E1000_SYMERRS); - E1000_READ_REG(hw, E1000_MPC); - E1000_READ_REG(hw, E1000_SCC); - E1000_READ_REG(hw, E1000_ECOL); - E1000_READ_REG(hw, E1000_MCC); - E1000_READ_REG(hw, E1000_LATECOL); - E1000_READ_REG(hw, E1000_COLC); - E1000_READ_REG(hw, E1000_DC); - E1000_READ_REG(hw, E1000_SEC); - E1000_READ_REG(hw, E1000_RLEC); - E1000_READ_REG(hw, E1000_XONRXC); - E1000_READ_REG(hw, E1000_XONTXC); - E1000_READ_REG(hw, E1000_XOFFRXC); - E1000_READ_REG(hw, E1000_XOFFTXC); - E1000_READ_REG(hw, E1000_FCRUC); - E1000_READ_REG(hw, E1000_GPRC); - E1000_READ_REG(hw, E1000_BPRC); - E1000_READ_REG(hw, E1000_MPRC); - E1000_READ_REG(hw, E1000_GPTC); - E1000_READ_REG(hw, E1000_GORCL); - E1000_READ_REG(hw, E1000_GORCH); - E1000_READ_REG(hw, E1000_GOTCL); - E1000_READ_REG(hw, E1000_GOTCH); - E1000_READ_REG(hw, E1000_RNBC); - E1000_READ_REG(hw, E1000_RUC); - E1000_READ_REG(hw, E1000_RFC); - E1000_READ_REG(hw, E1000_ROC); - E1000_READ_REG(hw, E1000_RJC); - E1000_READ_REG(hw, E1000_TORL); - E1000_READ_REG(hw, E1000_TORH); - E1000_READ_REG(hw, E1000_TOTL); - E1000_READ_REG(hw, E1000_TOTH); - E1000_READ_REG(hw, E1000_TPR); - E1000_READ_REG(hw, E1000_TPT); - E1000_READ_REG(hw, E1000_MPTC); - E1000_READ_REG(hw, E1000_BPTC); -} - -/** - * e1000_check_for_copper_link_generic - Check for link (Copper) - * @hw: pointer to the HW structure - * - * Checks to see of the link status of the hardware has changed. If a - * change in link status has been detected, then we read the PHY registers - * to get the current speed/duplex if link exists. - **/ -s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - s32 ret_val; - bool link; - - DEBUGFUNC("e1000_check_for_copper_link"); - - /* We only want to go out to the PHY registers to see if Auto-Neg - * has completed and/or if our link status has changed. The - * get_link_status flag is set upon receiving a Link Status - * Change or Rx Sequence Error interrupt. - */ - if (!mac->get_link_status) - return E1000_SUCCESS; - - /* First we want to see if the MII Status Register reports - * link. If so, then we want to get the current speed/duplex - * of the PHY. - */ - ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) - return E1000_SUCCESS; /* No link detected */ - - mac->get_link_status = false; - - /* Check if there was DownShift, must be checked - * immediately after link-up - */ - e1000_check_downshift_generic(hw); - - /* If we are forcing speed/duplex, then we simply return since - * we have already determined whether we have link or not. - */ - if (!mac->autoneg) - return -E1000_ERR_CONFIG; - - /* Auto-Neg is enabled. Auto Speed Detection takes care - * of MAC speed/duplex configuration. So we only need to - * configure Collision Distance in the MAC. - */ - mac->ops.config_collision_dist(hw); - - /* Configure Flow Control now that Auto-Neg has completed. - * First, we need to restore the desired flow control - * settings because we may have had to re-autoneg with a - * different link partner. - */ - ret_val = e1000_config_fc_after_link_up_generic(hw); - if (ret_val) - DEBUGOUT("Error configuring flow control\n"); - - return ret_val; -} - -/** - * e1000_check_for_fiber_link_generic - Check for link (Fiber) - * @hw: pointer to the HW structure - * - * Checks for link up on the hardware. 
If link is not up and we have - * a signal, then we need to force link up. - **/ -s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 rxcw; - u32 ctrl; - u32 status; - s32 ret_val; - - DEBUGFUNC("e1000_check_for_fiber_link_generic"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - status = E1000_READ_REG(hw, E1000_STATUS); - rxcw = E1000_READ_REG(hw, E1000_RXCW); - - /* If we don't have link (auto-negotiation failed or link partner - * cannot auto-negotiate), the cable is plugged in (we have signal), - * and our link partner is not trying to auto-negotiate with us (we - * are receiving idles or data), we need to force link up. We also - * need to give auto-negotiation time to complete, in case the cable - * was just plugged in. The autoneg_failed flag does this. - */ - /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ - if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) && - !(rxcw & E1000_RXCW_C)) { - if (!mac->autoneg_failed) { - mac->autoneg_failed = true; - return E1000_SUCCESS; - } - DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); - - /* Disable auto-negotiation in the TXCW register */ - E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); - - /* Force link-up and also force full-duplex. */ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - /* Configure Flow Control after forcing link up. */ - ret_val = e1000_config_fc_after_link_up_generic(hw); - if (ret_val) { - DEBUGOUT("Error configuring flow control\n"); - return ret_val; - } - } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { - /* If we are forcing link and we are receiving /C/ ordered - * sets, re-enable auto-negotiation in the TXCW register - * and disable forced link in the Device Control register - * in an attempt to auto-negotiate with our link partner. - */ - DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); - E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); - E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); - - mac->serdes_has_link = true; - } - - return E1000_SUCCESS; -} - -/** - * e1000_check_for_serdes_link_generic - Check for link (Serdes) - * @hw: pointer to the HW structure - * - * Checks for link up on the hardware. If link is not up and we have - * a signal, then we need to force link up. - **/ -s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 rxcw; - u32 ctrl; - u32 status; - s32 ret_val; - - DEBUGFUNC("e1000_check_for_serdes_link_generic"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - status = E1000_READ_REG(hw, E1000_STATUS); - rxcw = E1000_READ_REG(hw, E1000_RXCW); - - /* If we don't have link (auto-negotiation failed or link partner - * cannot auto-negotiate), and our link partner is not trying to - * auto-negotiate with us (we are receiving idles or data), - * we need to force link up. We also need to give auto-negotiation - * time to complete. - */ - /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ - if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { - if (!mac->autoneg_failed) { - mac->autoneg_failed = true; - return E1000_SUCCESS; - } - DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); - - /* Disable auto-negotiation in the TXCW register */ - E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); - - /* Force link-up and also force full-duplex. 
*/ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - /* Configure Flow Control after forcing link up. */ - ret_val = e1000_config_fc_after_link_up_generic(hw); - if (ret_val) { - DEBUGOUT("Error configuring flow control\n"); - return ret_val; - } - } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { - /* If we are forcing link and we are receiving /C/ ordered - * sets, re-enable auto-negotiation in the TXCW register - * and disable forced link in the Device Control register - * in an attempt to auto-negotiate with our link partner. - */ - DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); - E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); - E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); - - mac->serdes_has_link = true; - } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { - /* If we force link for non-auto-negotiation switch, check - * link status based on MAC synchronization for internal - * serdes media type. - */ - /* SYNCH bit and IV bit are sticky. */ - usec_delay(10); - rxcw = E1000_READ_REG(hw, E1000_RXCW); - if (rxcw & E1000_RXCW_SYNCH) { - if (!(rxcw & E1000_RXCW_IV)) { - mac->serdes_has_link = true; - DEBUGOUT("SERDES: Link up - forced.\n"); - } - } else { - mac->serdes_has_link = false; - DEBUGOUT("SERDES: Link down - force failed.\n"); - } - } - - if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { - status = E1000_READ_REG(hw, E1000_STATUS); - if (status & E1000_STATUS_LU) { - /* SYNCH bit and IV bit are sticky, so reread rxcw. */ - usec_delay(10); - rxcw = E1000_READ_REG(hw, E1000_RXCW); - if (rxcw & E1000_RXCW_SYNCH) { - if (!(rxcw & E1000_RXCW_IV)) { - mac->serdes_has_link = true; - DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n"); - } else { - mac->serdes_has_link = false; - DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n"); - } - } else { - mac->serdes_has_link = false; - DEBUGOUT("SERDES: Link down - no sync.\n"); - } - } else { - mac->serdes_has_link = false; - DEBUGOUT("SERDES: Link down - autoneg failed\n"); - } - } - - return E1000_SUCCESS; -} - -/** - * e1000_set_default_fc_generic - Set flow control default values - * @hw: pointer to the HW structure - * - * Read the EEPROM for the default values for flow control and store the - * values. - **/ -static s32 e1000_set_default_fc_generic(struct e1000_hw *hw) -{ - s32 ret_val; - u16 nvm_data; - - DEBUGFUNC("e1000_set_default_fc_generic"); - - /* Read and store word 0x0F of the EEPROM. This word contains bits - * that determine the hardware's default PAUSE (flow control) mode, - * a bit that determines whether the HW defaults to enabling or - * disabling auto-negotiation, and the direction of the - * SW defined pins. If there is no SW over-ride of the flow - * control setting, then the variable hw->fc will - * be initialized based on a value in the EEPROM. 
- */ - ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); - - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - if (!(nvm_data & NVM_WORD0F_PAUSE_MASK)) - hw->fc.requested_mode = e1000_fc_none; - else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == - NVM_WORD0F_ASM_DIR) - hw->fc.requested_mode = e1000_fc_tx_pause; - else - hw->fc.requested_mode = e1000_fc_full; - - return E1000_SUCCESS; -} - -/** - * e1000_setup_link_generic - Setup flow control and link settings - * @hw: pointer to the HW structure - * - * Determines which flow control settings to use, then configures flow - * control. Calls the appropriate media-specific link configuration - * function. Assuming the adapter has a valid link partner, a valid link - * should be established. Assumes the hardware has previously been reset - * and the transmitter and receiver are not enabled. - **/ -s32 e1000_setup_link_generic(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_setup_link_generic"); - - /* In the case of the phy reset being blocked, we already have a link. - * We do not need to set it up again. - */ - if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) - return E1000_SUCCESS; - - /* If requested flow control is set to default, set flow control - * based on the EEPROM flow control settings. - */ - if (hw->fc.requested_mode == e1000_fc_default) { - ret_val = e1000_set_default_fc_generic(hw); - if (ret_val) - return ret_val; - } - - /* Save off the requested flow control mode for use later. Depending - * on the link partner's capabilities, we may or may not use this mode. - */ - hw->fc.current_mode = hw->fc.requested_mode; - - DEBUGOUT1("After fix-ups FlowControl is now = %x\n", - hw->fc.current_mode); - - /* Call the necessary media_type subroutine to configure the link. */ - ret_val = hw->mac.ops.setup_physical_interface(hw); - if (ret_val) - return ret_val; - - /* Initialize the flow control address, type, and PAUSE timer - * registers to their default values. This is done even if flow - * control is disabled, because it does not hurt anything to - * initialize these registers. - */ - DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); - E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); - E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); - E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); - - E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); - - return e1000_set_fc_watermarks_generic(hw); -} - -/** - * e1000_commit_fc_settings_generic - Configure flow control - * @hw: pointer to the HW structure - * - * Write the flow control settings to the Transmit Config Word Register (TXCW) - * base on the flow control settings in e1000_mac_info. - **/ -static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 txcw; - - DEBUGFUNC("e1000_commit_fc_settings_generic"); - - /* Check for a software override of the flow control settings, and - * setup the device accordingly. If auto-negotiation is enabled, then - * software will have to set the "PAUSE" bits to the correct value in - * the Transmit Config Word Register (TXCW) and re-start auto- - * negotiation. However, if auto-negotiation is disabled, then - * software will have to manually configure the two flow control enable - * bits in the CTRL register. 
- * - * The possible values of the "fc" parameter are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause frames, - * but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames but we - * do not support receiving pause frames). - * 3: Both Rx and Tx flow control (symmetric) are enabled. - */ - switch (hw->fc.current_mode) { - case e1000_fc_none: - /* Flow control completely disabled by a software over-ride. */ - txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); - break; - case e1000_fc_rx_pause: - /* Rx Flow control is enabled and Tx Flow control is disabled - * by a software over-ride. Since there really isn't a way to - * advertise that we are capable of Rx Pause ONLY, we will - * advertise that we support both symmetric and asymmetric Rx - * PAUSE. Later, we will disable the adapter's ability to send - * PAUSE frames. - */ - txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); - break; - case e1000_fc_tx_pause: - /* Tx Flow control is enabled, and Rx Flow control is disabled, - * by a software over-ride. - */ - txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); - break; - case e1000_fc_full: - /* Flow control (both Rx and Tx) is enabled by a software - * over-ride. - */ - txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); - break; - default: - DEBUGOUT("Flow control param set incorrectly\n"); - return -E1000_ERR_CONFIG; - break; - } - - E1000_WRITE_REG(hw, E1000_TXCW, txcw); - mac->txcw = txcw; - - return E1000_SUCCESS; -} - -/** - * e1000_poll_fiber_serdes_link_generic - Poll for link up - * @hw: pointer to the HW structure - * - * Polls for link up by reading the status register, if link fails to come - * up with auto-negotiation, then the link is forced if a signal is detected. - **/ -static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 i, status; - s32 ret_val; - - DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); - - /* If we have a signal (the cable is plugged in, or assumed true for - * serdes media) then poll for a "Link-Up" indication in the Device - * Status Register. Time-out if a link isn't seen in 500 milliseconds - * seconds (Auto-negotiation should complete in less than 500 - * milliseconds even if the other end is doing it in SW). - */ - for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { - msec_delay(10); - status = E1000_READ_REG(hw, E1000_STATUS); - if (status & E1000_STATUS_LU) - break; - } - if (i == FIBER_LINK_UP_LIMIT) { - DEBUGOUT("Never got a valid link from auto-neg!!!\n"); - mac->autoneg_failed = true; - /* AutoNeg failed to achieve a link, so we'll call - * mac->check_for_link. This routine will force the - * link up if we detect a signal. This will allow us to - * communicate with non-autonegotiating link partners. - */ - ret_val = mac->ops.check_for_link(hw); - if (ret_val) { - DEBUGOUT("Error while checking for link\n"); - return ret_val; - } - mac->autoneg_failed = false; - } else { - mac->autoneg_failed = false; - DEBUGOUT("Valid Link Found\n"); - } - - return E1000_SUCCESS; -} - -/** - * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes - * @hw: pointer to the HW structure - * - * Configures collision distance and flow control for fiber and serdes - * links. Upon successful setup, poll for link. 
- **/ -s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) -{ - u32 ctrl; - s32 ret_val; - - DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - - /* Take the link out of reset */ - ctrl &= ~E1000_CTRL_LRST; - - hw->mac.ops.config_collision_dist(hw); - - ret_val = e1000_commit_fc_settings_generic(hw); - if (ret_val) - return ret_val; - - /* Since auto-negotiation is enabled, take the link out of reset (the - * link will be in reset, because we previously reset the chip). This - * will restart auto-negotiation. If auto-negotiation is successful - * then the link-up status bit will be set and the flow control enable - * bits (RFCE and TFCE) will be set according to their negotiated value. - */ - DEBUGOUT("Auto-negotiation enabled\n"); - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - E1000_WRITE_FLUSH(hw); - msec_delay(1); - - /* For these adapters, the SW definable pin 1 is set when the optics - * detect a signal. If we have a signal, then poll for a "Link-Up" - * indication. - */ - if (hw->phy.media_type == e1000_media_type_internal_serdes || - (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { - ret_val = e1000_poll_fiber_serdes_link_generic(hw); - } else { - DEBUGOUT("No signal detected\n"); - } - - return ret_val; -} - -/** - * e1000_config_collision_dist_generic - Configure collision distance - * @hw: pointer to the HW structure - * - * Configures the collision distance to the default value and is used - * during link setup. - **/ -static void e1000_config_collision_dist_generic(struct e1000_hw *hw) -{ - u32 tctl; - - DEBUGFUNC("e1000_config_collision_dist_generic"); - - tctl = E1000_READ_REG(hw, E1000_TCTL); - - tctl &= ~E1000_TCTL_COLD; - tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; - - E1000_WRITE_REG(hw, E1000_TCTL, tctl); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks - * @hw: pointer to the HW structure - * - * Sets the flow control high/low threshold (watermark) registers. If - * flow control XON frame transmission is enabled, then set XON frame - * transmission as well. - **/ -s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) -{ - u32 fcrtl = 0, fcrth = 0; - - DEBUGFUNC("e1000_set_fc_watermarks_generic"); - - /* Set the flow control receive threshold registers. Normally, - * these registers will be set to a default threshold that may be - * adjusted later by the driver's runtime code. However, if the - * ability to transmit pause frames is not enabled, then these - * registers will be set to 0. - */ - if (hw->fc.current_mode & e1000_fc_tx_pause) { - /* We need to set up the Receive Threshold high and low water - * marks as well as (optionally) enabling the transmission of - * XON frames. - */ - fcrtl = hw->fc.low_water; - if (hw->fc.send_xon) - fcrtl |= E1000_FCRTL_XONE; - - fcrth = hw->fc.high_water; - } - E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); - E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); - - return E1000_SUCCESS; -} - -/** - * e1000_force_mac_fc_generic - Force the MAC's flow control settings - * @hw: pointer to the HW structure - * - * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the - * device control register to reflect the adapter settings. TFCE and RFCE - * need to be explicitly set by software when a copper PHY is used because - * autonegotiation is managed by the PHY rather than the MAC. Software must - * also configure these bits when link is forced on a fiber connection. 
- **/ -s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) -{ - u32 ctrl; - - DEBUGFUNC("e1000_force_mac_fc_generic"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - - /* Because we didn't get link via the internal auto-negotiation - * mechanism (we either forced link or we got link via PHY - * auto-neg), we have to manually enable/disable transmit an - * receive flow control. - * - * The "Case" statement below enables/disable flow control - * according to the "hw->fc.current_mode" parameter. - * - * The possible values of the "fc" parameter are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause - * frames but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames - * frames but we do not receive pause frames). - * 3: Both Rx and Tx flow control (symmetric) is enabled. - * other: No other values should be possible at this point. - */ - DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode); - - switch (hw->fc.current_mode) { - case e1000_fc_none: - ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); - break; - case e1000_fc_rx_pause: - ctrl &= (~E1000_CTRL_TFCE); - ctrl |= E1000_CTRL_RFCE; - break; - case e1000_fc_tx_pause: - ctrl &= (~E1000_CTRL_RFCE); - ctrl |= E1000_CTRL_TFCE; - break; - case e1000_fc_full: - ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); - break; - default: - DEBUGOUT("Flow control param set incorrectly\n"); - return -E1000_ERR_CONFIG; - } - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - return E1000_SUCCESS; -} - -/** - * e1000_config_fc_after_link_up_generic - Configures flow control after link - * @hw: pointer to the HW structure - * - * Checks the status of auto-negotiation after link up to ensure that the - * speed and duplex were not forced. If the link needed to be forced, then - * flow control needs to be forced also. If auto-negotiation is enabled - * and did not fail, then we configure flow control based on our link - * partner. - **/ -s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - s32 ret_val = E1000_SUCCESS; - u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; - u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; - u16 speed, duplex; - - DEBUGFUNC("e1000_config_fc_after_link_up_generic"); - - /* Check for the case where we have fiber media and auto-neg failed - * so we had to force link. In this case, we need to force the - * configuration of the MAC to match the "fc" parameter. - */ - if (mac->autoneg_failed) { - if (hw->phy.media_type == e1000_media_type_fiber || - hw->phy.media_type == e1000_media_type_internal_serdes) - ret_val = e1000_force_mac_fc_generic(hw); - } else { - if (hw->phy.media_type == e1000_media_type_copper) - ret_val = e1000_force_mac_fc_generic(hw); - } - - if (ret_val) { - DEBUGOUT("Error forcing flow control settings\n"); - return ret_val; - } - - /* Check for the case where we have copper media and auto-neg is - * enabled. In this case, we need to check and see if Auto-Neg - * has completed, and if so, how the PHY and link partner has - * flow control configured. - */ - if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { - /* Read the MII Status Register and check to see if AutoNeg - * has completed. We read this twice because this reg has - * some "sticky" (latched) bits. 
- */ - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); - if (ret_val) - return ret_val; - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); - if (ret_val) - return ret_val; - - if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { - DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); - return ret_val; - } - - /* The AutoNeg process has completed, so we now need to - * read both the Auto Negotiation Advertisement - * Register (Address 4) and the Auto_Negotiation Base - * Page Ability Register (Address 5) to determine how - * flow control was negotiated. - */ - ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, - &mii_nway_adv_reg); - if (ret_val) - return ret_val; - ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, - &mii_nway_lp_ability_reg); - if (ret_val) - return ret_val; - - /* Two bits in the Auto Negotiation Advertisement Register - * (Address 4) and two bits in the Auto Negotiation Base - * Page Ability Register (Address 5) determine flow control - * for both the PHY and the link partner. The following - * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, - * 1999, describes these PAUSE resolution bits and how flow - * control is determined based upon these settings. - * NOTE: DC = Don't Care - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution - *-------|---------|-------|---------|-------------------- - * 0 | 0 | DC | DC | e1000_fc_none - * 0 | 1 | 0 | DC | e1000_fc_none - * 0 | 1 | 1 | 0 | e1000_fc_none - * 0 | 1 | 1 | 1 | e1000_fc_tx_pause - * 1 | 0 | 0 | DC | e1000_fc_none - * 1 | DC | 1 | DC | e1000_fc_full - * 1 | 1 | 0 | 0 | e1000_fc_none - * 1 | 1 | 0 | 1 | e1000_fc_rx_pause - * - * Are both PAUSE bits set to 1? If so, this implies - * Symmetric Flow Control is enabled at both ends. The - * ASM_DIR bits are irrelevant per the spec. - * - * For Symmetric Flow Control: - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | DC | 1 | DC | E1000_fc_full - * - */ - if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { - /* Now we need to check if the user selected Rx ONLY - * of pause frames. In this case, we had to advertise - * FULL flow control because we could not advertise Rx - * ONLY. Hence, we must now check to see if we need to - * turn OFF the TRANSMISSION of PAUSE frames. - */ - if (hw->fc.requested_mode == e1000_fc_full) { - hw->fc.current_mode = e1000_fc_full; - DEBUGOUT("Flow Control = FULL.\n"); - } else { - hw->fc.current_mode = e1000_fc_rx_pause; - DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); - } - } - /* For receiving PAUSE frames ONLY. - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 0 | 1 | 1 | 1 | e1000_fc_tx_pause - */ - else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && - (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { - hw->fc.current_mode = e1000_fc_tx_pause; - DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); - } - /* For transmitting PAUSE frames ONLY. 
- * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | 1 | 0 | 1 | e1000_fc_rx_pause - */ - else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && - !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { - hw->fc.current_mode = e1000_fc_rx_pause; - DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); - } else { - /* Per the IEEE spec, at this point flow control - * should be disabled. - */ - hw->fc.current_mode = e1000_fc_none; - DEBUGOUT("Flow Control = NONE.\n"); - } - - /* Now we need to do one last check... If we auto- - * negotiated to HALF DUPLEX, flow control should not be - * enabled per IEEE 802.3 spec. - */ - ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); - if (ret_val) { - DEBUGOUT("Error getting link speed and duplex\n"); - return ret_val; - } - - if (duplex == HALF_DUPLEX) - hw->fc.current_mode = e1000_fc_none; - - /* Now we call a subroutine to actually force the MAC - * controller to use the correct flow control settings. - */ - ret_val = e1000_force_mac_fc_generic(hw); - if (ret_val) { - DEBUGOUT("Error forcing flow control settings\n"); - return ret_val; - } - } - - /* Check for the case where we have SerDes media and auto-neg is - * enabled. In this case, we need to check and see if Auto-Neg - * has completed, and if so, how the PHY and link partner has - * flow control configured. - */ - if ((hw->phy.media_type == e1000_media_type_internal_serdes) && - mac->autoneg) { - /* Read the PCS_LSTS and check to see if AutoNeg - * has completed. - */ - pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT); - - if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { - DEBUGOUT("PCS Auto Neg has not completed.\n"); - return ret_val; - } - - /* The AutoNeg process has completed, so we now need to - * read both the Auto Negotiation Advertisement - * Register (PCS_ANADV) and the Auto_Negotiation Base - * Page Ability Register (PCS_LPAB) to determine how - * flow control was negotiated. - */ - pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); - pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB); - - /* Two bits in the Auto Negotiation Advertisement Register - * (PCS_ANADV) and two bits in the Auto Negotiation Base - * Page Ability Register (PCS_LPAB) determine flow control - * for both the PHY and the link partner. The following - * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, - * 1999, describes these PAUSE resolution bits and how flow - * control is determined based upon these settings. - * NOTE: DC = Don't Care - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution - *-------|---------|-------|---------|-------------------- - * 0 | 0 | DC | DC | e1000_fc_none - * 0 | 1 | 0 | DC | e1000_fc_none - * 0 | 1 | 1 | 0 | e1000_fc_none - * 0 | 1 | 1 | 1 | e1000_fc_tx_pause - * 1 | 0 | 0 | DC | e1000_fc_none - * 1 | DC | 1 | DC | e1000_fc_full - * 1 | 1 | 0 | 0 | e1000_fc_none - * 1 | 1 | 0 | 1 | e1000_fc_rx_pause - * - * Are both PAUSE bits set to 1? If so, this implies - * Symmetric Flow Control is enabled at both ends. The - * ASM_DIR bits are irrelevant per the spec. 
- * - * For Symmetric Flow Control: - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | DC | 1 | DC | e1000_fc_full - * - */ - if ((pcs_adv_reg & E1000_TXCW_PAUSE) && - (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { - /* Now we need to check if the user selected Rx ONLY - * of pause frames. In this case, we had to advertise - * FULL flow control because we could not advertise Rx - * ONLY. Hence, we must now check to see if we need to - * turn OFF the TRANSMISSION of PAUSE frames. - */ - if (hw->fc.requested_mode == e1000_fc_full) { - hw->fc.current_mode = e1000_fc_full; - DEBUGOUT("Flow Control = FULL.\n"); - } else { - hw->fc.current_mode = e1000_fc_rx_pause; - DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); - } - } - /* For receiving PAUSE frames ONLY. - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 0 | 1 | 1 | 1 | e1000_fc_tx_pause - */ - else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && - (pcs_adv_reg & E1000_TXCW_ASM_DIR) && - (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && - (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { - hw->fc.current_mode = e1000_fc_tx_pause; - DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); - } - /* For transmitting PAUSE frames ONLY. - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | 1 | 0 | 1 | e1000_fc_rx_pause - */ - else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && - (pcs_adv_reg & E1000_TXCW_ASM_DIR) && - !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && - (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { - hw->fc.current_mode = e1000_fc_rx_pause; - DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); - } else { - /* Per the IEEE spec, at this point flow control - * should be disabled. - */ - hw->fc.current_mode = e1000_fc_none; - DEBUGOUT("Flow Control = NONE.\n"); - } - - /* Now we call a subroutine to actually force the MAC - * controller to use the correct flow control settings. - */ - pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL); - pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; - E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg); - - ret_val = e1000_force_mac_fc_generic(hw); - if (ret_val) { - DEBUGOUT("Error forcing flow control settings\n"); - return ret_val; - } - } - - return E1000_SUCCESS; -} - -/** - * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex - * @hw: pointer to the HW structure - * @speed: stores the current speed - * @duplex: stores the current duplex - * - * Read the status register for the current speed/duplex and store the current - * speed and duplex for copper connections. 
- **/ -s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, - u16 *duplex) -{ - u32 status; - - DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); - - status = E1000_READ_REG(hw, E1000_STATUS); - if (status & E1000_STATUS_SPEED_1000) { - *speed = SPEED_1000; - DEBUGOUT("1000 Mbs, "); - } else if (status & E1000_STATUS_SPEED_100) { - *speed = SPEED_100; - DEBUGOUT("100 Mbs, "); - } else { - *speed = SPEED_10; - DEBUGOUT("10 Mbs, "); - } - - if (status & E1000_STATUS_FD) { - *duplex = FULL_DUPLEX; - DEBUGOUT("Full Duplex\n"); - } else { - *duplex = HALF_DUPLEX; - DEBUGOUT("Half Duplex\n"); - } - - return E1000_SUCCESS; -} - -/** - * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex - * @hw: pointer to the HW structure - * @speed: stores the current speed - * @duplex: stores the current duplex - * - * Sets the speed and duplex to gigabit full duplex (the only possible option) - * for fiber/serdes links. - **/ -s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw, - u16 *speed, u16 *duplex) -{ - DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); - - *speed = SPEED_1000; - *duplex = FULL_DUPLEX; - - return E1000_SUCCESS; -} - -/** - * e1000_get_hw_semaphore_generic - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_generic"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_generic - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** - * e1000_get_auto_rd_done_generic - Check for auto read completion - * @hw: pointer to the HW structure - * - * Check EEPROM for Auto Read done bit. 
- **/ -s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) -{ - s32 i = 0; - - DEBUGFUNC("e1000_get_auto_rd_done_generic"); - - while (i < AUTO_READ_DONE_TIMEOUT) { - if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) - break; - msec_delay(1); - i++; - } - - if (i == AUTO_READ_DONE_TIMEOUT) { - DEBUGOUT("Auto read by HW from NVM has not completed.\n"); - return -E1000_ERR_RESET; - } - - return E1000_SUCCESS; -} - -/** - * e1000_valid_led_default_generic - Verify a valid default LED config - * @hw: pointer to the HW structure - * @data: pointer to the NVM (EEPROM) - * - * Read the EEPROM for the current default LED configuration. If the - * LED configuration is not valid, set to a valid LED configuration. - **/ -s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_valid_led_default_generic"); - - ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) - *data = ID_LED_DEFAULT; - - return E1000_SUCCESS; -} - -/** - * e1000_id_led_init_generic - - * @hw: pointer to the HW structure - * - **/ -s32 e1000_id_led_init_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - s32 ret_val; - const u32 ledctl_mask = 0x000000FF; - const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; - const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; - u16 data, i, temp; - const u16 led_mask = 0x0F; - - DEBUGFUNC("e1000_id_led_init_generic"); - - ret_val = hw->nvm.ops.valid_led_default(hw, &data); - if (ret_val) - return ret_val; - - mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); - mac->ledctl_mode1 = mac->ledctl_default; - mac->ledctl_mode2 = mac->ledctl_default; - - for (i = 0; i < 4; i++) { - temp = (data >> (i << 2)) & led_mask; - switch (temp) { - case ID_LED_ON1_DEF2: - case ID_LED_ON1_ON2: - case ID_LED_ON1_OFF2: - mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); - mac->ledctl_mode1 |= ledctl_on << (i << 3); - break; - case ID_LED_OFF1_DEF2: - case ID_LED_OFF1_ON2: - case ID_LED_OFF1_OFF2: - mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); - mac->ledctl_mode1 |= ledctl_off << (i << 3); - break; - default: - /* Do nothing */ - break; - } - switch (temp) { - case ID_LED_DEF1_ON2: - case ID_LED_ON1_ON2: - case ID_LED_OFF1_ON2: - mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); - mac->ledctl_mode2 |= ledctl_on << (i << 3); - break; - case ID_LED_DEF1_OFF2: - case ID_LED_ON1_OFF2: - case ID_LED_OFF1_OFF2: - mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); - mac->ledctl_mode2 |= ledctl_off << (i << 3); - break; - default: - /* Do nothing */ - break; - } - } - - return E1000_SUCCESS; -} - -/** - * e1000_setup_led_generic - Configures SW controllable LED - * @hw: pointer to the HW structure - * - * This prepares the SW controllable LED for use and saves the current state - * of the LED so it can be later restored. 
- **/ -s32 e1000_setup_led_generic(struct e1000_hw *hw) -{ - u32 ledctl; - - DEBUGFUNC("e1000_setup_led_generic"); - - if (hw->mac.ops.setup_led != e1000_setup_led_generic) - return -E1000_ERR_CONFIG; - - if (hw->phy.media_type == e1000_media_type_fiber) { - ledctl = E1000_READ_REG(hw, E1000_LEDCTL); - hw->mac.ledctl_default = ledctl; - /* Turn off LED0 */ - ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | - E1000_LEDCTL_LED0_MODE_MASK); - ledctl |= (E1000_LEDCTL_MODE_LED_OFF << - E1000_LEDCTL_LED0_MODE_SHIFT); - E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); - } else if (hw->phy.media_type == e1000_media_type_copper) { - E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); - } - - return E1000_SUCCESS; -} - -/** - * e1000_cleanup_led_generic - Set LED config to default operation - * @hw: pointer to the HW structure - * - * Remove the current LED configuration and set the LED configuration - * to the default value, saved from the EEPROM. - **/ -s32 e1000_cleanup_led_generic(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_cleanup_led_generic"); - - E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); - return E1000_SUCCESS; -} - -/** - * e1000_blink_led_generic - Blink LED - * @hw: pointer to the HW structure - * - * Blink the LEDs which are set to be on. - **/ -s32 e1000_blink_led_generic(struct e1000_hw *hw) -{ - u32 ledctl_blink = 0; - u32 i; - - DEBUGFUNC("e1000_blink_led_generic"); - - if (hw->phy.media_type == e1000_media_type_fiber) { - /* always blink LED0 for PCI-E fiber */ - ledctl_blink = E1000_LEDCTL_LED0_BLINK | - (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); - } else { - /* Set the blink bit for each LED that's "on" (0x0E) - * (or "off" if inverted) in ledctl_mode2. The blink - * logic in hardware only works when mode is set to "on" - * so it must be changed accordingly when the mode is - * "off" and inverted. - */ - ledctl_blink = hw->mac.ledctl_mode2; - for (i = 0; i < 32; i += 8) { - u32 mode = (hw->mac.ledctl_mode2 >> i) & - E1000_LEDCTL_LED0_MODE_MASK; - u32 led_default = hw->mac.ledctl_default >> i; - - if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && - (mode == E1000_LEDCTL_MODE_LED_ON)) || - ((led_default & E1000_LEDCTL_LED0_IVRT) && - (mode == E1000_LEDCTL_MODE_LED_OFF))) { - ledctl_blink &= - ~(E1000_LEDCTL_LED0_MODE_MASK << i); - ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | - E1000_LEDCTL_MODE_LED_ON) << i; - } - } - } - - E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); - - return E1000_SUCCESS; -} - -/** - * e1000_led_on_generic - Turn LED on - * @hw: pointer to the HW structure - * - * Turn LED on. - **/ -s32 e1000_led_on_generic(struct e1000_hw *hw) -{ - u32 ctrl; - - DEBUGFUNC("e1000_led_on_generic"); - - switch (hw->phy.media_type) { - case e1000_media_type_fiber: - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl &= ~E1000_CTRL_SWDPIN0; - ctrl |= E1000_CTRL_SWDPIO0; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - break; - case e1000_media_type_copper: - E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); - break; - default: - break; - } - - return E1000_SUCCESS; -} - -/** - * e1000_led_off_generic - Turn LED off - * @hw: pointer to the HW structure - * - * Turn LED off. 
- **/ -s32 e1000_led_off_generic(struct e1000_hw *hw) -{ - u32 ctrl; - - DEBUGFUNC("e1000_led_off_generic"); - - switch (hw->phy.media_type) { - case e1000_media_type_fiber: - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= E1000_CTRL_SWDPIN0; - ctrl |= E1000_CTRL_SWDPIO0; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - break; - case e1000_media_type_copper: - E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); - break; - default: - break; - } - - return E1000_SUCCESS; -} - -/** - * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities - * @hw: pointer to the HW structure - * @no_snoop: bitmap of snoop events - * - * Set the PCI-express register to snoop for events enabled in 'no_snoop'. - **/ -void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) -{ - u32 gcr; - - DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); - - if (no_snoop) { - gcr = E1000_READ_REG(hw, E1000_GCR); - gcr &= ~(PCIE_NO_SNOOP_ALL); - gcr |= no_snoop; - E1000_WRITE_REG(hw, E1000_GCR, gcr); - } -} - -/** - * e1000_disable_pcie_master_generic - Disables PCI-express master access - * @hw: pointer to the HW structure - * - * Returns E1000_SUCCESS if successful, else returns -10 - * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused - * the master requests to be disabled. - * - * Disables PCI-Express master access and verifies there are no pending - * requests. - **/ -s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) -{ - u32 ctrl; - s32 timeout = MASTER_DISABLE_TIMEOUT; - - DEBUGFUNC("e1000_disable_pcie_master_generic"); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - while (timeout) { - if (!(E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_GIO_MASTER_ENABLE)) - break; - usec_delay(100); - timeout--; - } - - if (!timeout) { - DEBUGOUT("Master requests are pending.\n"); - return -E1000_ERR_MASTER_REQUESTS_PENDING; - } - - return E1000_SUCCESS; -} - -/** - * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing - * @hw: pointer to the HW structure - * - * Reset the Adaptive Interframe Spacing throttle to default values. - **/ -void e1000_reset_adaptive_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - - DEBUGFUNC("e1000_reset_adaptive_generic"); - - if (!mac->adaptive_ifs) { - DEBUGOUT("Not in Adaptive IFS mode!\n"); - return; - } - - mac->current_ifs_val = 0; - mac->ifs_min_val = IFS_MIN; - mac->ifs_max_val = IFS_MAX; - mac->ifs_step_size = IFS_STEP; - mac->ifs_ratio = IFS_RATIO; - - mac->in_ifs_mode = false; - E1000_WRITE_REG(hw, E1000_AIT, 0); -} - -/** - * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing - * @hw: pointer to the HW structure - * - * Update the Adaptive Interframe Spacing Throttle value based on the - * time between transmitted packets and time between collisions. 
- **/ -void e1000_update_adaptive_generic(struct e1000_hw *hw) -{ - struct e1000_mac_info *mac = &hw->mac; - - DEBUGFUNC("e1000_update_adaptive_generic"); - - if (!mac->adaptive_ifs) { - DEBUGOUT("Not in Adaptive IFS mode!\n"); - return; - } - - if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { - if (mac->tx_packet_delta > MIN_NUM_XMITS) { - mac->in_ifs_mode = true; - if (mac->current_ifs_val < mac->ifs_max_val) { - if (!mac->current_ifs_val) - mac->current_ifs_val = mac->ifs_min_val; - else - mac->current_ifs_val += - mac->ifs_step_size; - E1000_WRITE_REG(hw, E1000_AIT, - mac->current_ifs_val); - } - } - } else { - if (mac->in_ifs_mode && - (mac->tx_packet_delta <= MIN_NUM_XMITS)) { - mac->current_ifs_val = 0; - mac->in_ifs_mode = false; - E1000_WRITE_REG(hw, E1000_AIT, 0); - } - } -} - -/** - * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings - * @hw: pointer to the HW structure - * - * Verify that when not using auto-negotiation that MDI/MDIx is correctly - * set, which is forced to MDI mode only. - **/ -static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_validate_mdi_setting_generic"); - - if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { - DEBUGOUT("Invalid MDI setting detected\n"); - hw->phy.mdix = 1; - return -E1000_ERR_CONFIG; - } - - return E1000_SUCCESS; -} - -/** - * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings - * @hw: pointer to the HW structure - * - * Validate the MDI/MDIx setting, allowing for auto-crossover during forced - * operation. - **/ -s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic"); - - return E1000_SUCCESS; -} - -/** - * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register - * @hw: pointer to the HW structure - * @reg: 32bit register offset such as E1000_SCTL - * @offset: register offset to write to - * @data: data to write at register offset - * - * Writes an address/data control type register. There are several of these - * and they all have the format address << 8 | data and bit 31 is polled for - * completion. - **/ -s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, - u32 offset, u8 data) -{ - u32 i, regvalue = 0; - - DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); - - /* Set up the address and data */ - regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); - E1000_WRITE_REG(hw, reg, regvalue); - - /* Poll the ready bit to see if the MDI read completed */ - for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { - usec_delay(5); - regvalue = E1000_READ_REG(hw, reg); - if (regvalue & E1000_GEN_CTL_READY) - break; - } - if (!(regvalue & E1000_GEN_CTL_READY)) { - DEBUGOUT1("Reg %08x did not indicate ready\n", reg); - return -E1000_ERR_PHY; - } - - return E1000_SUCCESS; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h deleted file mode 100644 index a3e78498..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
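The PAUSE/ASM_DIR resolution tables quoted in the comments above collapse to a small decision function. As a rough standalone restatement (the enum and function names below are invented for illustration and are not the driver's API), the negotiated mode could be modelled as follows; note that the removed driver additionally downgrades the symmetric case to Rx-only PAUSE when the user originally requested receive-only flow control:

#include <stdbool.h>
#include <stdio.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static enum fc_mode resolve_fc(bool loc_pause, bool loc_asm,
                               bool lp_pause, bool lp_asm)
{
    /* Both ends advertise PAUSE: symmetric flow control, ASM_DIR ignored. */
    if (loc_pause && lp_pause)
        return FC_FULL;
    /* Local is asymmetric-only, partner advertises both:
     * local may send PAUSE frames but will not honour them. */
    if (!loc_pause && loc_asm && lp_pause && lp_asm)
        return FC_TX_PAUSE;
    /* Local advertises both, partner is asymmetric-only:
     * local honours PAUSE frames but will not send them. */
    if (loc_pause && loc_asm && !lp_pause && lp_asm)
        return FC_RX_PAUSE;
    /* Every other combination resolves to no flow control. */
    return FC_NONE;
}

int main(void)
{
    /* Row "1 | 1 | 0 | 1" of the table resolves to Rx PAUSE only. */
    printf("1/1 vs 0/1 -> %d (expect 1 = FC_RX_PAUSE)\n",
           resolve_fc(true, true, false, true));
    return 0;
}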
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_MAC_H_ -#define _E1000_MAC_H_ - -void e1000_init_mac_ops_generic(struct e1000_hw *hw); -void e1000_null_mac_generic(struct e1000_hw *hw); -s32 e1000_null_ops_generic(struct e1000_hw *hw); -s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); -bool e1000_null_mng_mode(struct e1000_hw *hw); -void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); -void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); -void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); -s32 e1000_blink_led_generic(struct e1000_hw *hw); -s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); -s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); -s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); -s32 e1000_cleanup_led_generic(struct e1000_hw *hw); -s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); -s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); -s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); -s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); -s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); -void e1000_set_lan_id_single_port(struct e1000_hw *hw); -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); -s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, - u16 *duplex); -s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, - u16 *speed, u16 *duplex); -s32 e1000_id_led_init_generic(struct e1000_hw *hw); -s32 e1000_led_on_generic(struct e1000_hw *hw); -s32 e1000_led_off_generic(struct e1000_hw *hw); -void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count); -s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); -s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); -s32 e1000_setup_led_generic(struct e1000_hw *hw); -s32 e1000_setup_link_generic(struct e1000_hw *hw); -s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw); -s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, - u32 offset, u8 data); - -u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); - -void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); -void e1000_clear_vfta_generic(struct e1000_hw *hw); -void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); -s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); -void e1000_reset_adaptive_generic(struct e1000_hw *hw); -void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); -void e1000_update_adaptive_generic(struct e1000_hw *hw); -void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c deleted file mode 100644 index 2f75bc35..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c +++ /dev/null @@ -1,539 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
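The hardware-semaphore helpers declared in this header use a two-stage handshake in the implementation removed earlier: poll SWSM until the software bit (SMBI) clears, then set the firmware bit (SWESMBI) and read it back to confirm it latched, releasing both bits on failure. A compilable sketch of that pattern against a fake register, assuming invented accessor names, might look like:

#include <stdbool.h>
#include <stdio.h>

#define SMBI    0x1u
#define SWESMBI 0x2u

static unsigned int swsm;                           /* stand-in for E1000_SWSM */
static unsigned int swsm_read(void) { return swsm; }
static void swsm_write(unsigned int v) { swsm = v; }

static bool hw_semaphore_get(int timeout)
{
    int i;

    /* Stage 1: wait for the software semaphore bit to be clear. */
    for (i = 0; i < timeout; i++) {
        if (!(swsm_read() & SMBI))
            break;                          /* real code delays 50 us per try */
    }
    if (i == timeout)
        return false;

    /* Stage 2: claim the firmware semaphore and verify the bit latched. */
    for (i = 0; i < timeout; i++) {
        swsm_write(swsm_read() | SWESMBI);
        if (swsm_read() & SWESMBI)
            return true;                    /* bit latched, semaphore is ours */
    }
    swsm_write(swsm_read() & ~(SMBI | SWESMBI));    /* release on failure */
    return false;
}

int main(void)
{
    printf("semaphore %s\n", hw_semaphore_get(16) ? "acquired" : "busy");
    return 0;
}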
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - -/** - * e1000_calculate_checksum - Calculate checksum for buffer - * @buffer: pointer to EEPROM - * @length: size of EEPROM to calculate a checksum for - * - * Calculates the checksum for some buffer on a specified length. The - * checksum calculated is returned. - **/ -u8 e1000_calculate_checksum(u8 *buffer, u32 length) -{ - u32 i; - u8 sum = 0; - - DEBUGFUNC("e1000_calculate_checksum"); - - if (!buffer) - return 0; - - for (i = 0; i < length; i++) - sum += buffer[i]; - - return (u8) (0 - sum); -} - -/** - * e1000_mng_enable_host_if_generic - Checks host interface is enabled - * @hw: pointer to the HW structure - * - * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND - * - * This function checks whether the HOST IF is enabled for command operation - * and also checks whether the previous command is completed. It busy waits - * in case of previous command is not completed. - **/ -s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw) -{ - u32 hicr; - u8 i; - - DEBUGFUNC("e1000_mng_enable_host_if_generic"); - - if (!hw->mac.arc_subsystem_valid) { - DEBUGOUT("ARC subsystem not valid.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - /* Check that the host interface is enabled. */ - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_EN)) { - DEBUGOUT("E1000_HOST_EN bit disabled.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - /* check the previous command is completed */ - for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_C)) - break; - msec_delay_irq(1); - } - - if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { - DEBUGOUT("Previous command timeout failed .\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - return E1000_SUCCESS; -} - -/** - * e1000_check_mng_mode_generic - Generic check management mode - * @hw: pointer to the HW structure - * - * Reads the firmware semaphore register and returns true (>0) if - * manageability is enabled, else false (0). - **/ -bool e1000_check_mng_mode_generic(struct e1000_hw *hw) -{ - u32 fwsm = E1000_READ_REG(hw, E1000_FWSM); - - DEBUGFUNC("e1000_check_mng_mode_generic"); - - - return (fwsm & E1000_FWSM_MODE_MASK) == - (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT); -} - -/** - * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx - * @hw: pointer to the HW structure - * - * Enables packet filtering on transmit packets if manageability is enabled - * and host interface is enabled. - **/ -bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw) -{ - struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; - u32 *buffer = (u32 *)&hw->mng_cookie; - u32 offset; - s32 ret_val, hdr_csum, csum; - u8 i, len; - - DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic"); - - hw->mac.tx_pkt_filtering = true; - - /* No manageability, no filtering */ - if (!hw->mac.ops.check_mng_mode(hw)) { - hw->mac.tx_pkt_filtering = false; - return hw->mac.tx_pkt_filtering; - } - - /* If we can't read from the host interface for whatever - * reason, disable filtering. - */ - ret_val = e1000_mng_enable_host_if_generic(hw); - if (ret_val != E1000_SUCCESS) { - hw->mac.tx_pkt_filtering = false; - return hw->mac.tx_pkt_filtering; - } - - /* Read in the header. Length and offset are in dwords. 
*/ - len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; - offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; - for (i = 0; i < len; i++) - *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, - offset + i); - hdr_csum = hdr->checksum; - hdr->checksum = 0; - csum = e1000_calculate_checksum((u8 *)hdr, - E1000_MNG_DHCP_COOKIE_LENGTH); - /* If either the checksums or signature don't match, then - * the cookie area isn't considered valid, in which case we - * take the safe route of assuming Tx filtering is enabled. - */ - if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) { - hw->mac.tx_pkt_filtering = true; - return hw->mac.tx_pkt_filtering; - } - - /* Cookie area is valid, make the final check for filtering. */ - if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) - hw->mac.tx_pkt_filtering = false; - - return hw->mac.tx_pkt_filtering; -} - -/** - * e1000_mng_write_cmd_header_generic - Writes manageability command header - * @hw: pointer to the HW structure - * @hdr: pointer to the host interface command header - * - * Writes the command header after does the checksum calculation. - **/ -s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, - struct e1000_host_mng_command_header *hdr) -{ - u16 i, length = sizeof(struct e1000_host_mng_command_header); - - DEBUGFUNC("e1000_mng_write_cmd_header_generic"); - - /* Write the whole command header structure with new checksum. */ - - hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); - - length >>= 2; - /* Write the relevant command block into the ram area. */ - for (i = 0; i < length; i++) { - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, - *((u32 *) hdr + i)); - E1000_WRITE_FLUSH(hw); - } - - return E1000_SUCCESS; -} - -/** - * e1000_mng_host_if_write_generic - Write to the manageability host interface - * @hw: pointer to the HW structure - * @buffer: pointer to the host interface buffer - * @length: size of the buffer - * @offset: location in the buffer to write to - * @sum: sum of the data (not checksum) - * - * This function writes the buffer content at the offset given on the host if. - * It also does alignment considerations to do the writes in most efficient - * way. Also fills up the sum of the buffer in *buffer parameter. - **/ -s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, - u16 length, u16 offset, u8 *sum) -{ - u8 *tmp; - u8 *bufptr = buffer; - u32 data = 0; - u16 remaining, i, j, prev_bytes; - - DEBUGFUNC("e1000_mng_host_if_write_generic"); - - /* sum = only sum of the data and it is not checksum */ - - if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) - return -E1000_ERR_PARAM; - - tmp = (u8 *)&data; - prev_bytes = offset & 0x3; - offset >>= 2; - - if (prev_bytes) { - data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset); - for (j = prev_bytes; j < sizeof(u32); j++) { - *(tmp + j) = *bufptr++; - *sum += *(tmp + j); - } - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data); - length -= j - prev_bytes; - offset++; - } - - remaining = length & 0x3; - length -= remaining; - - /* Calculate length in DWORDs */ - length >>= 2; - - /* The device driver writes the relevant command block into the - * ram area. 
- */ - for (i = 0; i < length; i++) { - for (j = 0; j < sizeof(u32); j++) { - *(tmp + j) = *bufptr++; - *sum += *(tmp + j); - } - - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, - data); - } - if (remaining) { - for (j = 0; j < sizeof(u32); j++) { - if (j < remaining) - *(tmp + j) = *bufptr++; - else - *(tmp + j) = 0; - - *sum += *(tmp + j); - } - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, - data); - } - - return E1000_SUCCESS; -} - -/** - * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface - * @hw: pointer to the HW structure - * @buffer: pointer to the host interface - * @length: size of the buffer - * - * Writes the DHCP information to the host interface. - **/ -s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer, - u16 length) -{ - struct e1000_host_mng_command_header hdr; - s32 ret_val; - u32 hicr; - - DEBUGFUNC("e1000_mng_write_dhcp_info_generic"); - - hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; - hdr.command_length = length; - hdr.reserved1 = 0; - hdr.reserved2 = 0; - hdr.checksum = 0; - - /* Enable the host interface */ - ret_val = e1000_mng_enable_host_if_generic(hw); - if (ret_val) - return ret_val; - - /* Populate the host interface with the contents of "buffer". */ - ret_val = e1000_mng_host_if_write_generic(hw, buffer, length, - sizeof(hdr), &(hdr.checksum)); - if (ret_val) - return ret_val; - - /* Write the manageability command header */ - ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr); - if (ret_val) - return ret_val; - - /* Tell the ARC a new command is pending. */ - hicr = E1000_READ_REG(hw, E1000_HICR); - E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); - - return E1000_SUCCESS; -} - -/** - * e1000_enable_mng_pass_thru - Check if management passthrough is needed - * @hw: pointer to the HW structure - * - * Verifies the hardware needs to leave interface enabled so that frames can - * be directed to and from the management interface. - **/ -bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) -{ - u32 manc; - u32 fwsm, factps; - - DEBUGFUNC("e1000_enable_mng_pass_thru"); - - if (!hw->mac.asf_firmware_present) - return false; - - manc = E1000_READ_REG(hw, E1000_MANC); - - if (!(manc & E1000_MANC_RCV_TCO_EN)) - return false; - - if (hw->mac.has_fwsm) { - fwsm = E1000_READ_REG(hw, E1000_FWSM); - factps = E1000_READ_REG(hw, E1000_FACTPS); - - if (!(factps & E1000_FACTPS_MNGCG) && - ((fwsm & E1000_FWSM_MODE_MASK) == - (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) - return true; - } else if ((manc & E1000_MANC_SMBUS_EN) && - !(manc & E1000_MANC_ASF_EN)) { - return true; - } - - return false; -} - -/** - * e1000_host_interface_command - Writes buffer to host interface - * @hw: pointer to the HW structure - * @buffer: contains a command to write - * @length: the byte length of the buffer, must be multiple of 4 bytes - * - * Writes a buffer to the Host Interface. Upon success, returns E1000_SUCCESS - * else returns E1000_ERR_HOST_INTERFACE_COMMAND. 
- **/ -s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length) -{ - u32 hicr, i; - - DEBUGFUNC("e1000_host_interface_command"); - - if (!(hw->mac.arc_subsystem_valid)) { - DEBUGOUT("Hardware doesn't support host interface command.\n"); - return E1000_SUCCESS; - } - - if (!hw->mac.asf_firmware_present) { - DEBUGOUT("Firmware is not present.\n"); - return E1000_SUCCESS; - } - - if (length == 0 || length & 0x3 || - length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) { - DEBUGOUT("Buffer length failure.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - /* Check that the host interface is enabled. */ - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_EN)) { - DEBUGOUT("E1000_HOST_EN bit disabled.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - /* Calculate length in DWORDs */ - length >>= 2; - - /* The device driver writes the relevant command block - * into the ram area. - */ - for (i = 0; i < length; i++) - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, - *((u32 *)buffer + i)); - - /* Setting this bit tells the ARC that a new command is pending. */ - E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); - - for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_C)) - break; - msec_delay(1); - } - - /* Check command successful completion. */ - if (i == E1000_HI_COMMAND_TIMEOUT || - (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) { - DEBUGOUT("Command has failed with no status valid.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - for (i = 0; i < length; i++) - *((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, - E1000_HOST_IF, - i); - - return E1000_SUCCESS; -} -/** - * e1000_load_firmware - Writes proxy FW code buffer to host interface - * and execute. - * @hw: pointer to the HW structure - * @buffer: contains a firmware to write - * @length: the byte length of the buffer, must be multiple of 4 bytes - * - * Upon success returns E1000_SUCCESS, returns E1000_ERR_CONFIG if not enabled - * in HW else returns E1000_ERR_HOST_INTERFACE_COMMAND. - **/ -s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length) -{ - u32 hicr, hibba, fwsm, icr, i; - - DEBUGFUNC("e1000_load_firmware"); - - if (hw->mac.type < e1000_i210) { - DEBUGOUT("Hardware doesn't support loading FW by the driver\n"); - return -E1000_ERR_CONFIG; - } - - /* Check that the host interface is enabled. 
*/ - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_EN)) { - DEBUGOUT("E1000_HOST_EN bit disabled.\n"); - return -E1000_ERR_CONFIG; - } - if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) { - DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n"); - return -E1000_ERR_CONFIG; - } - - if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) { - DEBUGOUT("Buffer length failure.\n"); - return -E1000_ERR_INVALID_ARGUMENT; - } - - /* Clear notification from ROM-FW by reading ICR register */ - icr = E1000_READ_REG(hw, E1000_ICR_V2); - - /* Reset ROM-FW */ - hicr = E1000_READ_REG(hw, E1000_HICR); - hicr |= E1000_HICR_FW_RESET_ENABLE; - E1000_WRITE_REG(hw, E1000_HICR, hicr); - hicr |= E1000_HICR_FW_RESET; - E1000_WRITE_REG(hw, E1000_HICR, hicr); - E1000_WRITE_FLUSH(hw); - - /* Wait till MAC notifies about its readiness after ROM-FW reset */ - for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) { - icr = E1000_READ_REG(hw, E1000_ICR_V2); - if (icr & E1000_ICR_MNG) - break; - msec_delay(1); - } - - /* Check for timeout */ - if (i == E1000_HI_COMMAND_TIMEOUT) { - DEBUGOUT("FW reset failed.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - /* Wait till MAC is ready to accept new FW code */ - for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { - fwsm = E1000_READ_REG(hw, E1000_FWSM); - if ((fwsm & E1000_FWSM_FW_VALID) && - ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT == - E1000_FWSM_HI_EN_ONLY_MODE)) - break; - msec_delay(1); - } - - /* Check for timeout */ - if (i == E1000_HI_COMMAND_TIMEOUT) { - DEBUGOUT("FW reset failed.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - /* Calculate length in DWORDs */ - length >>= 2; - - /* The device driver writes the relevant FW code block - * into the ram area in DWORDs via 1kB ram addressing window. - */ - for (i = 0; i < length; i++) { - if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) { - /* Point to correct 1kB ram window */ - hibba = E1000_HI_FW_BASE_ADDRESS + - ((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) * - (i / E1000_HI_FW_BLOCK_DWORD_LENGTH)); - - E1000_WRITE_REG(hw, E1000_HIBBA, hibba); - } - - E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, - i % E1000_HI_FW_BLOCK_DWORD_LENGTH, - *((u32 *)buffer + i)); - } - - /* Setting this bit tells the ARC that a new FW is ready to execute. */ - hicr = E1000_READ_REG(hw, E1000_HICR); - E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); - - for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { - hicr = E1000_READ_REG(hw, E1000_HICR); - if (!(hicr & E1000_HICR_C)) - break; - msec_delay(1); - } - - /* Check for successful FW start. */ - if (i == E1000_HI_COMMAND_TIMEOUT) { - DEBUGOUT("New FW did not start within timeout period.\n"); - return -E1000_ERR_HOST_INTERFACE_COMMAND; - } - - return E1000_SUCCESS; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h deleted file mode 100644 index 9f27b934..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
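The host-interface command path removed above follows one fixed sequence: check that the buffer length is a non-zero multiple of four and that the interface is enabled, copy the command into the shared RAM window as dwords, set the HICR command bit, poll until firmware clears it, and finally test the status-valid bit. A toy model of that flow, with a stubbed firmware and invented names, could read:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HICR_EN 0x01u
#define HICR_C  0x02u
#define HICR_SV 0x04u

static uint32_t hicr = HICR_EN;
static uint32_t host_if[448];                 /* 1792-byte window, as dwords */

/* Pretend firmware: consumes the command and reports valid status. */
static void fw_simulate(void) { hicr &= ~HICR_C; hicr |= HICR_SV; }

static int host_if_command(const uint8_t *buf, uint32_t len, uint32_t timeout)
{
    uint32_t i;

    if (len == 0 || (len & 0x3) || len > sizeof(host_if))
        return -1;                            /* must be a multiple of 4 bytes */
    if (!(hicr & HICR_EN))
        return -1;                            /* interface not enabled */

    memcpy(host_if, buf, len);                /* real code writes dword array regs */
    hicr |= HICR_C;                           /* tell the ARC a command is pending */

    for (i = 0; i < timeout; i++) {
        fw_simulate();                        /* stand-in for firmware acting */
        if (!(hicr & HICR_C))
            break;                            /* real code delays 1 ms per loop */
    }
    if (i == timeout || !(hicr & HICR_SV))
        return -1;                            /* timed out or status not valid */
    return 0;
}

int main(void)
{
    uint8_t cmd[8] = { 0 };
    printf("command %s\n",
           host_if_command(cmd, sizeof(cmd), 500) ? "failed" : "completed");
    return 0;
}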
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_MANAGE_H_ -#define _E1000_MANAGE_H_ - -bool e1000_check_mng_mode_generic(struct e1000_hw *hw); -bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw); -s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw); -s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, - u16 length, u16 offset, u8 *sum); -s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, - struct e1000_host_mng_command_header *hdr); -s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, - u8 *buffer, u16 length); -bool e1000_enable_mng_pass_thru(struct e1000_hw *hw); -u8 e1000_calculate_checksum(u8 *buffer, u32 length); -s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length); -s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length); - -enum e1000_mng_mode { - e1000_mng_mode_none = 0, - e1000_mng_mode_asf, - e1000_mng_mode_pt, - e1000_mng_mode_ipmi, - e1000_mng_mode_host_if_only -}; - -#define E1000_FACTPS_MNGCG 0x20000000 - -#define E1000_FWSM_MODE_MASK 0xE -#define E1000_FWSM_MODE_SHIFT 1 -#define E1000_FWSM_FW_VALID 0x00008000 -#define E1000_FWSM_HI_EN_ONLY_MODE 0x4 - -#define E1000_MNG_IAMT_MODE 0x3 -#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 -#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 -#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 -#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 -#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 -#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 - -#define E1000_VFTA_ENTRY_SHIFT 5 -#define E1000_VFTA_ENTRY_MASK 0x7F -#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F - -#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ -#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ -#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI cmd limit */ -#define E1000_HI_FW_BASE_ADDRESS 0x10000 -#define E1000_HI_FW_MAX_LENGTH (64 * 1024) /* Num of bytes */ -#define E1000_HI_FW_BLOCK_DWORD_LENGTH 256 /* Num of DWORDs per page */ -#define E1000_HICR_MEMORY_BASE_EN 0x200 /* MB Enable bit - RO */ -#define E1000_HICR_EN 0x01 /* Enable bit - RO */ -/* Driver sets this bit when done to put command in RAM */ -#define E1000_HICR_C 0x02 -#define E1000_HICR_SV 0x04 /* Status Validity */ -#define E1000_HICR_FW_RESET_ENABLE 0x40 -#define E1000_HICR_FW_RESET 0x80 - -/* Intel(R) Active Management Technology signature */ -#define E1000_IAMT_SIGNATURE 0x544D4149 - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c deleted file mode 100644 index 1be44349..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c +++ /dev/null @@ -1,510 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
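The manageability structures in this header are protected by the byte checksum computed by the e1000_calculate_checksum() routine removed above: the stored value is the two's complement of the byte sum, so summing the whole structure (checksum included) yields zero, which is exactly the test the deleted cookie-validation code performs after zeroing the stored field and recomputing. A minimal demonstration, using a made-up command layout, is:

#include <stdint.h>
#include <stdio.h>

/* Two's-complement byte checksum: the returned byte makes the buffer,
 * including the checksum field itself, sum to zero modulo 256. */
static uint8_t calc_checksum(const uint8_t *buf, uint32_t len)
{
    uint8_t sum = 0;
    for (uint32_t i = 0; i < len; i++)
        sum += buf[i];
    return (uint8_t)(0 - sum);
}

int main(void)
{
    uint8_t cmd[8] = { 0x40, 0x04, 0, 0, 0, 0, 0, 0 }; /* hypothetical header */
    cmd[2] = calc_checksum(cmd, sizeof(cmd));          /* checksum byte at [2] */

    uint8_t verify = 0;
    for (uint32_t i = 0; i < sizeof(cmd); i++)
        verify += cmd[i];
    printf("verify sum = %u (0 means valid)\n", verify);
    return 0;
}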
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_mbx.h" - -/** - * e1000_null_mbx_check_for_flag - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG mbx_id) -{ - DEBUGFUNC("e1000_null_mbx_check_flag"); - - return E1000_SUCCESS; -} - -/** - * e1000_null_mbx_transact - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw, - u32 E1000_UNUSEDARG *msg, - u16 E1000_UNUSEDARG size, - u16 E1000_UNUSEDARG mbx_id) -{ - DEBUGFUNC("e1000_null_mbx_rw_msg"); - - return E1000_SUCCESS; -} - -/** - * e1000_read_mbx - Reads a message from the mailbox - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @mbx_id: id of mailbox to read - * - * returns SUCCESS if it successfully read message from buffer - **/ -s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_read_mbx"); - - /* limit read to size of mailbox */ - if (size > mbx->size) - size = mbx->size; - - if (mbx->ops.read) - ret_val = mbx->ops.read(hw, msg, size, mbx_id); - - return ret_val; -} - -/** - * e1000_write_mbx - Write a message to the mailbox - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @mbx_id: id of mailbox to write - * - * returns SUCCESS if it successfully copied message into the buffer - **/ -s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_write_mbx"); - - if (size > mbx->size) - ret_val = -E1000_ERR_MBX; - - else if (mbx->ops.write) - ret_val = mbx->ops.write(hw, msg, size, mbx_id); - - return ret_val; -} - -/** - * e1000_check_for_msg - checks to see if someone sent us mail - * @hw: pointer to the HW structure - * @mbx_id: id of mailbox to check - * - * returns SUCCESS if the Status bit was found or else ERR_MBX - **/ -s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_msg"); - - if (mbx->ops.check_for_msg) - ret_val = mbx->ops.check_for_msg(hw, mbx_id); - - return ret_val; -} - -/** - * e1000_check_for_ack - checks to see if someone sent us ACK - * @hw: pointer to the HW structure - * @mbx_id: id of mailbox to check - * - * returns SUCCESS if the Status bit was found or else ERR_MBX - **/ -s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_ack"); - - if (mbx->ops.check_for_ack) - ret_val = mbx->ops.check_for_ack(hw, mbx_id); - - return ret_val; -} - -/** - * e1000_check_for_rst - checks to see if other side has reset - * @hw: pointer to the HW structure - * @mbx_id: id of mailbox to check - * - * returns SUCCESS if the Status bit was found or else ERR_MBX - **/ -s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_rst"); - - if (mbx->ops.check_for_rst) - ret_val = mbx->ops.check_for_rst(hw, mbx_id); - - return ret_val; -} - -/** - * e1000_poll_for_msg - Wait for message 
notification - * @hw: pointer to the HW structure - * @mbx_id: id of mailbox to write - * - * returns SUCCESS if it successfully received a message notification - **/ -static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - int countdown = mbx->timeout; - - DEBUGFUNC("e1000_poll_for_msg"); - - if (!countdown || !mbx->ops.check_for_msg) - goto out; - - while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) { - countdown--; - if (!countdown) - break; - usec_delay(mbx->usec_delay); - } - - /* if we failed, all future posted messages fail until reset */ - if (!countdown) - mbx->timeout = 0; -out: - return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; -} - -/** - * e1000_poll_for_ack - Wait for message acknowledgement - * @hw: pointer to the HW structure - * @mbx_id: id of mailbox to write - * - * returns SUCCESS if it successfully received a message acknowledgement - **/ -static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - int countdown = mbx->timeout; - - DEBUGFUNC("e1000_poll_for_ack"); - - if (!countdown || !mbx->ops.check_for_ack) - goto out; - - while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) { - countdown--; - if (!countdown) - break; - usec_delay(mbx->usec_delay); - } - - /* if we failed, all future posted messages fail until reset */ - if (!countdown) - mbx->timeout = 0; -out: - return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; -} - -/** - * e1000_read_posted_mbx - Wait for message notification and receive message - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @mbx_id: id of mailbox to write - * - * returns SUCCESS if it successfully received a message notification and - * copied it into the receive buffer. 
- **/ -s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_read_posted_mbx"); - - if (!mbx->ops.read) - goto out; - - ret_val = e1000_poll_for_msg(hw, mbx_id); - - /* if ack received read message, otherwise we timed out */ - if (!ret_val) - ret_val = mbx->ops.read(hw, msg, size, mbx_id); -out: - return ret_val; -} - -/** - * e1000_write_posted_mbx - Write a message to the mailbox, wait for ack - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @mbx_id: id of mailbox to write - * - * returns SUCCESS if it successfully copied message into the buffer and - * received an ack to that message within delay * timeout period - **/ -s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_write_posted_mbx"); - - /* exit if either we can't write or there isn't a defined timeout */ - if (!mbx->ops.write || !mbx->timeout) - goto out; - - /* send msg */ - ret_val = mbx->ops.write(hw, msg, size, mbx_id); - - /* if msg sent wait until we receive an ack */ - if (!ret_val) - ret_val = e1000_poll_for_ack(hw, mbx_id); -out: - return ret_val; -} - -/** - * e1000_init_mbx_ops_generic - Initialize mbx function pointers - * @hw: pointer to the HW structure - * - * Sets the function pointers to no-op functions - **/ -void e1000_init_mbx_ops_generic(struct e1000_hw *hw) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - mbx->ops.init_params = e1000_null_ops_generic; - mbx->ops.read = e1000_null_mbx_transact; - mbx->ops.write = e1000_null_mbx_transact; - mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag; - mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag; - mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag; - mbx->ops.read_posted = e1000_read_posted_mbx; - mbx->ops.write_posted = e1000_write_posted_mbx; -} - -static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask) -{ - u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR); - s32 ret_val = -E1000_ERR_MBX; - - if (mbvficr & mask) { - ret_val = E1000_SUCCESS; - E1000_WRITE_REG(hw, E1000_MBVFICR, mask); - } - - return ret_val; -} - -/** - * e1000_check_for_msg_pf - checks to see if the VF has sent mail - * @hw: pointer to the HW structure - * @vf_number: the VF index - * - * returns SUCCESS if the VF has set the Status bit or else ERR_MBX - **/ -static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number) -{ - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_msg_pf"); - - if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) { - ret_val = E1000_SUCCESS; - hw->mbx.stats.reqs++; - } - - return ret_val; -} - -/** - * e1000_check_for_ack_pf - checks to see if the VF has ACKed - * @hw: pointer to the HW structure - * @vf_number: the VF index - * - * returns SUCCESS if the VF has set the Status bit or else ERR_MBX - **/ -static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number) -{ - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_ack_pf"); - - if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) { - ret_val = E1000_SUCCESS; - hw->mbx.stats.acks++; - } - - return ret_val; -} - -/** - * e1000_check_for_rst_pf - checks to see if the VF has reset - * @hw: pointer to the HW structure - * @vf_number: the VF index - * - * returns SUCCESS if the VF has set the Status bit or else ERR_MBX - **/ -static s32 
e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number) -{ - u32 vflre = E1000_READ_REG(hw, E1000_VFLRE); - s32 ret_val = -E1000_ERR_MBX; - - DEBUGFUNC("e1000_check_for_rst_pf"); - - if (vflre & (1 << vf_number)) { - ret_val = E1000_SUCCESS; - E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number)); - hw->mbx.stats.rsts++; - } - - return ret_val; -} - -/** - * e1000_obtain_mbx_lock_pf - obtain mailbox lock - * @hw: pointer to the HW structure - * @vf_number: the VF index - * - * return SUCCESS if we obtained the mailbox lock - **/ -static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) -{ - s32 ret_val = -E1000_ERR_MBX; - u32 p2v_mailbox; - - DEBUGFUNC("e1000_obtain_mbx_lock_pf"); - - /* Take ownership of the buffer */ - E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); - - /* reserve mailbox for vf use */ - p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = E1000_SUCCESS; - - return ret_val; -} - -/** - * e1000_write_mbx_pf - Places a message in the mailbox - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @vf_number: the VF index - * - * returns SUCCESS if it successfully copied message into the buffer - **/ -static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, - u16 vf_number) -{ - s32 ret_val; - u16 i; - - DEBUGFUNC("e1000_write_mbx_pf"); - - /* lock the mailbox to prevent pf/vf race condition */ - ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); - if (ret_val) - goto out_no_write; - - /* flush msg and acks as we are overwriting the message buffer */ - e1000_check_for_msg_pf(hw, vf_number); - e1000_check_for_ack_pf(hw, vf_number); - - /* copy the caller specified message to the mailbox memory buffer */ - for (i = 0; i < size; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]); - - /* Interrupt VF to tell it a message has been sent and release buffer*/ - E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS); - - /* update stats */ - hw->mbx.stats.msgs_tx++; - -out_no_write: - return ret_val; - -} - -/** - * e1000_read_mbx_pf - Read a message from the mailbox - * @hw: pointer to the HW structure - * @msg: The message buffer - * @size: Length of buffer - * @vf_number: the VF index - * - * This function copies a message from the mailbox buffer to the caller's - * memory buffer. The presumption is that the caller knows that there was - * a message due to a VF request so no polling for message is needed. 
- **/ -static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, - u16 vf_number) -{ - s32 ret_val; - u16 i; - - DEBUGFUNC("e1000_read_mbx_pf"); - - /* lock the mailbox to prevent pf/vf race condition */ - ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); - if (ret_val) - goto out_no_read; - - /* copy the message to the mailbox memory buffer */ - for (i = 0; i < size; i++) - msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i); - - /* Acknowledge the message and release buffer */ - E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK); - - /* update stats */ - hw->mbx.stats.msgs_rx++; - -out_no_read: - return ret_val; -} - -/** - * e1000_init_mbx_params_pf - set initial values for pf mailbox - * @hw: pointer to the HW structure - * - * Initializes the hw->mbx struct to correct values for pf mailbox - */ -s32 e1000_init_mbx_params_pf(struct e1000_hw *hw) -{ - struct e1000_mbx_info *mbx = &hw->mbx; - - switch (hw->mac.type) { - case e1000_82576: - case e1000_i350: - case e1000_i354: - mbx->timeout = 0; - mbx->usec_delay = 0; - - mbx->size = E1000_VFMAILBOX_SIZE; - - mbx->ops.read = e1000_read_mbx_pf; - mbx->ops.write = e1000_write_mbx_pf; - mbx->ops.read_posted = e1000_read_posted_mbx; - mbx->ops.write_posted = e1000_write_posted_mbx; - mbx->ops.check_for_msg = e1000_check_for_msg_pf; - mbx->ops.check_for_ack = e1000_check_for_ack_pf; - mbx->ops.check_for_rst = e1000_check_for_rst_pf; - - mbx->stats.msgs_tx = 0; - mbx->stats.msgs_rx = 0; - mbx->stats.reqs = 0; - mbx->stats.acks = 0; - mbx->stats.rsts = 0; - default: - return E1000_SUCCESS; - } -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h deleted file mode 100644 index 5951f18f..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_MBX_H_ -#define _E1000_MBX_H_ - -#include "e1000_api.h" - -#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ -#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ -#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ - -#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */ -#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ -#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */ -#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ - -#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ - -/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the - * PF. The reverse is true if it is E1000_PF_*. 
- * Message ACK's are the value or'd with 0xF0000000 - */ -/* Msgs below or'd with this are the ACK */ -#define E1000_VT_MSGTYPE_ACK 0x80000000 -/* Msgs below or'd with this are the NACK */ -#define E1000_VT_MSGTYPE_NACK 0x40000000 -/* Indicates that VF is still clear to send requests */ -#define E1000_VT_MSGTYPE_CTS 0x20000000 -#define E1000_VT_MSGINFO_SHIFT 16 -/* bits 23:16 are used for extra info for certain messages */ -#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) - -#define E1000_VF_RESET 0x01 /* VF requests reset */ -#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */ -#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */ -#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT) -#define E1000_VF_SET_MULTICAST_OVERFLOW (0x80 << E1000_VT_MSGINFO_SHIFT) -#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */ -#define E1000_VF_SET_VLAN_ADD (0x01 << E1000_VT_MSGINFO_SHIFT) -#define E1000_VF_SET_LPE 0x05 /* reqs to set VMOLR.LPE */ -#define E1000_VF_SET_PROMISC 0x06 /* reqs to clear VMOLR.ROPE/MPME*/ -#define E1000_VF_SET_PROMISC_UNICAST (0x01 << E1000_VT_MSGINFO_SHIFT) -#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT) - -#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ - -#define E1000_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ -#define E1000_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ - -s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16); -s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16); -s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16); -s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16); -s32 e1000_check_for_msg(struct e1000_hw *, u16); -s32 e1000_check_for_ack(struct e1000_hw *, u16); -s32 e1000_check_for_rst(struct e1000_hw *, u16); -void e1000_init_mbx_ops_generic(struct e1000_hw *hw); -s32 e1000_init_mbx_params_pf(struct e1000_hw *); - -#endif /* _E1000_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c deleted file mode 100644 index 78c3fc0e..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c +++ /dev/null @@ -1,950 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
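The mailbox header removed just above declares the posted read/write entry points whose implementations were deleted a little earlier; they share one pattern: send (or wait for) a message, poll for the peer's flag with a bounded countdown, and on timeout zero the stored timeout so later posted operations fail immediately until the mailbox is re-initialised. A small model of the posted-write side, using invented names and a stubbed ACK check, might be:

#include <stdbool.h>
#include <stdio.h>

struct mbx {
    int timeout;                  /* remaining poll iterations */
    bool (*ack_seen)(void);       /* stand-in for the check_for_ack op */
};

static bool fake_ack(void) { return true; }

static int mbx_write_posted(struct mbx *m)
{
    int countdown = m->timeout;

    if (!countdown)
        return -1;                /* an earlier timeout poisoned the mailbox */

    /* ...write the message registers here... */

    while (countdown && !m->ack_seen())
        countdown--;              /* real code delays usec_delay() per loop */

    if (!countdown) {
        m->timeout = 0;           /* future posted writes fail until reset */
        return -1;
    }
    return 0;
}

int main(void)
{
    struct mbx m = { .timeout = 100, .ack_seen = fake_ack };
    printf("posted write %s\n", mbx_write_posted(&m) ? "timed out" : "acked");
    return 0;
}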
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - -static void e1000_reload_nvm_generic(struct e1000_hw *hw); - -/** - * e1000_init_nvm_ops_generic - Initialize NVM function pointers - * @hw: pointer to the HW structure - * - * Setups up the function pointers to no-op functions - **/ -void e1000_init_nvm_ops_generic(struct e1000_hw *hw) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - DEBUGFUNC("e1000_init_nvm_ops_generic"); - - /* Initialize function pointers */ - nvm->ops.init_params = e1000_null_ops_generic; - nvm->ops.acquire = e1000_null_ops_generic; - nvm->ops.read = e1000_null_read_nvm; - nvm->ops.release = e1000_null_nvm_generic; - nvm->ops.reload = e1000_reload_nvm_generic; - nvm->ops.update = e1000_null_ops_generic; - nvm->ops.valid_led_default = e1000_null_led_default; - nvm->ops.validate = e1000_null_ops_generic; - nvm->ops.write = e1000_null_write_nvm; -} - -/** - * e1000_null_nvm_read - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, - u16 E1000_UNUSEDARG *c) -{ - DEBUGFUNC("e1000_null_read_nvm"); - return E1000_SUCCESS; -} - -/** - * e1000_null_nvm_generic - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_null_nvm_generic"); - return; -} - -/** - * e1000_null_led_default - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG *data) -{ - DEBUGFUNC("e1000_null_led_default"); - return E1000_SUCCESS; -} - -/** - * e1000_null_write_nvm - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, - u16 E1000_UNUSEDARG *c) -{ - DEBUGFUNC("e1000_null_write_nvm"); - return E1000_SUCCESS; -} - -/** - * e1000_raise_eec_clk - Raise EEPROM clock - * @hw: pointer to the HW structure - * @eecd: pointer to the EEPROM - * - * Enable/Raise the EEPROM clock bit. - **/ -static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) -{ - *eecd = *eecd | E1000_EECD_SK; - E1000_WRITE_REG(hw, E1000_EECD, *eecd); - E1000_WRITE_FLUSH(hw); - usec_delay(hw->nvm.delay_usec); -} - -/** - * e1000_lower_eec_clk - Lower EEPROM clock - * @hw: pointer to the HW structure - * @eecd: pointer to the EEPROM - * - * Clear/Lower the EEPROM clock bit. - **/ -static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) -{ - *eecd = *eecd & ~E1000_EECD_SK; - E1000_WRITE_REG(hw, E1000_EECD, *eecd); - E1000_WRITE_FLUSH(hw); - usec_delay(hw->nvm.delay_usec); -} - -/** - * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM - * @hw: pointer to the HW structure - * @data: data to send to the EEPROM - * @count: number of bits to shift out - * - * We need to shift 'count' bits out to the EEPROM. So, the value in the - * "data" parameter will be shifted out to the EEPROM one bit at a time. - * In order to do this, "data" must be broken down into bits. 
- **/ -static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - u32 mask; - - DEBUGFUNC("e1000_shift_out_eec_bits"); - - mask = 0x01 << (count - 1); - if (nvm->type == e1000_nvm_eeprom_spi) - eecd |= E1000_EECD_DO; - - do { - eecd &= ~E1000_EECD_DI; - - if (data & mask) - eecd |= E1000_EECD_DI; - - E1000_WRITE_REG(hw, E1000_EECD, eecd); - E1000_WRITE_FLUSH(hw); - - usec_delay(nvm->delay_usec); - - e1000_raise_eec_clk(hw, &eecd); - e1000_lower_eec_clk(hw, &eecd); - - mask >>= 1; - } while (mask); - - eecd &= ~E1000_EECD_DI; - E1000_WRITE_REG(hw, E1000_EECD, eecd); -} - -/** - * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM - * @hw: pointer to the HW structure - * @count: number of bits to shift in - * - * In order to read a register from the EEPROM, we need to shift 'count' bits - * in from the EEPROM. Bits are "shifted in" by raising the clock input to - * the EEPROM (setting the SK bit), and then reading the value of the data out - * "DO" bit. During this "shifting in" process the data in "DI" bit should - * always be clear. - **/ -static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) -{ - u32 eecd; - u32 i; - u16 data; - - DEBUGFUNC("e1000_shift_in_eec_bits"); - - eecd = E1000_READ_REG(hw, E1000_EECD); - - eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); - data = 0; - - for (i = 0; i < count; i++) { - data <<= 1; - e1000_raise_eec_clk(hw, &eecd); - - eecd = E1000_READ_REG(hw, E1000_EECD); - - eecd &= ~E1000_EECD_DI; - if (eecd & E1000_EECD_DO) - data |= 1; - - e1000_lower_eec_clk(hw, &eecd); - } - - return data; -} - -/** - * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion - * @hw: pointer to the HW structure - * @ee_reg: EEPROM flag for polling - * - * Polls the EEPROM status bit for either read or write completion based - * upon the value of 'ee_reg'. - **/ -s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) -{ - u32 attempts = 100000; - u32 i, reg = 0; - - DEBUGFUNC("e1000_poll_eerd_eewr_done"); - - for (i = 0; i < attempts; i++) { - if (ee_reg == E1000_NVM_POLL_READ) - reg = E1000_READ_REG(hw, E1000_EERD); - else - reg = E1000_READ_REG(hw, E1000_EEWR); - - if (reg & E1000_NVM_RW_REG_DONE) - return E1000_SUCCESS; - - usec_delay(5); - } - - return -E1000_ERR_NVM; -} - -/** - * e1000_acquire_nvm_generic - Generic request for access to EEPROM - * @hw: pointer to the HW structure - * - * Set the EEPROM access request bit and wait for EEPROM access grant bit. - * Return successful if access grant bit set, else clear the request for - * EEPROM access and return -E1000_ERR_NVM (-1). - **/ -s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) -{ - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - s32 timeout = E1000_NVM_GRANT_ATTEMPTS; - - DEBUGFUNC("e1000_acquire_nvm_generic"); - - E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); - eecd = E1000_READ_REG(hw, E1000_EECD); - - while (timeout) { - if (eecd & E1000_EECD_GNT) - break; - usec_delay(5); - eecd = E1000_READ_REG(hw, E1000_EECD); - timeout--; - } - - if (!timeout) { - eecd &= ~E1000_EECD_REQ; - E1000_WRITE_REG(hw, E1000_EECD, eecd); - DEBUGOUT("Could not acquire NVM grant\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_standby_nvm - Return EEPROM to standby state - * @hw: pointer to the HW structure - * - * Return the EEPROM to a standby state. 
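
The two shift helpers above implement the SPI bit-banging: a word is clocked out to the EEPROM MSB first by driving DI and pulsing SK once per bit, and clocked back in by sampling DO on each pulse. Below is a minimal host-side sketch of that MSB-first framing with no hardware access; the wire[] array and helper names are stand-ins for the real EECD register traffic, not part of the driver.

    #include <stdio.h>
    #include <stdint.h>

    /* Clock 'count' bits of 'data' out MSB first, recording the DI level
     * that would be driven on each SK pulse (stand-in for the EEPROM). */
    static void shift_out_bits(uint16_t data, int count, int *di_levels)
    {
        uint16_t mask = 1u << (count - 1);
        int i = 0;

        while (mask) {
            di_levels[i++] = (data & mask) ? 1 : 0;  /* drive DI */
            /* raise SK, delay, lower SK would happen here */
            mask >>= 1;
        }
    }

    /* Sample 'count' DO levels back into a word, MSB first. */
    static uint16_t shift_in_bits(const int *do_levels, int count)
    {
        uint16_t data = 0;
        for (int i = 0; i < count; i++)
            data = (uint16_t)((data << 1) | (do_levels[i] & 1));
        return data;
    }

    int main(void)
    {
        int wire[16];
        shift_out_bits(0xA5F0, 16, wire);                         /* "send" a word */
        printf("echoed back: 0x%04X\n", shift_in_bits(wire, 16)); /* 0xA5F0 */
        return 0;
    }
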
- **/ -static void e1000_standby_nvm(struct e1000_hw *hw) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - - DEBUGFUNC("e1000_standby_nvm"); - - if (nvm->type == e1000_nvm_eeprom_spi) { - /* Toggle CS to flush commands */ - eecd |= E1000_EECD_CS; - E1000_WRITE_REG(hw, E1000_EECD, eecd); - E1000_WRITE_FLUSH(hw); - usec_delay(nvm->delay_usec); - eecd &= ~E1000_EECD_CS; - E1000_WRITE_REG(hw, E1000_EECD, eecd); - E1000_WRITE_FLUSH(hw); - usec_delay(nvm->delay_usec); - } -} - -/** - * e1000_stop_nvm - Terminate EEPROM command - * @hw: pointer to the HW structure - * - * Terminates the current command by inverting the EEPROM's chip select pin. - **/ -static void e1000_stop_nvm(struct e1000_hw *hw) -{ - u32 eecd; - - DEBUGFUNC("e1000_stop_nvm"); - - eecd = E1000_READ_REG(hw, E1000_EECD); - if (hw->nvm.type == e1000_nvm_eeprom_spi) { - /* Pull CS high */ - eecd |= E1000_EECD_CS; - e1000_lower_eec_clk(hw, &eecd); - } -} - -/** - * e1000_release_nvm_generic - Release exclusive access to EEPROM - * @hw: pointer to the HW structure - * - * Stop any current commands to the EEPROM and clear the EEPROM request bit. - **/ -void e1000_release_nvm_generic(struct e1000_hw *hw) -{ - u32 eecd; - - DEBUGFUNC("e1000_release_nvm_generic"); - - e1000_stop_nvm(hw); - - eecd = E1000_READ_REG(hw, E1000_EECD); - eecd &= ~E1000_EECD_REQ; - E1000_WRITE_REG(hw, E1000_EECD, eecd); -} - -/** - * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write - * @hw: pointer to the HW structure - * - * Setups the EEPROM for reading and writing. - **/ -static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 eecd = E1000_READ_REG(hw, E1000_EECD); - u8 spi_stat_reg; - - DEBUGFUNC("e1000_ready_nvm_eeprom"); - - if (nvm->type == e1000_nvm_eeprom_spi) { - u16 timeout = NVM_MAX_RETRY_SPI; - - /* Clear SK and CS */ - eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); - E1000_WRITE_REG(hw, E1000_EECD, eecd); - E1000_WRITE_FLUSH(hw); - usec_delay(1); - - /* Read "Status Register" repeatedly until the LSB is cleared. - * The EEPROM will signal that the command has been completed - * by clearing bit 0 of the internal status register. If it's - * not cleared within 'timeout', then error out. - */ - while (timeout) { - e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, - hw->nvm.opcode_bits); - spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); - if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) - break; - - usec_delay(5); - e1000_standby_nvm(hw); - timeout--; - } - - if (!timeout) { - DEBUGOUT("SPI NVM Status error\n"); - return -E1000_ERR_NVM; - } - } - - return E1000_SUCCESS; -} - -/** - * e1000_read_nvm_spi - Read EEPROM's using SPI - * @hw: pointer to the HW structure - * @offset: offset of word in the EEPROM to read - * @words: number of words to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM. - **/ -s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 i = 0; - s32 ret_val; - u16 word_in; - u8 read_opcode = NVM_READ_OPCODE_SPI; - - DEBUGFUNC("e1000_read_nvm_spi"); - - /* A check for invalid values: offset too large, too many words, - * and not enough words. 
- */ - if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || - (words == 0)) { - DEBUGOUT("nvm parameter(s) out of bounds\n"); - return -E1000_ERR_NVM; - } - - ret_val = nvm->ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_ready_nvm_eeprom(hw); - if (ret_val) - goto release; - - e1000_standby_nvm(hw); - - if ((nvm->address_bits == 8) && (offset >= 128)) - read_opcode |= NVM_A8_OPCODE_SPI; - - /* Send the READ command (opcode + addr) */ - e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); - e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); - - /* Read the data. SPI NVMs increment the address with each byte - * read and will roll over if reading beyond the end. This allows - * us to read the whole NVM from any offset - */ - for (i = 0; i < words; i++) { - word_in = e1000_shift_in_eec_bits(hw, 16); - data[i] = (word_in >> 8) | (word_in << 8); - } - -release: - nvm->ops.release(hw); - - return ret_val; -} - -/** - * e1000_read_nvm_eerd - Reads EEPROM using EERD register - * @hw: pointer to the HW structure - * @offset: offset of word in the EEPROM to read - * @words: number of words to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM using the EERD register. - **/ -s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - u32 i, eerd = 0; - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_read_nvm_eerd"); - - /* A check for invalid values: offset too large, too many words, - * too many words for the offset, and not enough words. - */ - if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || - (words == 0)) { - DEBUGOUT("nvm parameter(s) out of bounds\n"); - return -E1000_ERR_NVM; - } - - for (i = 0; i < words; i++) { - eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + - E1000_NVM_RW_REG_START; - - E1000_WRITE_REG(hw, E1000_EERD, eerd); - ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); - if (ret_val) - break; - - data[i] = (E1000_READ_REG(hw, E1000_EERD) >> - E1000_NVM_RW_REG_DATA); - } - - return ret_val; -} - -/** - * e1000_write_nvm_spi - Write to EEPROM using SPI - * @hw: pointer to the HW structure - * @offset: offset within the EEPROM to be written to - * @words: number of words to write - * @data: 16 bit word(s) to be written to the EEPROM - * - * Writes data to EEPROM at offset using SPI interface. - * - * If e1000_update_nvm_checksum is not called after this function , the - * EEPROM will most likely contain an invalid checksum. - **/ -s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - s32 ret_val = -E1000_ERR_NVM; - u16 widx = 0; - - DEBUGFUNC("e1000_write_nvm_spi"); - - /* A check for invalid values: offset too large, too many words, - * and not enough words. 
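
Two details of e1000_read_nvm_spi above are easy to miss: the word offset is doubled into a byte address before it is shifted out, and each 16-bit value returned by the shift register arrives byte-swapped, hence the (word_in >> 8) | (word_in << 8). A small stand-alone illustration, with hypothetical helper names:

    #include <stdio.h>
    #include <stdint.h>

    /* A word offset becomes a byte address on the SPI EEPROM. */
    static uint16_t nvm_byte_address(uint16_t word_offset)
    {
        return (uint16_t)(word_offset * 2);
    }

    /* The two bytes of each word arrive swapped off the wire, so the
     * value read from the shift register is swapped back in memory. */
    static uint16_t nvm_swap16(uint16_t word_in)
    {
        return (uint16_t)((word_in >> 8) | (word_in << 8));
    }

    int main(void)
    {
        printf("word 0x3F -> byte addr 0x%02X\n", nvm_byte_address(0x3F));       /* 0x7E */
        printf("0x1234 on the wire -> 0x%04X in memory\n", nvm_swap16(0x1234));  /* 0x3412 */
        return 0;
    }
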
- */ - if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || - (words == 0)) { - DEBUGOUT("nvm parameter(s) out of bounds\n"); - return -E1000_ERR_NVM; - } - - while (widx < words) { - u8 write_opcode = NVM_WRITE_OPCODE_SPI; - - ret_val = nvm->ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_ready_nvm_eeprom(hw); - if (ret_val) { - nvm->ops.release(hw); - return ret_val; - } - - e1000_standby_nvm(hw); - - /* Send the WRITE ENABLE command (8 bit opcode) */ - e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, - nvm->opcode_bits); - - e1000_standby_nvm(hw); - - /* Some SPI eeproms use the 8th address bit embedded in the - * opcode - */ - if ((nvm->address_bits == 8) && (offset >= 128)) - write_opcode |= NVM_A8_OPCODE_SPI; - - /* Send the Write command (8-bit opcode + addr) */ - e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); - e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), - nvm->address_bits); - - /* Loop to allow for up to whole page write of eeprom */ - while (widx < words) { - u16 word_out = data[widx]; - word_out = (word_out >> 8) | (word_out << 8); - e1000_shift_out_eec_bits(hw, word_out, 16); - widx++; - - if ((((offset + widx) * 2) % nvm->page_size) == 0) { - e1000_standby_nvm(hw); - break; - } - } - msec_delay(10); - nvm->ops.release(hw); - } - - return ret_val; -} - -/** - * e1000_read_pba_string_generic - Read device part number - * @hw: pointer to the HW structure - * @pba_num: pointer to device part number - * @pba_num_size: size of part number buffer - * - * Reads the product board assembly (PBA) number from the EEPROM and stores - * the value in pba_num. - **/ -s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, - u32 pba_num_size) -{ - s32 ret_val; - u16 nvm_data; - u16 pba_ptr; - u16 offset; - u16 length; - - DEBUGFUNC("e1000_read_pba_string_generic"); - - if (pba_num == NULL) { - DEBUGOUT("PBA string buffer was null\n"); - return -E1000_ERR_INVALID_ARGUMENT; - } - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - /* if nvm_data is not ptr guard the PBA must be in legacy format which - * means pba_ptr is actually our second data word for the PBA number - * and we can decode it into an ascii string - */ - if (nvm_data != NVM_PBA_PTR_GUARD) { - DEBUGOUT("NVM PBA number is not stored as string\n"); - - /* make sure callers buffer is big enough to store the PBA */ - if (pba_num_size < E1000_PBANUM_LENGTH) { - DEBUGOUT("PBA string buffer too small\n"); - return E1000_ERR_NO_SPACE; - } - - /* extract hex string from data and pba_ptr */ - pba_num[0] = (nvm_data >> 12) & 0xF; - pba_num[1] = (nvm_data >> 8) & 0xF; - pba_num[2] = (nvm_data >> 4) & 0xF; - pba_num[3] = nvm_data & 0xF; - pba_num[4] = (pba_ptr >> 12) & 0xF; - pba_num[5] = (pba_ptr >> 8) & 0xF; - pba_num[6] = '-'; - pba_num[7] = 0; - pba_num[8] = (pba_ptr >> 4) & 0xF; - pba_num[9] = pba_ptr & 0xF; - - /* put a null character on the end of our string */ - pba_num[10] = '\0'; - - /* switch all the data but the '-' to hex char */ - for (offset = 0; offset < 10; offset++) { - if (pba_num[offset] < 0xA) - pba_num[offset] += '0'; - else if (pba_num[offset] < 0x10) - pba_num[offset] += 'A' - 0xA; - } - - return E1000_SUCCESS; - } - - ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); - if (ret_val) { - DEBUGOUT("NVM Read 
Error\n"); - return ret_val; - } - - if (length == 0xFFFF || length == 0) { - DEBUGOUT("NVM PBA number section invalid length\n"); - return -E1000_ERR_NVM_PBA_SECTION; - } - /* check if pba_num buffer is big enough */ - if (pba_num_size < (((u32)length * 2) - 1)) { - DEBUGOUT("PBA string buffer too small\n"); - return -E1000_ERR_NO_SPACE; - } - - /* trim pba length from start of string */ - pba_ptr++; - length--; - - for (offset = 0; offset < length; offset++) { - ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - pba_num[offset * 2] = (u8)(nvm_data >> 8); - pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); - } - pba_num[offset * 2] = '\0'; - - return E1000_SUCCESS; -} - -/** - * e1000_read_pba_length_generic - Read device part number length - * @hw: pointer to the HW structure - * @pba_num_size: size of part number buffer - * - * Reads the product board assembly (PBA) number length from the EEPROM and - * stores the value in pba_num_size. - **/ -s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size) -{ - s32 ret_val; - u16 nvm_data; - u16 pba_ptr; - u16 length; - - DEBUGFUNC("e1000_read_pba_length_generic"); - - if (pba_num_size == NULL) { - DEBUGOUT("PBA buffer size was null\n"); - return -E1000_ERR_INVALID_ARGUMENT; - } - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - /* if data is not ptr guard the PBA must be in legacy format */ - if (nvm_data != NVM_PBA_PTR_GUARD) { - *pba_num_size = E1000_PBANUM_LENGTH; - return E1000_SUCCESS; - } - - ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - - if (length == 0xFFFF || length == 0) { - DEBUGOUT("NVM PBA number section invalid length\n"); - return -E1000_ERR_NVM_PBA_SECTION; - } - - /* Convert from length in u16 values to u8 chars, add 1 for NULL, - * and subtract 2 because length field is included in length. - */ - *pba_num_size = ((u32)length * 2) - 1; - - return E1000_SUCCESS; -} - - - - - -/** - * e1000_read_mac_addr_generic - Read device MAC address - * @hw: pointer to the HW structure - * - * Reads the device MAC address from the EEPROM and stores the value. - * Since devices with two ports use the same EEPROM, we increment the - * last bit in the MAC address for the second port. - **/ -s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) -{ - u32 rar_high; - u32 rar_low; - u16 i; - - rar_high = E1000_READ_REG(hw, E1000_RAH(0)); - rar_low = E1000_READ_REG(hw, E1000_RAL(0)); - - for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) - hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); - - for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) - hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); - - for (i = 0; i < ETH_ADDR_LEN; i++) - hw->mac.addr[i] = hw->mac.perm_addr[i]; - - return E1000_SUCCESS; -} - -/** - * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum - * @hw: pointer to the HW structure - * - * Calculates the EEPROM checksum by reading/adding each word of the EEPROM - * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
- **/ -s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) -{ - s32 ret_val; - u16 checksum = 0; - u16 i, nvm_data; - - DEBUGFUNC("e1000_validate_nvm_checksum_generic"); - - for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { - ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - checksum += nvm_data; - } - - if (checksum != (u16) NVM_SUM) { - DEBUGOUT("NVM Checksum Invalid\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_update_nvm_checksum_generic - Update EEPROM checksum - * @hw: pointer to the HW structure - * - * Updates the EEPROM checksum by reading/adding each word of the EEPROM - * up to the checksum. Then calculates the EEPROM checksum and writes the - * value to the EEPROM. - **/ -s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) -{ - s32 ret_val; - u16 checksum = 0; - u16 i, nvm_data; - - DEBUGFUNC("e1000_update_nvm_checksum"); - - for (i = 0; i < NVM_CHECKSUM_REG; i++) { - ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error while updating checksum.\n"); - return ret_val; - } - checksum += nvm_data; - } - checksum = (u16) NVM_SUM - checksum; - ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); - if (ret_val) - DEBUGOUT("NVM Write Error while updating checksum.\n"); - - return ret_val; -} - -/** - * e1000_reload_nvm_generic - Reloads EEPROM - * @hw: pointer to the HW structure - * - * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the - * extended control register. - **/ -static void e1000_reload_nvm_generic(struct e1000_hw *hw) -{ - u32 ctrl_ext; - - DEBUGFUNC("e1000_reload_nvm_generic"); - - usec_delay(10); - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - ctrl_ext |= E1000_CTRL_EXT_EE_RST; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_get_fw_version - Get firmware version information - * @hw: pointer to the HW structure - * @fw_vers: pointer to output version structure - * - * unsupported/not present features return 0 in version structure - **/ -void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) -{ - u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; - u8 q, hval, rem, result; - u16 comb_verh, comb_verl, comb_offset; - - memset(fw_vers, 0, sizeof(struct e1000_fw_version)); - - /* basic eeprom version numbers, bits used vary by part and by tool - * used to create the nvm images */ - /* Check which data format we have */ - hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); - switch (hw->mac.type) { - case e1000_i211: - e1000_read_invm_version(hw, fw_vers); - return; - case e1000_82575: - case e1000_82576: - case e1000_82580: - /* Use this format, unless EETRACK ID exists, - * then use alternate format - */ - if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { - hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; - fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; - fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); - goto etrack_id; - } - break; - case e1000_i210: - if (!(e1000_get_flash_presence_i210(hw))) { - e1000_read_invm_version(hw, fw_vers); - return; - } - /* fall through */ - case e1000_i350: - case e1000_i354: - /* find combo image version */ - hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); - if ((comb_offset != 0x0) && - (comb_offset != NVM_VER_INVALID)) { - - 
hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset - + 1), 1, &comb_verh); - hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), - 1, &comb_verl); - - /* get Option Rom version if it exists and is valid */ - if ((comb_verh && comb_verl) && - ((comb_verh != NVM_VER_INVALID) && - (comb_verl != NVM_VER_INVALID))) { - - fw_vers->or_valid = true; - fw_vers->or_major = - comb_verl >> NVM_COMB_VER_SHFT; - fw_vers->or_build = - (comb_verl << NVM_COMB_VER_SHFT) - | (comb_verh >> NVM_COMB_VER_SHFT); - fw_vers->or_patch = - comb_verh & NVM_COMB_VER_MASK; - } - } - break; - default: - return; - } - hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; - - /* check for old style version format in newer images*/ - if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { - eeprom_verl = (fw_version & NVM_COMB_VER_MASK); - } else { - eeprom_verl = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; - } - /* Convert minor value to hex before assigning to output struct - * Val to be converted will not be higher than 99, per tool output - */ - q = eeprom_verl / NVM_HEX_CONV; - hval = q * NVM_HEX_TENS; - rem = eeprom_verl % NVM_HEX_CONV; - result = hval + rem; - fw_vers->eep_minor = result; - -etrack_id: - if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { - hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); - hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); - fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) - | eeprom_verl; - } - return; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h deleted file mode 100644 index e27b1c0a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_NVM_H_ -#define _E1000_NVM_H_ - - -struct e1000_fw_version { - u32 etrack_id; - u16 eep_major; - u16 eep_minor; - u16 eep_build; - - u8 invm_major; - u8 invm_minor; - u8 invm_img_type; - - bool or_valid; - u16 or_major; - u16 or_build; - u16 or_patch; -}; - - -void e1000_init_nvm_ops_generic(struct e1000_hw *hw); -s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); -void e1000_null_nvm_generic(struct e1000_hw *hw); -s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data); -s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); -s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); - -s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); -s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); -s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, - u32 pba_num_size); -s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size); -s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); -s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); -s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); -s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); -s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); -void e1000_release_nvm_generic(struct e1000_hw *hw); -void e1000_get_fw_version(struct e1000_hw *hw, - struct e1000_fw_version *fw_vers); - -#define E1000_STM_OPCODE 0xDB00 - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h deleted file mode 100644 index 3228100e..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -/* glue for the OS independent part of e1000 - * includes register access macros - */ - -#ifndef _E1000_OSDEP_H_ -#define _E1000_OSDEP_H_ - -#include -#include -#include -#include -#include -#include "kcompat.h" - -#ifndef __INTEL_COMPILER -#pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#define usec_delay(x) udelay(x) -#define usec_delay_irq(x) udelay(x) -#ifndef msec_delay -#define msec_delay(x) do { \ - /* Don't mdelay in interrupt context! */ \ - if (in_interrupt()) \ - BUG(); \ - else \ - msleep(x); \ -} while (0) - -/* Some workarounds require millisecond delays and are run during interrupt - * context. Most notably, when establishing link, the phy may need tweaking - * but cannot process phy register reads/writes faster than millisecond - * intervals...and we establish link due to a "link status change" interrupt. - */ -#define msec_delay_irq(x) mdelay(x) -#endif - -#define PCI_COMMAND_REGISTER PCI_COMMAND -#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE -#define ETH_ADDR_LEN ETH_ALEN - -#ifdef __BIG_ENDIAN -#define E1000_BIG_ENDIAN __BIG_ENDIAN -#endif - - -#ifdef DEBUG -#define DEBUGOUT(S) printk(KERN_DEBUG S) -#define DEBUGOUT1(S, A...) 
printk(KERN_DEBUG S, ## A) -#else -#define DEBUGOUT(S) -#define DEBUGOUT1(S, A...) -#endif - -#ifdef DEBUG_FUNC -#define DEBUGFUNC(F) DEBUGOUT(F "\n") -#else -#define DEBUGFUNC(F) -#endif -#define DEBUGOUT2 DEBUGOUT1 -#define DEBUGOUT3 DEBUGOUT2 -#define DEBUGOUT7 DEBUGOUT3 - -#define E1000_REGISTER(a, reg) reg - -#define E1000_WRITE_REG(a, reg, value) ( \ - writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg)))) - -#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg))) - -#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ - writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))) - -#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ - readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))) - -#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY -#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY - -#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ - writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))) - -#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ - readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))) - -#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ - writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))) - -#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ - readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))) - -#define E1000_WRITE_REG_IO(a, reg, offset) do { \ - outl(reg, ((a)->io_base)); \ - outl(offset, ((a)->io_base + 4)); } while (0) - -#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) - -#define E1000_WRITE_FLASH_REG(a, reg, value) ( \ - writel((value), ((a)->flash_address + reg))) - -#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \ - writew((value), ((a)->flash_address + reg))) - -#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg)) - -#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg)) - -#endif /* _E1000_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c deleted file mode 100644 index 1934a309..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ /dev/null @@ -1,3392 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "e1000_api.h" - -static s32 e1000_wait_autoneg(struct e1000_hw *hw); -/* Cable length tables */ -static const u16 e1000_m88_cable_length_table[] = { - 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; -#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ - (sizeof(e1000_m88_cable_length_table) / \ - sizeof(e1000_m88_cable_length_table[0])) - -static const u16 e1000_igp_2_cable_length_table[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, - 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, - 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, - 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, - 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, - 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, - 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, - 124}; -#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ - (sizeof(e1000_igp_2_cable_length_table) / \ - sizeof(e1000_igp_2_cable_length_table[0])) - -/** - * e1000_init_phy_ops_generic - Initialize PHY function pointers - * @hw: pointer to the HW structure - * - * Setups up the function pointers to no-op functions - **/ -void e1000_init_phy_ops_generic(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - DEBUGFUNC("e1000_init_phy_ops_generic"); - - /* Initialize function pointers */ - phy->ops.init_params = e1000_null_ops_generic; - phy->ops.acquire = e1000_null_ops_generic; - phy->ops.check_polarity = e1000_null_ops_generic; - phy->ops.check_reset_block = e1000_null_ops_generic; - phy->ops.commit = e1000_null_ops_generic; - phy->ops.force_speed_duplex = e1000_null_ops_generic; - phy->ops.get_cfg_done = e1000_null_ops_generic; - phy->ops.get_cable_length = e1000_null_ops_generic; - phy->ops.get_info = e1000_null_ops_generic; - phy->ops.set_page = e1000_null_set_page; - phy->ops.read_reg = e1000_null_read_reg; - phy->ops.read_reg_locked = e1000_null_read_reg; - phy->ops.read_reg_page = e1000_null_read_reg; - phy->ops.release = e1000_null_phy_generic; - phy->ops.reset = e1000_null_ops_generic; - phy->ops.set_d0_lplu_state = e1000_null_lplu_state; - phy->ops.set_d3_lplu_state = e1000_null_lplu_state; - phy->ops.write_reg = e1000_null_write_reg; - phy->ops.write_reg_locked = e1000_null_write_reg; - phy->ops.write_reg_page = e1000_null_write_reg; - phy->ops.power_up = e1000_null_phy_generic; - phy->ops.power_down = e1000_null_phy_generic; - phy->ops.read_i2c_byte = e1000_read_i2c_byte_null; - phy->ops.write_i2c_byte = e1000_write_i2c_byte_null; -} - -/** - * e1000_null_set_page - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw, - u16 E1000_UNUSEDARG data) -{ - DEBUGFUNC("e1000_null_set_page"); - return E1000_SUCCESS; -} - -/** - * e1000_null_read_reg - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw, - u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data) -{ - DEBUGFUNC("e1000_null_read_reg"); - return E1000_SUCCESS; -} - -/** - * e1000_null_phy_generic - No-op function, return void - * @hw: pointer to the HW structure - **/ -void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_null_phy_generic"); - return; -} - -/** - * e1000_null_lplu_state - No-op function, return 0 - * @hw: pointer to the HW 
structure - **/ -s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw, - bool E1000_UNUSEDARG active) -{ - DEBUGFUNC("e1000_null_lplu_state"); - return E1000_SUCCESS; -} - -/** - * e1000_null_write_reg - No-op function, return 0 - * @hw: pointer to the HW structure - **/ -s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw, - u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data) -{ - DEBUGFUNC("e1000_null_write_reg"); - return E1000_SUCCESS; -} - -/** - * e1000_read_i2c_byte_null - No-op function, return 0 - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @dev_addr: device address - * @data: data value read - * - **/ -s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, - u8 E1000_UNUSEDARG byte_offset, - u8 E1000_UNUSEDARG dev_addr, - u8 E1000_UNUSEDARG *data) -{ - DEBUGFUNC("e1000_read_i2c_byte_null"); - return E1000_SUCCESS; -} - -/** - * e1000_write_i2c_byte_null - No-op function, return 0 - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @dev_addr: device address - * @data: data value to write - * - **/ -s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, - u8 E1000_UNUSEDARG byte_offset, - u8 E1000_UNUSEDARG dev_addr, - u8 E1000_UNUSEDARG data) -{ - DEBUGFUNC("e1000_write_i2c_byte_null"); - return E1000_SUCCESS; -} - -/** - * e1000_check_reset_block_generic - Check if PHY reset is blocked - * @hw: pointer to the HW structure - * - * Read the PHY management control register and check whether a PHY reset - * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise - * return E1000_BLK_PHY_RESET (12). - **/ -s32 e1000_check_reset_block_generic(struct e1000_hw *hw) -{ - u32 manc; - - DEBUGFUNC("e1000_check_reset_block"); - - manc = E1000_READ_REG(hw, E1000_MANC); - - return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? - E1000_BLK_PHY_RESET : E1000_SUCCESS; -} - -/** - * e1000_get_phy_id - Retrieve the PHY ID and revision - * @hw: pointer to the HW structure - * - * Reads the PHY registers and stores the PHY ID and possibly the PHY - * revision in the hardware structure. - **/ -s32 e1000_get_phy_id(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = E1000_SUCCESS; - u16 phy_id; - - DEBUGFUNC("e1000_get_phy_id"); - - if (!phy->ops.read_reg) - return E1000_SUCCESS; - - ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); - if (ret_val) - return ret_val; - - phy->id = (u32)(phy_id << 16); - usec_delay(20); - ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); - if (ret_val) - return ret_val; - - phy->id |= (u32)(phy_id & PHY_REVISION_MASK); - phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); - - - return E1000_SUCCESS; -} - -/** - * e1000_phy_reset_dsp_generic - Reset PHY DSP - * @hw: pointer to the HW structure - * - * Reset the digital signal processor. - **/ -s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_phy_reset_dsp_generic"); - - if (!hw->phy.ops.write_reg) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); - if (ret_val) - return ret_val; - - return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); -} - -/** - * e1000_read_phy_reg_mdic - Read MDI control register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the MDI control register in the PHY at offset and stores the - * information read to data. 
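
MDI reads and writes below work by packing the PHY address, register offset and op-code into the single MDIC register, then polling the ready bit and checking the error bit before taking the low 16 bits as data. The sketch below shows only that field packing and result check; the shift and bit positions are assumed placeholders, not values taken from the datasheet.

    #include <stdint.h>

    /* Field layout of the MDI control word (positions are assumptions). */
    #define MDIC_DATA_MASK   0x0000FFFFu
    #define MDIC_REG_SHIFT   16
    #define MDIC_PHY_SHIFT   21
    #define MDIC_OP_READ     (1u << 27)
    #define MDIC_READY       (1u << 28)
    #define MDIC_ERROR       (1u << 30)

    /* Compose the command word written to MDIC for a register read. */
    static uint32_t mdic_read_cmd(uint32_t phy_addr, uint32_t reg_offset)
    {
        return (reg_offset << MDIC_REG_SHIFT) |
               (phy_addr   << MDIC_PHY_SHIFT) |
               MDIC_OP_READ;
    }

    /* A completed read has READY set, ERROR clear, and the value in the
     * low 16 bits. Returns 0 on success, -1 on failure. */
    static int mdic_extract(uint32_t mdic, uint16_t *data)
    {
        if (!(mdic & MDIC_READY) || (mdic & MDIC_ERROR))
            return -1;
        *data = (uint16_t)(mdic & MDIC_DATA_MASK);
        return 0;
    }

    int main(void)
    {
        uint16_t val;
        uint32_t hw_reply = mdic_read_cmd(1, 0x02) | MDIC_READY | 0x0141;
        return (mdic_extract(hw_reply, &val) == 0 && val == 0x0141) ? 0 : 1;
    }
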
- **/ -s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) -{ - struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; - - DEBUGFUNC("e1000_read_phy_reg_mdic"); - - if (offset > MAX_PHY_REG_ADDRESS) { - DEBUGOUT1("PHY Address %d is out of range\n", offset); - return -E1000_ERR_PARAM; - } - - /* Set up Op-code, Phy Address, and register offset in the MDI - * Control register. The MAC will take care of interfacing with the - * PHY to retrieve the desired data. - */ - mdic = ((offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_READ)); - - E1000_WRITE_REG(hw, E1000_MDIC, mdic); - - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usec_delay_irq(50); - mdic = E1000_READ_REG(hw, E1000_MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - DEBUGOUT("MDI Read did not complete\n"); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - DEBUGOUT("MDI Error\n"); - return -E1000_ERR_PHY; - } - if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { - DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n", - offset, - (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); - return -E1000_ERR_PHY; - } - *data = (u16) mdic; - - return E1000_SUCCESS; -} - -/** - * e1000_write_phy_reg_mdic - Write MDI control register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write to register at offset - * - * Writes data to MDI control register in the PHY at offset. - **/ -s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) -{ - struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; - - DEBUGFUNC("e1000_write_phy_reg_mdic"); - - if (offset > MAX_PHY_REG_ADDRESS) { - DEBUGOUT1("PHY Address %d is out of range\n", offset); - return -E1000_ERR_PARAM; - } - - /* Set up Op-code, Phy Address, and register offset in the MDI - * Control register. The MAC will take care of interfacing with the - * PHY to retrieve the desired data. - */ - mdic = (((u32)data) | - (offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_WRITE)); - - E1000_WRITE_REG(hw, E1000_MDIC, mdic); - - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usec_delay_irq(50); - mdic = E1000_READ_REG(hw, E1000_MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - DEBUGOUT("MDI Write did not complete\n"); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - DEBUGOUT("MDI Error\n"); - return -E1000_ERR_PHY; - } - if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { - DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n", - offset, - (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); - return -E1000_ERR_PHY; - } - - return E1000_SUCCESS; -} - -/** - * e1000_read_phy_reg_i2c - Read PHY register using i2c - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the PHY register at offset using the i2c interface and stores the - * retrieved information in data. 
- **/ -s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) -{ - struct e1000_phy_info *phy = &hw->phy; - u32 i, i2ccmd = 0; - - DEBUGFUNC("e1000_read_phy_reg_i2c"); - - /* Set up Op-code, Phy Address, and register address in the I2CCMD - * register. The MAC will take care of interfacing with the - * PHY to retrieve the desired data. - */ - i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | - (E1000_I2CCMD_OPCODE_READ)); - - E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); - - /* Poll the ready bit to see if the I2C read completed */ - for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { - usec_delay(50); - i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); - if (i2ccmd & E1000_I2CCMD_READY) - break; - } - if (!(i2ccmd & E1000_I2CCMD_READY)) { - DEBUGOUT("I2CCMD Read did not complete\n"); - return -E1000_ERR_PHY; - } - if (i2ccmd & E1000_I2CCMD_ERROR) { - DEBUGOUT("I2CCMD Error bit set\n"); - return -E1000_ERR_PHY; - } - - /* Need to byte-swap the 16-bit value. */ - *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); - - return E1000_SUCCESS; -} - -/** - * e1000_write_phy_reg_i2c - Write PHY register using i2c - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Writes the data to PHY register at the offset using the i2c interface. - **/ -s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) -{ - struct e1000_phy_info *phy = &hw->phy; - u32 i, i2ccmd = 0; - u16 phy_data_swapped; - - DEBUGFUNC("e1000_write_phy_reg_i2c"); - - /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ - if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { - DEBUGOUT1("PHY I2C Address %d is out of range.\n", - hw->phy.addr); - return -E1000_ERR_CONFIG; - } - - /* Swap the data bytes for the I2C interface */ - phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); - - /* Set up Op-code, Phy Address, and register address in the I2CCMD - * register. The MAC will take care of interfacing with the - * PHY to retrieve the desired data. - */ - i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_WRITE | - phy_data_swapped); - - E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); - - /* Poll the ready bit to see if the I2C read completed */ - for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { - usec_delay(50); - i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); - if (i2ccmd & E1000_I2CCMD_READY) - break; - } - if (!(i2ccmd & E1000_I2CCMD_READY)) { - DEBUGOUT("I2CCMD Write did not complete\n"); - return -E1000_ERR_PHY; - } - if (i2ccmd & E1000_I2CCMD_ERROR) { - DEBUGOUT("I2CCMD Error bit set\n"); - return -E1000_ERR_PHY; - } - - return E1000_SUCCESS; -} - -/** - * e1000_read_sfp_data_byte - Reads SFP module data. - * @hw: pointer to the HW structure - * @offset: byte location offset to be read - * @data: read data buffer pointer - * - * Reads one byte from SFP module data stored - * in SFP resided EEPROM memory or SFP diagnostic area. 
- * Function should be called with - * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access - * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters - * access - **/ -s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) -{ - u32 i = 0; - u32 i2ccmd = 0; - u32 data_local = 0; - - DEBUGFUNC("e1000_read_sfp_data_byte"); - - if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { - DEBUGOUT("I2CCMD command address exceeds upper limit\n"); - return -E1000_ERR_PHY; - } - - /* Set up Op-code, EEPROM Address,in the I2CCMD - * register. The MAC will take care of interfacing with the - * EEPROM to retrieve the desired data. - */ - i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_READ); - - E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); - - /* Poll the ready bit to see if the I2C read completed */ - for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { - usec_delay(50); - data_local = E1000_READ_REG(hw, E1000_I2CCMD); - if (data_local & E1000_I2CCMD_READY) - break; - } - if (!(data_local & E1000_I2CCMD_READY)) { - DEBUGOUT("I2CCMD Read did not complete\n"); - return -E1000_ERR_PHY; - } - if (data_local & E1000_I2CCMD_ERROR) { - DEBUGOUT("I2CCMD Error bit set\n"); - return -E1000_ERR_PHY; - } - *data = (u8) data_local & 0xFF; - - return E1000_SUCCESS; -} - -/** - * e1000_write_sfp_data_byte - Writes SFP module data. - * @hw: pointer to the HW structure - * @offset: byte location offset to write to - * @data: data to write - * - * Writes one byte to SFP module data stored - * in SFP resided EEPROM memory or SFP diagnostic area. - * Function should be called with - * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access - * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters - * access - **/ -s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data) -{ - u32 i = 0; - u32 i2ccmd = 0; - u32 data_local = 0; - - DEBUGFUNC("e1000_write_sfp_data_byte"); - - if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { - DEBUGOUT("I2CCMD command address exceeds upper limit\n"); - return -E1000_ERR_PHY; - } - /* The programming interface is 16 bits wide - * so we need to read the whole word first - * then update appropriate byte lane and write - * the updated word back. - */ - /* Set up Op-code, EEPROM Address,in the I2CCMD - * register. The MAC will take care of interfacing - * with an EEPROM to write the data given. 
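
Because the I2CCMD interface moves 16 bits at a time, writing a single SFP byte is a read-modify-write: read the word, keep the high byte lane, substitute the new low byte, write the word back. A minimal illustration of that byte-lane update (hypothetical helper, no I2C traffic):

    #include <stdint.h>
    #include <assert.h>

    /* Replace only the low byte lane of a 16-bit word read back first. */
    static uint16_t replace_low_byte(uint16_t word, uint8_t byte)
    {
        return (uint16_t)((word & 0xFF00u) | byte);
    }

    int main(void)
    {
        uint16_t eeprom_word = 0xA55A;                 /* word read back first */
        eeprom_word = replace_low_byte(eeprom_word, 0x3C);
        assert(eeprom_word == 0xA53C);                 /* high lane preserved */
        return 0;
    }
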
- */ - i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_READ); - /* Set a command to read single word */ - E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); - for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { - usec_delay(50); - /* Poll the ready bit to see if lastly - * launched I2C operation completed - */ - i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); - if (i2ccmd & E1000_I2CCMD_READY) { - /* Check if this is READ or WRITE phase */ - if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) == - E1000_I2CCMD_OPCODE_READ) { - /* Write the selected byte - * lane and update whole word - */ - data_local = i2ccmd & 0xFF00; - data_local |= data; - i2ccmd = ((offset << - E1000_I2CCMD_REG_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_WRITE | data_local); - E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); - } else { - break; - } - } - } - if (!(i2ccmd & E1000_I2CCMD_READY)) { - DEBUGOUT("I2CCMD Write did not complete\n"); - return -E1000_ERR_PHY; - } - if (i2ccmd & E1000_I2CCMD_ERROR) { - DEBUGOUT("I2CCMD Error bit set\n"); - return -E1000_ERR_PHY; - } - return E1000_SUCCESS; -} - -/** - * e1000_read_phy_reg_m88 - Read m88 PHY register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Acquires semaphore, if necessary, then reads the PHY register at offset - * and storing the retrieved information in data. Release any acquired - * semaphores before exiting. - **/ -s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_read_phy_reg_m88"); - - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, - data); - - hw->phy.ops.release(hw); - - return ret_val; -} - -/** - * e1000_write_phy_reg_m88 - Write m88 PHY register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Acquires semaphore, if necessary, then writes the data to PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) -{ - s32 ret_val; - - DEBUGFUNC("e1000_write_phy_reg_m88"); - - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, - data); - - hw->phy.ops.release(hw); - - return ret_val; -} - -/** - * e1000_set_page_igp - Set page as on IGP-like PHY(s) - * @hw: pointer to the HW structure - * @page: page to set (shifted left when necessary) - * - * Sets PHY page required for PHY register access. Assumes semaphore is - * already acquired. Note, this function sets phy.addr to 1 so the caller - * must set it appropriately (if necessary) after this function returns. - **/ -s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) -{ - DEBUGFUNC("e1000_set_page_igp"); - - DEBUGOUT1("Setting page 0x%x\n", page); - - hw->phy.addr = 1; - - return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); -} - -/** - * __e1000_read_phy_reg_igp - Read igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * @locked: semaphore has already been acquired or not - * - * Acquires semaphore, if necessary, then reads the PHY register at offset - * and stores the retrieved information in data. 
Release any acquired - * semaphores before exiting. - **/ -static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, - bool locked) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("__e1000_read_phy_reg_igp"); - - if (!locked) { - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - } - - if (offset > MAX_PHY_MULTI_PAGE_REG) - ret_val = e1000_write_phy_reg_mdic(hw, - IGP01E1000_PHY_PAGE_SELECT, - (u16)offset); - if (!ret_val) - ret_val = e1000_read_phy_reg_mdic(hw, - MAX_PHY_REG_ADDRESS & offset, - data); - if (!locked) - hw->phy.ops.release(hw); - - return ret_val; -} - -/** - * e1000_read_phy_reg_igp - Read igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Acquires semaphore then reads the PHY register at offset and stores the - * retrieved information in data. - * Release the acquired semaphore before exiting. - **/ -s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) -{ - return __e1000_read_phy_reg_igp(hw, offset, data, false); -} - -/** - * e1000_read_phy_reg_igp_locked - Read igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the PHY register at offset and stores the retrieved information - * in data. Assumes semaphore already acquired. - **/ -s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) -{ - return __e1000_read_phy_reg_igp(hw, offset, data, true); -} - -/** - * e1000_write_phy_reg_igp - Write igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * @locked: semaphore has already been acquired or not - * - * Acquires semaphore, if necessary, then writes the data to PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, - bool locked) -{ - s32 ret_val = E1000_SUCCESS; - - DEBUGFUNC("e1000_write_phy_reg_igp"); - - if (!locked) { - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - } - - if (offset > MAX_PHY_MULTI_PAGE_REG) - ret_val = e1000_write_phy_reg_mdic(hw, - IGP01E1000_PHY_PAGE_SELECT, - (u16)offset); - if (!ret_val) - ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & - offset, - data); - if (!locked) - hw->phy.ops.release(hw); - - return ret_val; -} - -/** - * e1000_write_phy_reg_igp - Write igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Acquires semaphore then writes the data to PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) -{ - return __e1000_write_phy_reg_igp(hw, offset, data, false); -} - -/** - * e1000_write_phy_reg_igp_locked - Write igp PHY register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Writes the data to PHY register at the offset. - * Assumes semaphore already acquired. 
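
The __e1000_read_phy_reg_igp/__e1000_write_phy_reg_igp pair above use a pattern that recurs throughout this file: one shared body takes a locked flag, acquires the semaphore only when the caller does not already hold it, and releases it only if it acquired it here, with thin locked/unlocked wrappers on top. A stripped-down sketch of the pattern with stand-in lock and accessor functions:

    #include <stdint.h>
    #include <stdio.h>

    static int  lock_held;
    static int  acquire(void)  { lock_held = 1; return 0; }
    static void release(void)  { lock_held = 0; }
    static int  do_read(uint32_t off, uint16_t *d) { *d = (uint16_t)off; return 0; }

    /* Shared body: takes the lock only when the caller does not already
     * hold it, and drops it only if it was taken here. */
    static int read_reg(uint32_t offset, uint16_t *data, int locked)
    {
        int ret = 0;

        if (!locked) {
            ret = acquire();
            if (ret)
                return ret;
        }

        ret = do_read(offset, data);

        if (!locked)
            release();
        return ret;
    }

    static int read_reg_unlocked(uint32_t off, uint16_t *d) { return read_reg(off, d, 0); }
    static int read_reg_locked(uint32_t off, uint16_t *d)   { return read_reg(off, d, 1); }

    int main(void)
    {
        uint16_t a, b;
        read_reg_unlocked(0x11, &a);   /* takes and drops the lock */
        acquire();                     /* caller already holds it...      */
        read_reg_locked(0x12, &b);     /* ...so the body leaves it alone  */
        release();
        printf("a=0x%04X b=0x%04X lock=%d\n", a, b, lock_held);
        return 0;
    }
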
- **/ -s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) -{ - return __e1000_write_phy_reg_igp(hw, offset, data, true); -} - -/** - * __e1000_read_kmrn_reg - Read kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * @locked: semaphore has already been acquired or not - * - * Acquires semaphore, if necessary. Then reads the PHY register at offset - * using the kumeran interface. The information retrieved is stored in data. - * Release any acquired semaphores before exiting. - **/ -static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, - bool locked) -{ - u32 kmrnctrlsta; - - DEBUGFUNC("__e1000_read_kmrn_reg"); - - if (!locked) { - s32 ret_val = E1000_SUCCESS; - - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - } - - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; - E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); - E1000_WRITE_FLUSH(hw); - - usec_delay(2); - - kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); - *data = (u16)kmrnctrlsta; - - if (!locked) - hw->phy.ops.release(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_read_kmrn_reg_generic - Read kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Acquires semaphore then reads the PHY register at offset using the - * kumeran interface. The information retrieved is stored in data. - * Release the acquired semaphore before exiting. - **/ -s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) -{ - return __e1000_read_kmrn_reg(hw, offset, data, false); -} - -/** - * e1000_read_kmrn_reg_locked - Read kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to be read - * @data: pointer to the read data - * - * Reads the PHY register at offset using the kumeran interface. The - * information retrieved is stored in data. - * Assumes semaphore already acquired. - **/ -s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) -{ - return __e1000_read_kmrn_reg(hw, offset, data, true); -} - -/** - * __e1000_write_kmrn_reg - Write kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * @locked: semaphore has already been acquired or not - * - * Acquires semaphore, if necessary. Then write the data to PHY register - * at the offset using the kumeran interface. Release any acquired semaphores - * before exiting. 
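
Kumeran accesses below use one combined control/status register: the target offset sits in a field above the 16-bit data, a read-enable bit marks reads, and after a short delay the result is read back from the low half of the same register. The field positions in this sketch are assumptions, chosen only to show the packing:

    #include <stdint.h>

    #define KMRN_OFFSET_SHIFT  16                          /* assumed */
    #define KMRN_OFFSET_MASK   (0x1Fu << KMRN_OFFSET_SHIFT)
    #define KMRN_REN           (1u << 21)                  /* read enable, assumed */

    /* Command word for a read: offset field plus the read-enable bit. */
    static uint32_t kmrn_read_cmd(uint32_t offset)
    {
        return ((offset << KMRN_OFFSET_SHIFT) & KMRN_OFFSET_MASK) | KMRN_REN;
    }

    /* Command word for a write: offset field plus the 16-bit payload. */
    static uint32_t kmrn_write_cmd(uint32_t offset, uint16_t data)
    {
        return ((offset << KMRN_OFFSET_SHIFT) & KMRN_OFFSET_MASK) | data;
    }

    int main(void)
    {
        return (kmrn_read_cmd(3) != 0 && kmrn_write_cmd(3, 0xABCD) != 0) ? 0 : 1;
    }
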
- **/ -static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, - bool locked) -{ - u32 kmrnctrlsta; - - DEBUGFUNC("e1000_write_kmrn_reg_generic"); - - if (!locked) { - s32 ret_val = E1000_SUCCESS; - - if (!hw->phy.ops.acquire) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - } - - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | data; - E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); - E1000_WRITE_FLUSH(hw); - - usec_delay(2); - - if (!locked) - hw->phy.ops.release(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_write_kmrn_reg_generic - Write kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Acquires semaphore then writes the data to the PHY register at the offset - * using the kumeran interface. Release the acquired semaphore before exiting. - **/ -s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) -{ - return __e1000_write_kmrn_reg(hw, offset, data, false); -} - -/** - * e1000_write_kmrn_reg_locked - Write kumeran register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Write the data to PHY register at the offset using the kumeran interface. - * Assumes semaphore already acquired. - **/ -s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) -{ - return __e1000_write_kmrn_reg(hw, offset, data, true); -} - -/** - * e1000_set_master_slave_mode - Setup PHY for Master/slave mode - * @hw: pointer to the HW structure - * - * Sets up Master/slave mode - **/ -static s32 e1000_set_master_slave_mode(struct e1000_hw *hw) -{ - s32 ret_val; - u16 phy_data; - - /* Resolve Master/Slave mode */ - ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); - if (ret_val) - return ret_val; - - /* load defaults for future use */ - hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? - ((phy_data & CR_1000T_MS_VALUE) ? - e1000_ms_force_master : - e1000_ms_force_slave) : e1000_ms_auto; - - switch (hw->phy.ms_type) { - case e1000_ms_force_master: - phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); - break; - case e1000_ms_force_slave: - phy_data |= CR_1000T_MS_ENABLE; - phy_data &= ~(CR_1000T_MS_VALUE); - break; - case e1000_ms_auto: - phy_data &= ~CR_1000T_MS_ENABLE; - /* fall-through */ - default: - break; - } - - return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); -} - -/** - * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link - * @hw: pointer to the HW structure - * - * Sets up Carrier-sense on Transmit and downshift values. - **/ -s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) -{ - s32 ret_val; - u16 phy_data; - - DEBUGFUNC("e1000_copper_link_setup_82577"); - - if (hw->phy.reset_disable) - return E1000_SUCCESS; - - if (hw->phy.type == e1000_phy_82580) { - ret_val = hw->phy.ops.reset(hw); - if (ret_val) { - DEBUGOUT("Error resetting the PHY.\n"); - return ret_val; - } - } - - /* Enable CRS on Tx. This must be set for half-duplex operation. 
*/ - ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data); - if (ret_val) - return ret_val; - - phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; - - /* Enable downshift */ - phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; - - ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data); - if (ret_val) - return ret_val; - - /* Set MDI/MDIX mode */ - ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data); - if (ret_val) - return ret_val; - phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; - /* Options: - * 0 - Auto (default) - * 1 - MDI mode - * 2 - MDI-X mode - */ - switch (hw->phy.mdix) { - case 1: - break; - case 2: - phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; - break; - case 0: - default: - phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; - break; - } - ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data); - if (ret_val) - return ret_val; - - return e1000_set_master_slave_mode(hw); -} - -/** - * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link - * @hw: pointer to the HW structure - * - * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock - * and downshift values are set also. - **/ -s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - - DEBUGFUNC("e1000_copper_link_setup_m88"); - - if (phy->reset_disable) - return E1000_SUCCESS; - - /* Enable CRS on Tx. This must be set for half-duplex operation. */ - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - return ret_val; - - phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; - - /* Options: - * MDI/MDI-X = 0 (default) - * 0 - Auto for all speeds - * 1 - MDI mode - * 2 - MDI-X mode - * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) - */ - phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; - - switch (phy->mdix) { - case 1: - phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; - break; - case 2: - phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; - break; - case 3: - phy_data |= M88E1000_PSCR_AUTO_X_1000T; - break; - case 0: - default: - phy_data |= M88E1000_PSCR_AUTO_X_MODE; - break; - } - - /* Options: - * disable_polarity_correction = 0 (default) - * Automatic Correction for Reversed Cable Polarity - * 0 - Disabled - * 1 - Enabled - */ - phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; - if (phy->disable_polarity_correction) - phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; - - ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); - if (ret_val) - return ret_val; - - if (phy->revision < E1000_REVISION_4) { - /* Force TX_CLK in the Extended PHY Specific Control Register - * to 25MHz clock. - */ - ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, - &phy_data); - if (ret_val) - return ret_val; - - phy_data |= M88E1000_EPSCR_TX_CLK_25; - - if ((phy->revision == E1000_REVISION_2) && - (phy->id == M88E1111_I_PHY_ID)) { - /* 82573L PHY - set the downshift counter to 5x. */ - phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; - phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; - } else { - /* Configure Master and Slave downshift values */ - phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | - M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); - phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | - M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); - } - ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, - phy_data); - if (ret_val) - return ret_val; - } - - /* Commit the changes. 
*/ - ret_val = phy->ops.commit(hw); - if (ret_val) { - DEBUGOUT("Error committing the PHY changes\n"); - return ret_val; - } - - return E1000_SUCCESS; -} - -/** - * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link - * @hw: pointer to the HW structure - * - * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. - * Also enables and sets the downshift parameters. - **/ -s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - - DEBUGFUNC("e1000_copper_link_setup_m88_gen2"); - - if (phy->reset_disable) - return E1000_SUCCESS; - - /* Enable CRS on Tx. This must be set for half-duplex operation. */ - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - return ret_val; - - /* Options: - * MDI/MDI-X = 0 (default) - * 0 - Auto for all speeds - * 1 - MDI mode - * 2 - MDI-X mode - * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) - */ - phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; - - switch (phy->mdix) { - case 1: - phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; - break; - case 2: - phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; - break; - case 3: - /* M88E1112 does not support this mode) */ - if (phy->id != M88E1112_E_PHY_ID) { - phy_data |= M88E1000_PSCR_AUTO_X_1000T; - break; - } - case 0: - default: - phy_data |= M88E1000_PSCR_AUTO_X_MODE; - break; - } - - /* Options: - * disable_polarity_correction = 0 (default) - * Automatic Correction for Reversed Cable Polarity - * 0 - Disabled - * 1 - Enabled - */ - phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; - if (phy->disable_polarity_correction) - phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; - - /* Enable downshift and setting it to X6 */ - if (phy->id == M88E1543_E_PHY_ID) { - phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; - ret_val = - phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); - if (ret_val) - return ret_val; - - ret_val = phy->ops.commit(hw); - if (ret_val) { - DEBUGOUT("Error committing the PHY changes\n"); - return ret_val; - } - } - - phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; - phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; - phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; - - ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); - if (ret_val) - return ret_val; - - /* Commit the changes. */ - ret_val = phy->ops.commit(hw); - if (ret_val) { - DEBUGOUT("Error committing the PHY changes\n"); - return ret_val; - } - - ret_val = e1000_set_master_slave_mode(hw); - if (ret_val) - return ret_val; - - return E1000_SUCCESS; -} - -/** - * e1000_copper_link_setup_igp - Setup igp PHY's for copper link - * @hw: pointer to the HW structure - * - * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for - * igp PHY's. - **/ -s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - - DEBUGFUNC("e1000_copper_link_setup_igp"); - - if (phy->reset_disable) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.reset(hw); - if (ret_val) { - DEBUGOUT("Error resetting the PHY.\n"); - return ret_val; - } - - /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid - * timeout issues when LFS is enabled. 
- */ - msec_delay(100); - - /* disable lplu d0 during driver init */ - if (hw->phy.ops.set_d0_lplu_state) { - ret_val = hw->phy.ops.set_d0_lplu_state(hw, false); - if (ret_val) { - DEBUGOUT("Error Disabling LPLU D0\n"); - return ret_val; - } - } - /* Configure mdi-mdix settings */ - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); - if (ret_val) - return ret_val; - - data &= ~IGP01E1000_PSCR_AUTO_MDIX; - - switch (phy->mdix) { - case 1: - data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; - break; - case 2: - data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; - break; - case 0: - default: - data |= IGP01E1000_PSCR_AUTO_MDIX; - break; - } - ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); - if (ret_val) - return ret_val; - - /* set auto-master slave resolution settings */ - if (hw->mac.autoneg) { - /* when autonegotiation advertisement is only 1000Mbps then we - * should disable SmartSpeed and enable Auto MasterSlave - * resolution as hardware default. - */ - if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { - /* Disable SmartSpeed */ - ret_val = phy->ops.read_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - return ret_val; - - data &= ~IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - return ret_val; - - /* Set auto Master/Slave resolution process */ - ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); - if (ret_val) - return ret_val; - - data &= ~CR_1000T_MS_ENABLE; - ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); - if (ret_val) - return ret_val; - } - - ret_val = e1000_set_master_slave_mode(hw); - } - - return ret_val; -} - -/** - * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation - * @hw: pointer to the HW structure - * - * Reads the MII auto-neg advertisement register and/or the 1000T control - * register and if the PHY is already setup for auto-negotiation, then - * return successful. Otherwise, setup advertisement and flow control to - * the appropriate values for the wanted auto-negotiation. - **/ -static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 mii_autoneg_adv_reg; - u16 mii_1000t_ctrl_reg = 0; - - DEBUGFUNC("e1000_phy_setup_autoneg"); - - phy->autoneg_advertised &= phy->autoneg_mask; - - /* Read the MII Auto-Neg Advertisement Register (Address 4). */ - ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); - if (ret_val) - return ret_val; - - if (phy->autoneg_mask & ADVERTISE_1000_FULL) { - /* Read the MII 1000Base-T Control Register (Address 9). */ - ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, - &mii_1000t_ctrl_reg); - if (ret_val) - return ret_val; - } - - /* Need to parse both autoneg_advertised and fc and set up - * the appropriate PHY registers. First we will parse for - * autoneg_advertised software override. Since we can advertise - * a plethora of combinations, we need to check each bit - * individually. - */ - - /* First we clear all the 10/100 mb speed bits in the Auto-Neg - * Advertisement Register (Address 4) and the 1000 mb speed bits in - * the 1000Base-T Control Register (Address 9). - */ - mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | - NWAY_AR_100TX_HD_CAPS | - NWAY_AR_10T_FD_CAPS | - NWAY_AR_10T_HD_CAPS); - mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); - - DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); - - /* Do we want to advertise 10 Mb Half Duplex? 
*/ - if (phy->autoneg_advertised & ADVERTISE_10_HALF) { - DEBUGOUT("Advertise 10mb Half duplex\n"); - mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; - } - - /* Do we want to advertise 10 Mb Full Duplex? */ - if (phy->autoneg_advertised & ADVERTISE_10_FULL) { - DEBUGOUT("Advertise 10mb Full duplex\n"); - mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; - } - - /* Do we want to advertise 100 Mb Half Duplex? */ - if (phy->autoneg_advertised & ADVERTISE_100_HALF) { - DEBUGOUT("Advertise 100mb Half duplex\n"); - mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; - } - - /* Do we want to advertise 100 Mb Full Duplex? */ - if (phy->autoneg_advertised & ADVERTISE_100_FULL) { - DEBUGOUT("Advertise 100mb Full duplex\n"); - mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; - } - - /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ - if (phy->autoneg_advertised & ADVERTISE_1000_HALF) - DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); - - /* Do we want to advertise 1000 Mb Full Duplex? */ - if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { - DEBUGOUT("Advertise 1000mb Full duplex\n"); - mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; - } - - /* Check for a software override of the flow control settings, and - * setup the PHY advertisement registers accordingly. If - * auto-negotiation is enabled, then software will have to set the - * "PAUSE" bits to the correct value in the Auto-Negotiation - * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- - * negotiation. - * - * The possible values of the "fc" parameter are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause frames - * but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames - * but we do not support receiving pause frames). - * 3: Both Rx and Tx flow control (symmetric) are enabled. - * other: No software override. The flow control configuration - * in the EEPROM is used. - */ - switch (hw->fc.current_mode) { - case e1000_fc_none: - /* Flow control (Rx & Tx) is completely disabled by a - * software over-ride. - */ - mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); - break; - case e1000_fc_rx_pause: - /* Rx Flow control is enabled, and Tx Flow control is - * disabled, by a software over-ride. - * - * Since there really isn't a way to advertise that we are - * capable of Rx Pause ONLY, we will advertise that we - * support both symmetric and asymmetric Rx PAUSE. Later - * (in e1000_config_fc_after_link_up) we will disable the - * hw's ability to send PAUSE frames. - */ - mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); - break; - case e1000_fc_tx_pause: - /* Tx Flow control is enabled, and Rx Flow control is - * disabled, by a software over-ride. - */ - mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; - mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; - break; - case e1000_fc_full: - /* Flow control (both Rx and Tx) is enabled by a software - * over-ride. 
- */ - mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); - break; - default: - DEBUGOUT("Flow control param set incorrectly\n"); - return -E1000_ERR_CONFIG; - } - - ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); - if (ret_val) - return ret_val; - - DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); - - if (phy->autoneg_mask & ADVERTISE_1000_FULL) - ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, - mii_1000t_ctrl_reg); - - return ret_val; -} - -/** - * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link - * @hw: pointer to the HW structure - * - * Performs initial bounds checking on autoneg advertisement parameter, then - * configure to advertise the full capability. Setup the PHY to autoneg - * and restart the negotiation process between the link partner. If - * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. - **/ -static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_ctrl; - - DEBUGFUNC("e1000_copper_link_autoneg"); - - /* Perform some bounds checking on the autoneg advertisement - * parameter. - */ - phy->autoneg_advertised &= phy->autoneg_mask; - - /* If autoneg_advertised is zero, we assume it was not defaulted - * by the calling code so we set to advertise full capability. - */ - if (!phy->autoneg_advertised) - phy->autoneg_advertised = phy->autoneg_mask; - - DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); - ret_val = e1000_phy_setup_autoneg(hw); - if (ret_val) { - DEBUGOUT("Error Setting up Auto-Negotiation\n"); - return ret_val; - } - DEBUGOUT("Restarting Auto-Neg\n"); - - /* Restart auto-negotiation by setting the Auto Neg Enable bit and - * the Auto Neg Restart bit in the PHY control register. - */ - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); - if (ret_val) - return ret_val; - - phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); - ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); - if (ret_val) - return ret_val; - - /* Does the user want to wait for Auto-Neg to complete here, or - * check at a later time (for example, callback routine). - */ - if (phy->autoneg_wait_to_complete) { - ret_val = e1000_wait_autoneg(hw); - if (ret_val) { - DEBUGOUT("Error while waiting for autoneg to complete\n"); - return ret_val; - } - } - - hw->mac.get_link_status = true; - - return ret_val; -} - -/** - * e1000_setup_copper_link_generic - Configure copper link settings - * @hw: pointer to the HW structure - * - * Calls the appropriate function to configure the link for auto-neg or forced - * speed and duplex. Then we check for link, once link is established calls - * to configure collision distance and flow control are called. If link is - * not established, we return -E1000_ERR_PHY (-2). - **/ -s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) -{ - s32 ret_val; - bool link; - - DEBUGFUNC("e1000_setup_copper_link_generic"); - - if (hw->mac.autoneg) { - /* Setup autoneg and flow control advertisement and perform - * autonegotiation. - */ - ret_val = e1000_copper_link_autoneg(hw); - if (ret_val) - return ret_val; - } else { - /* PHY will be set to 10H, 10F, 100H or 100F - * depending on user settings. - */ - DEBUGOUT("Forcing Speed and Duplex\n"); - ret_val = hw->phy.ops.force_speed_duplex(hw); - if (ret_val) { - DEBUGOUT("Error Forcing Speed and Duplex\n"); - return ret_val; - } - } - - /* Check link status. Wait up to 100 microseconds for link to become - * valid. 
- */ - ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, - &link); - if (ret_val) - return ret_val; - - if (link) { - DEBUGOUT("Valid link established!!!\n"); - hw->mac.ops.config_collision_dist(hw); - ret_val = e1000_config_fc_after_link_up_generic(hw); - } else { - DEBUGOUT("Unable to establish link!!!\n"); - } - - return ret_val; -} - -/** - * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY - * @hw: pointer to the HW structure - * - * Calls the PHY setup function to force speed and duplex. Clears the - * auto-crossover to force MDI manually. Waits for link and returns - * successful if link up is successful, else -E1000_ERR_PHY (-2). - **/ -s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - bool link; - - DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); - - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); - if (ret_val) - return ret_val; - - e1000_phy_force_speed_duplex_setup(hw, &phy_data); - - ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); - if (ret_val) - return ret_val; - - /* Clear Auto-Crossover to force MDI manually. IGP requires MDI - * forced whenever speed and duplex are forced. - */ - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); - if (ret_val) - return ret_val; - - phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; - phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; - - ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); - if (ret_val) - return ret_val; - - DEBUGOUT1("IGP PSCR: %X\n", phy_data); - - usec_delay(1); - - if (phy->autoneg_wait_to_complete) { - DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); - - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - - if (!link) - DEBUGOUT("Link taking longer than expected.\n"); - - /* Try once more */ - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - } - - return ret_val; -} - -/** - * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY - * @hw: pointer to the HW structure - * - * Calls the PHY setup function to force speed and duplex. Clears the - * auto-crossover to force MDI manually. Resets the PHY to commit the - * changes. If time expires while waiting for link up, we reset the DSP. - * After reset, TX_CLK and CRS on Tx must be set. Return successful upon - * successful completion, else return corresponding error code. - **/ -s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - bool link; - - DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); - - /* I210 and I211 devices support Auto-Crossover in forced operation. */ - if (phy->type != e1000_phy_i210) { - /* Clear Auto-Crossover to force MDI manually. M88E1000 - * requires MDI forced whenever speed and duplex are forced. - */ - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, - &phy_data); - if (ret_val) - return ret_val; - - phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; - ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, - phy_data); - if (ret_val) - return ret_val; - } - - DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); - - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); - if (ret_val) - return ret_val; - - e1000_phy_force_speed_duplex_setup(hw, &phy_data); - - ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); - if (ret_val) - return ret_val; - - /* Reset the phy to commit changes. 
*/ - ret_val = hw->phy.ops.commit(hw); - if (ret_val) - return ret_val; - - if (phy->autoneg_wait_to_complete) { - DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); - - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - - if (!link) { - bool reset_dsp = true; - - switch (hw->phy.id) { - case I347AT4_E_PHY_ID: - case M88E1340M_E_PHY_ID: - case M88E1112_E_PHY_ID: - case M88E1543_E_PHY_ID: - case I210_I_PHY_ID: - reset_dsp = false; - break; - default: - if (hw->phy.type != e1000_phy_m88) - reset_dsp = false; - break; - } - - if (!reset_dsp) { - DEBUGOUT("Link taking longer than expected.\n"); - } else { - /* We didn't get link. - * Reset the DSP and cross our fingers. - */ - ret_val = phy->ops.write_reg(hw, - M88E1000_PHY_PAGE_SELECT, - 0x001d); - if (ret_val) - return ret_val; - ret_val = e1000_phy_reset_dsp_generic(hw); - if (ret_val) - return ret_val; - } - } - - /* Try once more */ - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - } - - if (hw->phy.type != e1000_phy_m88) - return E1000_SUCCESS; - - if (hw->phy.id == I347AT4_E_PHY_ID || - hw->phy.id == M88E1340M_E_PHY_ID || - hw->phy.id == M88E1112_E_PHY_ID) - return E1000_SUCCESS; - if (hw->phy.id == I210_I_PHY_ID) - return E1000_SUCCESS; - if ((hw->phy.id == M88E1543_E_PHY_ID)) - return E1000_SUCCESS; - ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - return ret_val; - - /* Resetting the phy means we need to re-force TX_CLK in the - * Extended PHY Specific Control Register to 25MHz clock from - * the reset value of 2.5MHz. - */ - phy_data |= M88E1000_EPSCR_TX_CLK_25; - ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); - if (ret_val) - return ret_val; - - /* In addition, we must re-enable CRS on Tx for both half and full - * duplex. - */ - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - return ret_val; - - phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; - ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); - - return ret_val; -} - -/** - * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex - * @hw: pointer to the HW structure - * - * Forces the speed and duplex settings of the PHY. - * This is a function pointer entry point only called by - * PHY setup routines. 
- **/ -s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - bool link; - - DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); - - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data); - if (ret_val) - return ret_val; - - e1000_phy_force_speed_duplex_setup(hw, &data); - - ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data); - if (ret_val) - return ret_val; - - /* Disable MDI-X support for 10/100 */ - ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); - if (ret_val) - return ret_val; - - data &= ~IFE_PMC_AUTO_MDIX; - data &= ~IFE_PMC_FORCE_MDIX; - - ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data); - if (ret_val) - return ret_val; - - DEBUGOUT1("IFE PMC: %X\n", data); - - usec_delay(1); - - if (phy->autoneg_wait_to_complete) { - DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); - - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - - if (!link) - DEBUGOUT("Link taking longer than expected.\n"); - - /* Try once more */ - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - } - - return E1000_SUCCESS; -} - -/** - * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex - * @hw: pointer to the HW structure - * @phy_ctrl: pointer to current value of PHY_CONTROL - * - * Forces speed and duplex on the PHY by doing the following: disable flow - * control, force speed/duplex on the MAC, disable auto speed detection, - * disable auto-negotiation, configure duplex, configure speed, configure - * the collision distance, write configuration to CTRL register. The - * caller must write to the PHY_CONTROL register for these settings to - * take affect. - **/ -void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) -{ - struct e1000_mac_info *mac = &hw->mac; - u32 ctrl; - - DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); - - /* Turn off flow control when forcing speed/duplex */ - hw->fc.current_mode = e1000_fc_none; - - /* Force speed/duplex on the mac */ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); - ctrl &= ~E1000_CTRL_SPD_SEL; - - /* Disable Auto Speed Detection */ - ctrl &= ~E1000_CTRL_ASDE; - - /* Disable autoneg on the phy */ - *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; - - /* Forcing Full or Half Duplex? */ - if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { - ctrl &= ~E1000_CTRL_FD; - *phy_ctrl &= ~MII_CR_FULL_DUPLEX; - DEBUGOUT("Half Duplex\n"); - } else { - ctrl |= E1000_CTRL_FD; - *phy_ctrl |= MII_CR_FULL_DUPLEX; - DEBUGOUT("Full Duplex\n"); - } - - /* Forcing 10mb or 100mb? */ - if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { - ctrl |= E1000_CTRL_SPD_100; - *phy_ctrl |= MII_CR_SPEED_100; - *phy_ctrl &= ~MII_CR_SPEED_1000; - DEBUGOUT("Forcing 100mb\n"); - } else { - ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); - *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); - DEBUGOUT("Forcing 10mb\n"); - } - - hw->mac.ops.config_collision_dist(hw); - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); -} - -/** - * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 - * @hw: pointer to the HW structure - * @active: boolean used to enable/disable lplu - * - * Success returns 0, Failure returns 1 - * - * The low power link up (lplu) state is set to the power management level D3 - * and SmartSpeed is disabled when active is true, else clear lplu for D3 - * and enable Smartspeed. 
LPLU and Smartspeed are mutually exclusive. LPLU - * is used during Dx states where the power conservation is most important. - * During driver activity, SmartSpeed should be enabled so performance is - * maintained. - **/ -s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - - DEBUGFUNC("e1000_set_d3_lplu_state_generic"); - - if (!hw->phy.ops.read_reg) - return E1000_SUCCESS; - - ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); - if (ret_val) - return ret_val; - - if (!active) { - data &= ~IGP02E1000_PM_D3_LPLU; - ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, - data); - if (ret_val) - return ret_val; - /* LPLU and SmartSpeed are mutually exclusive. LPLU is used - * during Dx states where the power conservation is most - * important. During driver activity we should enable - * SmartSpeed, so performance is maintained. - */ - if (phy->smart_speed == e1000_smart_speed_on) { - ret_val = phy->ops.read_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - return ret_val; - - data |= IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - return ret_val; - } else if (phy->smart_speed == e1000_smart_speed_off) { - ret_val = phy->ops.read_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - return ret_val; - - data &= ~IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, - IGP01E1000_PHY_PORT_CONFIG, - data); - if (ret_val) - return ret_val; - } - } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || - (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || - (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { - data |= IGP02E1000_PM_D3_LPLU; - ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, - data); - if (ret_val) - return ret_val; - - /* When LPLU is enabled, we should disable SmartSpeed */ - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - &data); - if (ret_val) - return ret_val; - - data &= ~IGP01E1000_PSCFR_SMART_SPEED; - ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - data); - } - - return ret_val; -} - -/** - * e1000_check_downshift_generic - Checks whether a downshift in speed occurred - * @hw: pointer to the HW structure - * - * Success returns 0, Failure returns 1 - * - * A downshift is detected by querying the PHY link health. - **/ -s32 e1000_check_downshift_generic(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, offset, mask; - - DEBUGFUNC("e1000_check_downshift_generic"); - - switch (phy->type) { - case e1000_phy_i210: - case e1000_phy_m88: - case e1000_phy_gg82563: - offset = M88E1000_PHY_SPEC_STATUS; - mask = M88E1000_PSSR_DOWNSHIFT; - break; - case e1000_phy_igp_2: - case e1000_phy_igp_3: - offset = IGP01E1000_PHY_LINK_HEALTH; - mask = IGP01E1000_PLHR_SS_DOWNGRADE; - break; - default: - /* speed downshift not supported */ - phy->speed_downgraded = false; - return E1000_SUCCESS; - } - - ret_val = phy->ops.read_reg(hw, offset, &phy_data); - - if (!ret_val) - phy->speed_downgraded = !!(phy_data & mask); - - return ret_val; -} - -/** - * e1000_check_polarity_m88 - Checks the polarity. - * @hw: pointer to the HW structure - * - * Success returns 0, Failure returns -E1000_ERR_PHY (-2) - * - * Polarity is determined based on the PHY specific status register. 
- **/ -s32 e1000_check_polarity_m88(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - - DEBUGFUNC("e1000_check_polarity_m88"); - - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); - - if (!ret_val) - phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal); - - return ret_val; -} - -/** - * e1000_check_polarity_igp - Checks the polarity. - * @hw: pointer to the HW structure - * - * Success returns 0, Failure returns -E1000_ERR_PHY (-2) - * - * Polarity is determined based on the PHY port status register, and the - * current speed (since there is no polarity at 100Mbps). - **/ -s32 e1000_check_polarity_igp(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data, offset, mask; - - DEBUGFUNC("e1000_check_polarity_igp"); - - /* Polarity is determined based on the speed of - * our connection. - */ - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); - if (ret_val) - return ret_val; - - if ((data & IGP01E1000_PSSR_SPEED_MASK) == - IGP01E1000_PSSR_SPEED_1000MBPS) { - offset = IGP01E1000_PHY_PCS_INIT_REG; - mask = IGP01E1000_PHY_POLARITY_MASK; - } else { - /* This really only applies to 10Mbps since - * there is no polarity for 100Mbps (always 0). - */ - offset = IGP01E1000_PHY_PORT_STATUS; - mask = IGP01E1000_PSSR_POLARITY_REVERSED; - } - - ret_val = phy->ops.read_reg(hw, offset, &data); - - if (!ret_val) - phy->cable_polarity = ((data & mask) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal); - - return ret_val; -} - -/** - * e1000_check_polarity_ife - Check cable polarity for IFE PHY - * @hw: pointer to the HW structure - * - * Polarity is determined on the polarity reversal feature being enabled. - **/ -s32 e1000_check_polarity_ife(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, offset, mask; - - DEBUGFUNC("e1000_check_polarity_ife"); - - /* Polarity is determined based on the reversal feature being enabled. - */ - if (phy->polarity_correction) { - offset = IFE_PHY_EXTENDED_STATUS_CONTROL; - mask = IFE_PESC_POLARITY_REVERSED; - } else { - offset = IFE_PHY_SPECIAL_CONTROL; - mask = IFE_PSC_FORCE_POLARITY; - } - - ret_val = phy->ops.read_reg(hw, offset, &phy_data); - - if (!ret_val) - phy->cable_polarity = ((phy_data & mask) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal); - - return ret_val; -} - -/** - * e1000_wait_autoneg - Wait for auto-neg completion - * @hw: pointer to the HW structure - * - * Waits for auto-negotiation to complete or for the auto-negotiation time - * limit to expire, which ever happens first. - **/ -static s32 e1000_wait_autoneg(struct e1000_hw *hw) -{ - s32 ret_val = E1000_SUCCESS; - u16 i, phy_status; - - DEBUGFUNC("e1000_wait_autoneg"); - - if (!hw->phy.ops.read_reg) - return E1000_SUCCESS; - - /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ - for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); - if (ret_val) - break; - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); - if (ret_val) - break; - if (phy_status & MII_SR_AUTONEG_COMPLETE) - break; - msec_delay(100); - } - - /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation - * has completed. 
- */ - return ret_val; -} - -/** - * e1000_phy_has_link_generic - Polls PHY for link - * @hw: pointer to the HW structure - * @iterations: number of times to poll for link - * @usec_interval: delay between polling attempts - * @success: pointer to whether polling was successful or not - * - * Polls the PHY status register for link, 'iterations' number of times. - **/ -s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, - u32 usec_interval, bool *success) -{ - s32 ret_val = E1000_SUCCESS; - u16 i, phy_status; - - DEBUGFUNC("e1000_phy_has_link_generic"); - - if (!hw->phy.ops.read_reg) - return E1000_SUCCESS; - - for (i = 0; i < iterations; i++) { - /* Some PHYs require the PHY_STATUS register to be read - * twice due to the link bit being sticky. No harm doing - * it across the board. - */ - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); - if (ret_val) - /* If the first read fails, another entity may have - * ownership of the resources, wait and try again to - * see if they have relinquished the resources yet. - */ - usec_delay(usec_interval); - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); - if (ret_val) - break; - if (phy_status & MII_SR_LINK_STATUS) - break; - if (usec_interval >= 1000) - msec_delay_irq(usec_interval/1000); - else - usec_delay(usec_interval); - } - - *success = (i < iterations); - - return ret_val; -} - -/** - * e1000_get_cable_length_m88 - Determine cable length for m88 PHY - * @hw: pointer to the HW structure - * - * Reads the PHY specific status register to retrieve the cable length - * information. The cable length is determined by averaging the minimum and - * maximum values to get the "average" cable length. The m88 PHY has four - * possible cable length values, which are: - * Register Value Cable Length - * 0 < 50 meters - * 1 50 - 80 meters - * 2 80 - 110 meters - * 3 110 - 140 meters - * 4 > 140 meters - **/ -s32 e1000_get_cable_length_m88(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, index; - - DEBUGFUNC("e1000_get_cable_length_m88"); - - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); - if (ret_val) - return ret_val; - - index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT); - - if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) - return -E1000_ERR_PHY; - - phy->min_cable_length = e1000_m88_cable_length_table[index]; - phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; - - phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; - - return E1000_SUCCESS; -} - -s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, phy_data2, is_cm; - u16 index, default_page; - - DEBUGFUNC("e1000_get_cable_length_m88_gen2"); - - switch (hw->phy.id) { - case I210_I_PHY_ID: - /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + - (I347AT4_PCDL + phy->addr), - &phy_data); - if (ret_val) - return ret_val; - - /* Check if the unit of cable length is meters or cm */ - ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + - I347AT4_PCDC, &phy_data2); - if (ret_val) - return ret_val; - - is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); - - /* Populate the phy structure with cable length in meters */ - phy->min_cable_length = phy_data / (is_cm ? 100 : 1); - phy->max_cable_length = phy_data / (is_cm ? 100 : 1); - phy->cable_length = phy_data / (is_cm ? 
100 : 1); - break; - case M88E1543_E_PHY_ID: - case M88E1340M_E_PHY_ID: - case I347AT4_E_PHY_ID: - /* Remember the original page select and set it to 7 */ - ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, - &default_page); - if (ret_val) - return ret_val; - - ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); - if (ret_val) - return ret_val; - - /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), - &phy_data); - if (ret_val) - return ret_val; - - /* Check if the unit of cable length is meters or cm */ - ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); - if (ret_val) - return ret_val; - - is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); - - /* Populate the phy structure with cable length in meters */ - phy->min_cable_length = phy_data / (is_cm ? 100 : 1); - phy->max_cable_length = phy_data / (is_cm ? 100 : 1); - phy->cable_length = phy_data / (is_cm ? 100 : 1); - - /* Reset the page select to its original value */ - ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, - default_page); - if (ret_val) - return ret_val; - break; - - case M88E1112_E_PHY_ID: - /* Remember the original page select and set it to 5 */ - ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, - &default_page); - if (ret_val) - return ret_val; - - ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, - &phy_data); - if (ret_val) - return ret_val; - - index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT; - - if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) - return -E1000_ERR_PHY; - - phy->min_cable_length = e1000_m88_cable_length_table[index]; - phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; - - phy->cable_length = (phy->min_cable_length + - phy->max_cable_length) / 2; - - /* Reset the page select to its original value */ - ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, - default_page); - if (ret_val) - return ret_val; - - break; - default: - return -E1000_ERR_PHY; - } - - return ret_val; -} - -/** - * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY - * @hw: pointer to the HW structure - * - * The automatic gain control (agc) normalizes the amplitude of the - * received signal, adjusting for the attenuation produced by the - * cable. By reading the AGC registers, which represent the - * combination of coarse and fine gain value, the value can be put - * into a lookup table to obtain the approximate cable length - * for each channel. - **/ -s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, i, agc_value = 0; - u16 cur_agc_index, max_agc_index = 0; - u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; - static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { - IGP02E1000_PHY_AGC_A, - IGP02E1000_PHY_AGC_B, - IGP02E1000_PHY_AGC_C, - IGP02E1000_PHY_AGC_D - }; - - DEBUGFUNC("e1000_get_cable_length_igp_2"); - - /* Read the AGC registers for all channels */ - for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { - ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); - if (ret_val) - return ret_val; - - /* Getting bits 15:9, which represent the combination of - * coarse and fine gain values. The result is a number - * that can be put into the lookup table to obtain the - * approximate cable length. 
- */ - cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & - IGP02E1000_AGC_LENGTH_MASK); - - /* Array index bound check. */ - if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || - (cur_agc_index == 0)) - return -E1000_ERR_PHY; - - /* Remove min & max AGC values from calculation. */ - if (e1000_igp_2_cable_length_table[min_agc_index] > - e1000_igp_2_cable_length_table[cur_agc_index]) - min_agc_index = cur_agc_index; - if (e1000_igp_2_cable_length_table[max_agc_index] < - e1000_igp_2_cable_length_table[cur_agc_index]) - max_agc_index = cur_agc_index; - - agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; - } - - agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + - e1000_igp_2_cable_length_table[max_agc_index]); - agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); - - /* Calculate cable length with the error range of +/- 10 meters. */ - phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ? - (agc_value - IGP02E1000_AGC_RANGE) : 0); - phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; - - phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; - - return E1000_SUCCESS; -} - -/** - * e1000_get_phy_info_m88 - Retrieve PHY information - * @hw: pointer to the HW structure - * - * Valid for only copper links. Read the PHY status register (sticky read) - * to verify that link is up. Read the PHY special control register to - * determine the polarity and 10base-T extended distance. Read the PHY - * special status register to determine MDI/MDIx and current speed. If - * speed is 1000, then determine cable length, local and remote receiver. - **/ -s32 e1000_get_phy_info_m88(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - bool link; - - DEBUGFUNC("e1000_get_phy_info_m88"); - - if (phy->media_type != e1000_media_type_copper) { - DEBUGOUT("Phy info is only valid for copper media\n"); - return -E1000_ERR_CONFIG; - } - - ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) { - DEBUGOUT("Phy info is only valid if link is up\n"); - return -E1000_ERR_CONFIG; - } - - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - return ret_val; - - phy->polarity_correction = !!(phy_data & - M88E1000_PSCR_POLARITY_REVERSAL); - - ret_val = e1000_check_polarity_m88(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); - if (ret_val) - return ret_val; - - phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX); - - if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { - ret_val = hw->phy.ops.get_cable_length(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); - if (ret_val) - return ret_val; - - phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - - phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - } else { - /* Set values to "undefined" */ - phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; - phy->local_rx = e1000_1000t_rx_status_undefined; - phy->remote_rx = e1000_1000t_rx_status_undefined; - } - - return ret_val; -} - -/** - * e1000_get_phy_info_igp - Retrieve igp PHY information - * @hw: pointer to the HW structure - * - * Read PHY status to determine if link is up. If link is up, then - * set/determine 10base-T extended distance and polarity correction. 
Read - * PHY port status to determine MDI/MDIx and speed. Based on the speed, - * determine on the cable length, local and remote receiver. - **/ -s32 e1000_get_phy_info_igp(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - bool link; - - DEBUGFUNC("e1000_get_phy_info_igp"); - - ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) { - DEBUGOUT("Phy info is only valid if link is up\n"); - return -E1000_ERR_CONFIG; - } - - phy->polarity_correction = true; - - ret_val = e1000_check_polarity_igp(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); - if (ret_val) - return ret_val; - - phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX); - - if ((data & IGP01E1000_PSSR_SPEED_MASK) == - IGP01E1000_PSSR_SPEED_1000MBPS) { - ret_val = phy->ops.get_cable_length(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); - if (ret_val) - return ret_val; - - phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - - phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - } else { - phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; - phy->local_rx = e1000_1000t_rx_status_undefined; - phy->remote_rx = e1000_1000t_rx_status_undefined; - } - - return ret_val; -} - -/** - * e1000_get_phy_info_ife - Retrieves various IFE PHY states - * @hw: pointer to the HW structure - * - * Populates "phy" structure with various feature states. - **/ -s32 e1000_get_phy_info_ife(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - bool link; - - DEBUGFUNC("e1000_get_phy_info_ife"); - - ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) { - DEBUGOUT("Phy info is only valid if link is up\n"); - return -E1000_ERR_CONFIG; - } - - ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); - if (ret_val) - return ret_val; - phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE); - - if (phy->polarity_correction) { - ret_val = e1000_check_polarity_ife(hw); - if (ret_val) - return ret_val; - } else { - /* Polarity is forced */ - phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal); - } - - ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); - if (ret_val) - return ret_val; - - phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS); - - /* The following parameters are undefined for 10/100 operation. */ - phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; - phy->local_rx = e1000_1000t_rx_status_undefined; - phy->remote_rx = e1000_1000t_rx_status_undefined; - - return E1000_SUCCESS; -} - -/** - * e1000_phy_sw_reset_generic - PHY software reset - * @hw: pointer to the HW structure - * - * Does a software reset of the PHY by reading the PHY control register and - * setting/write the control register reset bit to the PHY. 
- **/ -s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) -{ - s32 ret_val; - u16 phy_ctrl; - - DEBUGFUNC("e1000_phy_sw_reset_generic"); - - if (!hw->phy.ops.read_reg) - return E1000_SUCCESS; - - ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); - if (ret_val) - return ret_val; - - phy_ctrl |= MII_CR_RESET; - ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); - if (ret_val) - return ret_val; - - usec_delay(1); - - return ret_val; -} - -/** - * e1000_phy_hw_reset_generic - PHY hardware reset - * @hw: pointer to the HW structure - * - * Verify the reset block is not blocking us from resetting. Acquire - * semaphore (if necessary) and read/set/write the device control reset - * bit in the PHY. Wait the appropriate delay time for the device to - * reset and release the semaphore (if necessary). - **/ -s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u32 ctrl; - - DEBUGFUNC("e1000_phy_hw_reset_generic"); - - if (phy->ops.check_reset_block) { - ret_val = phy->ops.check_reset_block(hw); - if (ret_val) - return E1000_SUCCESS; - } - - ret_val = phy->ops.acquire(hw); - if (ret_val) - return ret_val; - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); - E1000_WRITE_FLUSH(hw); - - usec_delay(phy->reset_delay_us); - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - E1000_WRITE_FLUSH(hw); - - usec_delay(150); - - phy->ops.release(hw); - - return phy->ops.get_cfg_done(hw); -} - -/** - * e1000_get_cfg_done_generic - Generic configuration done - * @hw: pointer to the HW structure - * - * Generic function to wait 10 milli-seconds for configuration to complete - * and return success. - **/ -s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw) -{ - DEBUGFUNC("e1000_get_cfg_done_generic"); - - msec_delay_irq(10); - - return E1000_SUCCESS; -} - -/** - * e1000_phy_init_script_igp3 - Inits the IGP3 PHY - * @hw: pointer to the HW structure - * - * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. 
- **/ -s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) -{ - DEBUGOUT("Running IGP 3 PHY init script\n"); - - /* PHY init IGP 3 */ - /* Enable rise/fall, 10-mode work in class-A */ - hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); - /* Remove all caps from Replica path filter */ - hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); - /* Bias trimming for ADC, AFE and Driver (Default) */ - hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); - /* Increase Hybrid poly bias */ - hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); - /* Add 4% to Tx amplitude in Gig mode */ - hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); - /* Disable trimming (TTT) */ - hw->phy.ops.write_reg(hw, 0x2011, 0x0000); - /* Poly DC correction to 94.6% + 2% for all channels */ - hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); - /* ABS DC correction to 95.9% */ - hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); - /* BG temp curve trim */ - hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); - /* Increasing ADC OPAMP stage 1 currents to max */ - hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); - /* Force 1000 ( required for enabling PHY regs configuration) */ - hw->phy.ops.write_reg(hw, 0x0000, 0x0140); - /* Set upd_freq to 6 */ - hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); - /* Disable NPDFE */ - hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); - /* Disable adaptive fixed FFE (Default) */ - hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); - /* Enable FFE hysteresis */ - hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); - /* Fixed FFE for short cable lengths */ - hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); - /* Fixed FFE for medium cable lengths */ - hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); - /* Fixed FFE for long cable lengths */ - hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); - /* Enable Adaptive Clip Threshold */ - hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); - /* AHT reset limit to 1 */ - hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); - /* Set AHT master delay to 127 msec */ - hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); - /* Set scan bits for AHT */ - hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); - /* Set AHT Preset bits */ - hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); - /* Change integ_factor of channel A to 3 */ - hw->phy.ops.write_reg(hw, 0x1895, 0x0003); - /* Change prop_factor of channels BCD to 8 */ - hw->phy.ops.write_reg(hw, 0x1796, 0x0008); - /* Change cg_icount + enable integbp for channels BCD */ - hw->phy.ops.write_reg(hw, 0x1798, 0xD008); - /* Change cg_icount + enable integbp + change prop_factor_master - * to 8 for channel A - */ - hw->phy.ops.write_reg(hw, 0x1898, 0xD918); - /* Disable AHT in Slave mode on channel A */ - hw->phy.ops.write_reg(hw, 0x187A, 0x0800); - /* Enable LPLU and disable AN to 1000 in non-D0a states, - * Enable SPD+B2B - */ - hw->phy.ops.write_reg(hw, 0x0019, 0x008D); - /* Enable restart AN on an1000_dis change */ - hw->phy.ops.write_reg(hw, 0x001B, 0x2080); - /* Enable wh_fifo read clock in 10/100 modes */ - hw->phy.ops.write_reg(hw, 0x0014, 0x0045); - /* Restart AN, Speed selection is 1000 */ - hw->phy.ops.write_reg(hw, 0x0000, 0x1340); - - return E1000_SUCCESS; -} - -/** - * e1000_get_phy_type_from_id - Get PHY type from id - * @phy_id: phy_id read from the phy - * - * Returns the phy type from the id. 
- **/ -enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) -{ - enum e1000_phy_type phy_type = e1000_phy_unknown; - - switch (phy_id) { - case M88E1000_I_PHY_ID: - case M88E1000_E_PHY_ID: - case M88E1111_I_PHY_ID: - case M88E1011_I_PHY_ID: - case M88E1543_E_PHY_ID: - case I347AT4_E_PHY_ID: - case M88E1112_E_PHY_ID: - case M88E1340M_E_PHY_ID: - phy_type = e1000_phy_m88; - break; - case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ - phy_type = e1000_phy_igp_2; - break; - case GG82563_E_PHY_ID: - phy_type = e1000_phy_gg82563; - break; - case IGP03E1000_E_PHY_ID: - phy_type = e1000_phy_igp_3; - break; - case IFE_E_PHY_ID: - case IFE_PLUS_E_PHY_ID: - case IFE_C_E_PHY_ID: - phy_type = e1000_phy_ife; - break; - case I82580_I_PHY_ID: - phy_type = e1000_phy_82580; - break; - case I210_I_PHY_ID: - phy_type = e1000_phy_i210; - break; - default: - phy_type = e1000_phy_unknown; - break; - } - return phy_type; -} - -/** - * e1000_determine_phy_address - Determines PHY address. - * @hw: pointer to the HW structure - * - * This uses a trial and error method to loop through possible PHY - * addresses. It tests each by reading the PHY ID registers and - * checking for a match. - **/ -s32 e1000_determine_phy_address(struct e1000_hw *hw) -{ - u32 phy_addr = 0; - u32 i; - enum e1000_phy_type phy_type = e1000_phy_unknown; - - hw->phy.id = phy_type; - - for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { - hw->phy.addr = phy_addr; - i = 0; - - do { - e1000_get_phy_id(hw); - phy_type = e1000_get_phy_type_from_id(hw->phy.id); - - /* If phy_type is valid, break - we found our - * PHY address - */ - if (phy_type != e1000_phy_unknown) - return E1000_SUCCESS; - - msec_delay(1); - i++; - } while (i < 10); - } - - return -E1000_ERR_PHY_TYPE; -} - -/** - * e1000_power_up_phy_copper - Restore copper link in case of PHY power down - * @hw: pointer to the HW structure - * - * In the case of a PHY power down to save power, or to turn off link during a - * driver unload, or wake on lan is not enabled, restore the link to previous - * settings. - **/ -void e1000_power_up_phy_copper(struct e1000_hw *hw) -{ - u16 mii_reg = 0; - u16 power_reg = 0; - - /* The PHY will retain its settings across a power down/up cycle */ - hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); - mii_reg &= ~MII_CR_POWER_DOWN; - if (hw->phy.type == e1000_phy_i210) { - hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); - power_reg &= ~GS40G_CS_POWER_DOWN; - hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); - } - hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); -} - -/** - * e1000_power_down_phy_copper - Restore copper link in case of PHY power down - * @hw: pointer to the HW structure - * - * In the case of a PHY power down to save power, or to turn off link during a - * driver unload, or wake on lan is not enabled, restore the link to previous - * settings. - **/ -void e1000_power_down_phy_copper(struct e1000_hw *hw) -{ - u16 mii_reg = 0; - u16 power_reg = 0; - - /* The PHY will retain its settings across a power down/up cycle */ - hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); - mii_reg |= MII_CR_POWER_DOWN; - /* i210 Phy requires an additional bit for power up/down */ - if (hw->phy.type == e1000_phy_i210) { - hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); - power_reg |= GS40G_CS_POWER_DOWN; - hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); - } - hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); - msec_delay(1); -} - -/** - * e1000_check_polarity_82577 - Checks the polarity. 
- * @hw: pointer to the HW structure - * - * Success returns 0, Failure returns -E1000_ERR_PHY (-2) - * - * Polarity is determined based on the PHY specific status register. - **/ -s32 e1000_check_polarity_82577(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - - DEBUGFUNC("e1000_check_polarity_82577"); - - ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); - - if (!ret_val) - phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal); - - return ret_val; -} - -/** - * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY - * @hw: pointer to the HW structure - * - * Calls the PHY setup function to force speed and duplex. - **/ -s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data; - bool link; - - DEBUGFUNC("e1000_phy_force_speed_duplex_82577"); - - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); - if (ret_val) - return ret_val; - - e1000_phy_force_speed_duplex_setup(hw, &phy_data); - - ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); - if (ret_val) - return ret_val; - - usec_delay(1); - - if (phy->autoneg_wait_to_complete) { - DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n"); - - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - if (ret_val) - return ret_val; - - if (!link) - DEBUGOUT("Link taking longer than expected.\n"); - - /* Try once more */ - ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); - } - - return ret_val; -} - -/** - * e1000_get_phy_info_82577 - Retrieve I82577 PHY information - * @hw: pointer to the HW structure - * - * Read PHY status to determine if link is up. If link is up, then - * set/determine 10base-T extended distance and polarity correction. Read - * PHY port status to determine MDI/MDIx and speed. Based on the speed, - * determine on the cable length, local and remote receiver. - **/ -s32 e1000_get_phy_info_82577(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 data; - bool link; - - DEBUGFUNC("e1000_get_phy_info_82577"); - - ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) { - DEBUGOUT("Phy info is only valid if link is up\n"); - return -E1000_ERR_CONFIG; - } - - phy->polarity_correction = true; - - ret_val = e1000_check_polarity_82577(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); - if (ret_val) - return ret_val; - - phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX); - - if ((data & I82577_PHY_STATUS2_SPEED_MASK) == - I82577_PHY_STATUS2_SPEED_1000MBPS) { - ret_val = hw->phy.ops.get_cable_length(hw); - if (ret_val) - return ret_val; - - ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); - if (ret_val) - return ret_val; - - phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - - phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) - ? 
e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; - } else { - phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; - phy->local_rx = e1000_1000t_rx_status_undefined; - phy->remote_rx = e1000_1000t_rx_status_undefined; - } - - return E1000_SUCCESS; -} - -/** - * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY - * @hw: pointer to the HW structure - * - * Reads the diagnostic status register and verifies result is valid before - * placing it in the phy_cable_length field. - **/ -s32 e1000_get_cable_length_82577(struct e1000_hw *hw) -{ - struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; - u16 phy_data, length; - - DEBUGFUNC("e1000_get_cable_length_82577"); - - ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data); - if (ret_val) - return ret_val; - - length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> - I82577_DSTATUS_CABLE_LENGTH_SHIFT); - - if (length == E1000_CABLE_LENGTH_UNDEFINED) - return -E1000_ERR_PHY; - - phy->cable_length = length; - - return E1000_SUCCESS; -} - -/** - * e1000_write_phy_reg_gs40g - Write GS40G PHY register - * @hw: pointer to the HW structure - * @offset: register offset to write to - * @data: data to write at register offset - * - * Acquires semaphore, if necessary, then writes the data to PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) -{ - s32 ret_val; - u16 page = offset >> GS40G_PAGE_SHIFT; - - DEBUGFUNC("e1000_write_phy_reg_gs40g"); - - offset = offset & GS40G_OFFSET_MASK; - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); - if (ret_val) - goto release; - ret_val = e1000_write_phy_reg_mdic(hw, offset, data); - -release: - hw->phy.ops.release(hw); - return ret_val; -} - -/** - * e1000_read_phy_reg_gs40g - Read GS40G PHY register - * @hw: pointer to the HW structure - * @offset: lower half is register offset to read to - * upper half is page to use. - * @data: data to read at register offset - * - * Acquires semaphore, if necessary, then reads the data in the PHY register - * at the offset. Release any acquired semaphores before exiting. - **/ -s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) -{ - s32 ret_val; - u16 page = offset >> GS40G_PAGE_SHIFT; - - DEBUGFUNC("e1000_read_phy_reg_gs40g"); - - offset = offset & GS40G_OFFSET_MASK; - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); - if (ret_val) - goto release; - ret_val = e1000_read_phy_reg_mdic(hw, offset, data); - -release: - hw->phy.ops.release(hw); - return ret_val; -} - -/** - * e1000_read_phy_reg_mphy - Read mPHY control register - * @hw: pointer to the HW structure - * @address: address to be read - * @data: pointer to the read data - * - * Reads the mPHY control register in the PHY at offset and stores the - * information read to data. 
- **/ -s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) -{ - u32 mphy_ctrl = 0; - bool locked = false; - bool ready = false; - - DEBUGFUNC("e1000_read_phy_reg_mphy"); - - /* Check if mPHY is ready to read/write operations */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - - /* Check if mPHY access is disabled and enable it if so */ - mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); - if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { - locked = true; - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - mphy_ctrl |= E1000_MPHY_ENA_ACCESS; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); - } - - /* Set the address that we want to read */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - - /* We mask address, because we want to use only current lane */ - mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK & - ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) | - (address & E1000_MPHY_ADDRESS_MASK); - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); - - /* Read data from the address */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - *data = E1000_READ_REG(hw, E1000_MPHY_DATA); - - /* Disable access to mPHY if it was originally disabled */ - if (locked) { - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, - E1000_MPHY_DIS_ACCESS); - } - - return E1000_SUCCESS; -} - -/** - * e1000_write_phy_reg_mphy - Write mPHY control register - * @hw: pointer to the HW structure - * @address: address to write to - * @data: data to write to register at offset - * @line_override: used when we want to use different line than default one - * - * Writes data to mPHY control register. - **/ -s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, - bool line_override) -{ - u32 mphy_ctrl = 0; - bool locked = false; - bool ready = false; - - DEBUGFUNC("e1000_write_phy_reg_mphy"); - - /* Check if mPHY is ready to read/write operations */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - - /* Check if mPHY access is disabled and enable it if so */ - mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); - if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { - locked = true; - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - mphy_ctrl |= E1000_MPHY_ENA_ACCESS; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); - } - - /* Set the address that we want to read */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - - /* We mask address, because we want to use only current lane */ - if (line_override) - mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE; - else - mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE; - mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) | - (address & E1000_MPHY_ADDRESS_MASK); - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); - - /* Read data from the address */ - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); - - /* Disable access to mPHY if it was originally disabled */ - if (locked) { - ready = e1000_is_mphy_ready(hw); - if (!ready) - return -E1000_ERR_PHY; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, - E1000_MPHY_DIS_ACCESS); - } - - return E1000_SUCCESS; -} - -/** - * e1000_is_mphy_ready - Check if mPHY control register is not busy - * @hw: pointer to the HW structure - * - * Returns mPHY control register status. 
- **/ -bool e1000_is_mphy_ready(struct e1000_hw *hw) -{ - u16 retry_count = 0; - u32 mphy_ctrl = 0; - bool ready = false; - - while (retry_count < 2) { - mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); - if (mphy_ctrl & E1000_MPHY_BUSY) { - usec_delay(20); - retry_count++; - continue; - } - ready = true; - break; - } - - if (!ready) - DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n"); - - return ready; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h deleted file mode 100644 index 67e9ba77..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_PHY_H_ -#define _E1000_PHY_H_ - -void e1000_init_phy_ops_generic(struct e1000_hw *hw); -s32 e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data); -void e1000_null_phy_generic(struct e1000_hw *hw); -s32 e1000_null_lplu_state(struct e1000_hw *hw, bool active); -s32 e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_null_set_page(struct e1000_hw *hw, u16 data); -s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data); -s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data); -s32 e1000_check_downshift_generic(struct e1000_hw *hw); -s32 e1000_check_polarity_m88(struct e1000_hw *hw); -s32 e1000_check_polarity_igp(struct e1000_hw *hw); -s32 e1000_check_polarity_ife(struct e1000_hw *hw); -s32 e1000_check_reset_block_generic(struct e1000_hw *hw); -s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); -s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); -s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw); -s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); -s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); -s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); -s32 e1000_get_cable_length_m88(struct e1000_hw *hw); -s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw); -s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); -s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); -s32 e1000_get_phy_id(struct e1000_hw *hw); -s32 e1000_get_phy_info_igp(struct e1000_hw *hw); -s32 e1000_get_phy_info_m88(struct e1000_hw *hw); -s32 e1000_get_phy_info_ife(struct e1000_hw *hw); -s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw); -void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); -s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw); -s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw); -s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page); -s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active); -s32 e1000_setup_copper_link_generic(struct e1000_hw 
*hw); -s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, - u32 usec_interval, bool *success); -s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); -enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); -s32 e1000_determine_phy_address(struct e1000_hw *hw); -s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); -s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); -void e1000_power_up_phy_copper(struct e1000_hw *hw); -void e1000_power_down_phy_copper(struct e1000_hw *hw); -s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); -s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data); -s32 e1000_copper_link_setup_82577(struct e1000_hw *hw); -s32 e1000_check_polarity_82577(struct e1000_hw *hw); -s32 e1000_get_phy_info_82577(struct e1000_hw *hw); -s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw); -s32 e1000_get_cable_length_82577(struct e1000_hw *hw); -s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); -s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); -s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data); -s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, - bool line_override); -bool e1000_is_mphy_ready(struct e1000_hw *hw); - -#define E1000_MAX_PHY_ADDR 8 - -/* IGP01E1000 Specific Registers */ -#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ -#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ -#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ -#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ -#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ -#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ -#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ -#define IGP_PAGE_SHIFT 5 -#define PHY_REG_MASK 0x1F - -/* GS40G - I210 PHY defines */ -#define GS40G_PAGE_SELECT 0x16 -#define GS40G_PAGE_SHIFT 16 -#define GS40G_OFFSET_MASK 0xFFFF -#define GS40G_PAGE_2 0x20000 -#define GS40G_MAC_REG2 0x15 -#define GS40G_MAC_LB 0x4140 -#define GS40G_MAC_SPEED_1G 0X0006 -#define GS40G_COPPER_SPEC 0x0010 -#define GS40G_CS_POWER_DOWN 0x0002 - -#define HV_INTC_FC_PAGE_START 768 -#define I82578_ADDR_REG 29 -#define I82577_ADDR_REG 16 -#define I82577_CFG_REG 22 -#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15) -#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift */ -#define I82577_CTRL_REG 23 - -/* 82577 specific PHY registers */ -#define I82577_PHY_CTRL_2 18 -#define I82577_PHY_LBK_CTRL 19 -#define I82577_PHY_STATUS_2 26 -#define I82577_PHY_DIAG_STATUS 31 - -/* I82577 PHY Status 2 */ -#define I82577_PHY_STATUS2_REV_POLARITY 0x0400 -#define I82577_PHY_STATUS2_MDIX 0x0800 -#define I82577_PHY_STATUS2_SPEED_MASK 0x0300 -#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 - -/* I82577 PHY Control 2 */ 
-#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 -#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 -#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 - -/* I82577 PHY Diagnostics Status */ -#define I82577_DSTATUS_CABLE_LENGTH 0x03FC -#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2 - -/* 82580 PHY Power Management */ -#define E1000_82580_PHY_POWER_MGMT 0xE14 -#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */ -#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */ -#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */ -#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */ - -#define E1000_MPHY_DIS_ACCESS 0x80000000 /* disable_access bit */ -#define E1000_MPHY_ENA_ACCESS 0x40000000 /* enable_access bit */ -#define E1000_MPHY_BUSY 0x00010000 /* busy bit */ -#define E1000_MPHY_ADDRESS_FNC_OVERRIDE 0x20000000 /* fnc_override bit */ -#define E1000_MPHY_ADDRESS_MASK 0x0000FFFF /* address mask */ - -#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 -#define IGP01E1000_PHY_POLARITY_MASK 0x0078 - -#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 -#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ - -#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 - -#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ -#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ -#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ - -#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 - -#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 -#define IGP01E1000_PSSR_MDIX 0x0800 -#define IGP01E1000_PSSR_SPEED_MASK 0xC000 -#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 - -#define IGP02E1000_PHY_CHANNEL_NUM 4 -#define IGP02E1000_PHY_AGC_A 0x11B1 -#define IGP02E1000_PHY_AGC_B 0x12B1 -#define IGP02E1000_PHY_AGC_C 0x14B1 -#define IGP02E1000_PHY_AGC_D 0x18B1 - -#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course=15:13, Fine=12:9 */ -#define IGP02E1000_AGC_LENGTH_MASK 0x7F -#define IGP02E1000_AGC_RANGE 15 - -#define E1000_CABLE_LENGTH_UNDEFINED 0xFF - -#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 -#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 -#define E1000_KMRNCTRLSTA_REN 0x00200000 -#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ -#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ -#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ -#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */ -#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ - -#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 -#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Ctrl */ -#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Ctrl */ -#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ - -/* IFE PHY Extended Status Control */ -#define IFE_PESC_POLARITY_REVERSED 0x0100 - -/* IFE PHY Special Control */ -#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 -#define IFE_PSC_FORCE_POLARITY 0x0020 - -/* IFE PHY Special Control and LED Control */ -#define IFE_PSCL_PROBE_MODE 0x0020 -#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ -#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ - -/* IFE PHY MDIX Control */ -#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ -#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ -#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto, 0=disable */ - -/* SFP modules ID memory locations */ -#define E1000_SFF_IDENTIFIER_OFFSET 0x00 -#define E1000_SFF_IDENTIFIER_SFF 0x02 -#define E1000_SFF_IDENTIFIER_SFP 0x03 - -#define E1000_SFF_ETH_FLAGS_OFFSET 0x06 -/* Flags for 
SFP modules compatible with ETH up to 1Gb */ -struct sfp_e1000_flags { - u8 e1000_base_sx:1; - u8 e1000_base_lx:1; - u8 e1000_base_cx:1; - u8 e1000_base_t:1; - u8 e100_base_lx:1; - u8 e100_base_fx:1; - u8 e10_base_bx10:1; - u8 e10_base_px:1; -}; - -/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ -#define E1000_SFF_VENDOR_OUI_TYCO 0x00407600 -#define E1000_SFF_VENDOR_OUI_FTL 0x00906500 -#define E1000_SFF_VENDOR_OUI_AVAGO 0x00176A00 -#define E1000_SFF_VENDOR_OUI_INTEL 0x001B2100 - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h deleted file mode 100644 index f5c7e031..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h +++ /dev/null @@ -1,631 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _E1000_REGS_H_ -#define _E1000_REGS_H_ - -#define E1000_CTRL 0x00000 /* Device Control - RW */ -#define E1000_STATUS 0x00008 /* Device Status - RO */ -#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ -#define E1000_EERD 0x00014 /* EEPROM Read - RW */ -#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ -#define E1000_FLA 0x0001C /* Flash Access - RW */ -#define E1000_MDIC 0x00020 /* MDI Control - RW */ -#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ -#define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */ -#define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */ -#define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */ -#define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */ -#define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */ -#define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */ -#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ -#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ -#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ -#define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */ -#define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */ -#define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/ -#define E1000_SCTL 0x00024 /* SerDes Control - RW */ -#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ -#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ -#define E1000_FCT 0x00030 /* Flow Control Type - RW */ -#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ -#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ -#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ -#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ -#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ -#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ -#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ -#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ -#define E1000_RCTL 0x00100 /* Rx Control - RW */ -#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ -#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ -#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ -#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ -#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) -#define E1000_EICS 0x01520 /* Ext. 
Interrupt Cause Set - W0 */ -#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ -#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ -#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ -#define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */ -#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ -#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ -#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ -#define E1000_TCTL 0x00400 /* Tx Control - RW */ -#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ -#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ -#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ -#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ -#define E1000_LEDMUX 0x08130 /* LED MUX Control */ -#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ -#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ -#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ -#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ -#define E1000_PBS 0x01008 /* Packet Buffer Size */ -#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ -#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ -#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ -#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ -#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ -#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ -#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ -#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ -#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ -#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ -#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ -#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ -#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ -#define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */ -#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ -#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ -#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ -#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ -#define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */ -#define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */ -#define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */ -#define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */ -#define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */ -#define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */ -#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ -#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ -#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ -#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ -#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ -#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ -#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ -#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ -#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ -#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ -#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ -/* Split and Replication Rx Control - RW */ -#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW 
*/ -#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ -#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ -#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ -#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ -#define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */ -#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ -#define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */ -#define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */ -#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ -#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ -#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ -#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ -#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ -#define E1000_I210_FLMNGCTL 0x12038 -#define E1000_I210_FLMNGDATA 0x1203C -#define E1000_I210_FLMNGCNT 0x12040 - -#define E1000_I210_FLSWCTL 0x12048 -#define E1000_I210_FLSWDATA 0x1204C -#define E1000_I210_FLSWCNT 0x12050 - -#define E1000_I210_FLA 0x1201C - -#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) -#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ - -/* QAV Tx mode control register */ -#define E1000_I210_TQAVCTRL 0x3570 - -/* QAV Tx mode control register bitfields masks */ -/* QAV enable */ -#define E1000_TQAVCTRL_MODE (1 << 0) -/* Fetching arbitration type */ -#define E1000_TQAVCTRL_FETCH_ARB (1 << 4) -/* Fetching timer enable */ -#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5) -/* Launch arbitration type */ -#define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8) -/* Launch timer enable */ -#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9) -/* SP waits for SR enable */ -#define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10) -/* Fetching timer correction */ -#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16 -#define E1000_TQAVCTRL_FETCH_TIMER_DELTA \ - (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET) - -/* High credit registers where _n can be 0 or 1. */ -#define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n)) - -/* Queues fetch arbitration priority control register */ -#define E1000_I210_TQAVARBCTRL 0x3574 -/* Queues priority masks where _n and _p can be 0-3. */ -#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n)) -/* QAV Tx mode control registers where _n can be 0 or 1. */ -#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n)) - -/* QAV Tx mode control register bitfields masks */ -#define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */ -#define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */ -#define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */ - -/* Good transmitted packets counter registers */ -#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n))) - -/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */ -#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n)) - -#define E1000_MMDAC 13 /* MMD Access Control */ -#define E1000_MMDAAD 14 /* MMD Access Address/Data */ - -/* Convenience macros - * - * Note: "_n" is the queue number of the register to be written to. - * - * Example usage: - * E1000_RDBAL_REG(current_rx_queue) - */ -#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \ - (0x0C000 + ((_n) * 0x40))) -#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \ - (0x0C004 + ((_n) * 0x40))) -#define E1000_RDLEN(_n) ((_n) < 4 ? 
(0x02808 + ((_n) * 0x100)) : \ - (0x0C008 + ((_n) * 0x40))) -#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \ - (0x0C00C + ((_n) * 0x40))) -#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \ - (0x0C010 + ((_n) * 0x40))) -#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \ - (0x0C014 + ((_n) * 0x40))) -#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) -#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \ - (0x0C018 + ((_n) * 0x40))) -#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \ - (0x0C028 + ((_n) * 0x40))) -#define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \ - (0x0C030 + ((_n) * 0x40))) -#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \ - (0x0E000 + ((_n) * 0x40))) -#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \ - (0x0E004 + ((_n) * 0x40))) -#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \ - (0x0E008 + ((_n) * 0x40))) -#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \ - (0x0E010 + ((_n) * 0x40))) -#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ - (0x0E014 + ((_n) * 0x40))) -#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) -#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \ - (0x0E018 + ((_n) * 0x40))) -#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \ - (0x0E028 + ((_n) * 0x40))) -#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \ - (0x0E038 + ((_n) * 0x40))) -#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \ - (0x0E03C + ((_n) * 0x40))) -#define E1000_TARC(_n) (0x03840 + ((_n) * 0x100)) -#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ -#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ -#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ -#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) -#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ - (0x054E0 + ((_i - 16) * 8))) -#define E1000_RAH(_i) (((_i) <= 15) ? 
(0x05404 + ((_i) * 8)) : \ - (0x054E4 + ((_i - 16) * 8))) -#define E1000_SHRAL(_i) (0x05438 + ((_i) * 8)) -#define E1000_SHRAH(_i) (0x0543C + ((_i) * 8)) -#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) -#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) -#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) -#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) -#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) -#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) -#define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */ -#define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */ -#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ -/* Same as TXPBS, renamed for newer Si - RW */ -#define E1000_ITPBS 0x03404 -#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ -#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ -#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ -#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ -#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ -#define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */ -#define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */ -#define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */ -#define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */ -#define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */ -#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ -#define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */ -#define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */ -/* DMA Tx Max Total Allow Size Reqs - RW */ -#define E1000_DTXMXSZRQ 0x03540 -#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ -#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ -#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ -#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ -#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ -#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ -#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ -#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ -#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ -#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ -#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ -#define E1000_COLC 0x04028 /* Collision Count - R/clr */ -#define E1000_DC 0x04030 /* Defer Count - R/clr */ -#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ -#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ -#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ -#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ -#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ -#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ -#define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ -#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ -#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ -#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ -#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ -#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ -#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ -#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ -#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ -#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ -#define 
E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ -#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ -#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ -#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ -#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ -#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ -#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ -#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ -#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ -#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ -#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ -#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ -#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ -#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ -#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ -#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ -#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ -#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ -#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ -#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ -#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ -#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ -#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ -#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ -#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ -#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ -#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ -#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ -#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ -#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ -#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ -#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ -#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */ -#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */ -#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ -#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ -#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ -#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ -#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */ -#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ - -/* Virtualization statistical counters */ -#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n))) -#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n))) -#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n))) -#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n))) -#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n))) -#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n))) -#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n))) -#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n))) -#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n))) - -/* LinkSec */ -#define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */ -#define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */ -#define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */ -#define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx 
Octets Cnt */ -#define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */ -#define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */ -#define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */ -#define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */ -#define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */ -#define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */ -#define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */ -#define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */ -#define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */ -#define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */ -#define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */ -#define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */ -#define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */ -#define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */ -#define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */ -#define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */ -#define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */ -#define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */ -#define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */ -#define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */ -#define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */ -#define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */ -#define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */ -#define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */ -#define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */ -#define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */ -/* LinkSec Tx 128-bit Key 0 - WO */ -#define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n))) -/* LinkSec Tx 128-bit Key 1 - WO */ -#define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n))) -#define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */ -#define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */ -/* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit - * key - RW. 
- */ -#define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m))) - -#define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */ -#define E1000_IPSCTRL 0xB430 /* IpSec Control Register */ -#define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */ -#define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */ -/* IPSec Rx IPv4/v6 Address - RW */ -#define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n))) -/* IPSec Rx 128-bit Key - RW */ -#define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n))) -#define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */ -#define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */ -/* IPSec Tx 128-bit Key - RW */ -#define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n))) -#define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */ -#define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */ -#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ -#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ -#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ -#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ -#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ -#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ -#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ -#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ -#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ -#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ -#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ -#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ -#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ -#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ -#define E1000_LENERRS 0x04138 /* Length Errors Count */ -#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ -#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ -#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ -#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ -#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ -#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */ -#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ -#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ -#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ -#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ -#define E1000_RA 0x05400 /* Receive Address - RW Array */ -#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ -#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ -#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ -#define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */ -#define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */ -#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ -#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ -#define E1000_WUC 0x05800 /* Wakeup Control - RW */ -#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ -#define E1000_WUS 0x05810 /* Wakeup Status - RO */ -#define E1000_MANC 0x05820 /* Management Control - RW */ -#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ -#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ -#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ -#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ -#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ 
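A minimal sketch of how the two-index E1000_LSECRXKEY(_n, _m) macro earlier in this header is meant to be used, per its own comment (_n selects the security association, _m one of the four dwords of the 128-bit key): loading one SA's key is four indexed writes. The helper name below is hypothetical and is not part of the removed driver; E1000_WRITE_REG() is the register accessor used in the e1000_phy.c hunk earlier in this patch.

static void example_load_lsec_rx_key(struct e1000_hw *hw, u32 sa,
				     const u32 key[4])
{
	u32 dword;

	/* One 32-bit write per key dword: _n selects the SA, _m the dword. */
	for (dword = 0; dword < 4; dword++)
		E1000_WRITE_REG(hw, E1000_LSECRXKEY(sa, dword), key[dword]);
}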
-#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ -#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ -#define E1000_HOST_IF 0x08800 /* Host Interface */ -#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ -#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ -#define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */ -/* Flexible Host Filter Table */ -#define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100)) -/* Ext Flexible Host Filter Table */ -#define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100)) - - -#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ -#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ -/* Management Decision Filters */ -#define E1000_MDEF(_n) (0x05890 + (4 * (_n))) -#define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ -#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ -#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ -#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ -#define E1000_GCR 0x05B00 /* PCI-Ex Control */ -#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */ -#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ -#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ -#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ -#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ -#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ -#define E1000_SWSM 0x05B50 /* SW Semaphore */ -#define E1000_FWSM 0x05B54 /* FW Semaphore */ -/* Driver-only SW semaphore (not used by BOOT agents) */ -#define E1000_SWSM2 0x05B58 -#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ -#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ -#define E1000_UFUSE 0x05B78 /* UFUSE - RO */ -#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ -#define E1000_HICR 0x08F00 /* Host Interface Control */ -#define E1000_FWSTS 0x08F0C /* FW Status */ - -/* RSS registers */ -#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ -#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ -#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ -#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ -#define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */ -#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */ -#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */ -#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */ -#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ -#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ -/* VT Registers */ -#define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */ -#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ -#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ -#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ -#define E1000_VFRE 0x00C8C /* VF Receive Enables */ -#define E1000_VFTE 0x00C90 /* VF Transmit Enables */ -#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ -#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ -#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ -#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ -#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ -#define E1000_IOVTCL 0x05BBC /* IOV Control Register */ -#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */ -#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule 
VLAN */ -#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */ -#define E1000_MDFB 0x03558 /* Malicious Driver free block */ -#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ -#define E1000_TXSWC 0x05ACC /* Tx Switch Control */ -#define E1000_SCCRL 0x05DB0 /* Storm Control Control */ -#define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */ -#define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */ -/* These act per VF so an array friendly macro is used */ -#define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n))) -#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) -#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) -#define E1000_VFVMBMEM(_n) (0x00800 + (_n)) -#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) -/* VLAN Virtual Machine Filter - RW */ -#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) -#define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) -#define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */ -#define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */ -#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ -#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ -#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ -#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ -#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ -#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ -#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ -#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ -#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ -#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ -#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ -#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ -#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ -#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ -#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ -#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ -#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ -#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ - -/* Filtering Registers */ -#define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ -#define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ -#define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */ -#define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ -#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */ -#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ -#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ - -#define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */ -#define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */ -#define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */ -#define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */ -#define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */ -/* Tx Desc plane TC Rate-scheduler config */ -#define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4)) -/* Tx Packet plane TC Rate-Scheduler Config */ -#define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4)) -/* Rx Packet plane TC Rate-Scheduler Config */ -#define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4)) -/* Tx Desc Plane TC Rate-Scheduler Status */ -#define E1000_RTTDTCRS(_n) 
(0x3630 + ((_n) * 4)) -/* Tx Desc Plane TC Rate-Scheduler MMW */ -#define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4)) -/* Tx Packet plane TC Rate-Scheduler Status */ -#define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4)) -/* Tx Packet plane TC Rate-scheduler MMW */ -#define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4)) -/* Rx Packet plane TC Rate-Scheduler Status */ -#define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4)) -/* Rx Packet plane TC Rate-Scheduler MMW */ -#define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4)) -/* Tx Desc plane VM Rate-Scheduler MMW*/ -#define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4)) -/* Tx BCN Rate-Scheduler MMW */ -#define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4)) -#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */ -#define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */ -#define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */ -#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */ -#define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */ -#define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */ -#define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */ -#define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */ -#define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */ -#define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */ -#define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */ -#define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */ -#define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */ -#define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */ - -/* DMA Coalescing registers */ -#define E1000_DMACR 0x02508 /* Control Register */ -#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ -#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ -#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ -#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ -#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ -#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ - -/* PCIe Parity Status Register */ -#define E1000_PCIEERRSTS 0x05BA8 - -#define E1000_PROXYS 0x5F64 /* Proxying Status */ -#define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */ -/* Thermal sensor configuration and status registers */ -#define E1000_THMJT 0x08100 /* Junction Temperature */ -#define E1000_THLOWTC 0x08104 /* Low Threshold Control */ -#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ -#define E1000_THHIGHTC 0x0810C /* High Threshold Control */ -#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ - -/* Energy Efficient Ethernet "EEE" registers */ -#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ -#define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ -#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/ -#define E1000_EEE_SU 0x0E34 /* EEE Setup */ -#define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */ -#define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */ - -/* OS2BMC Registers */ -#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ -#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ -#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ -#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ - - - -#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h deleted file mode 100644 index 8aa2a308..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h +++ /dev/null @@ -1,844 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
-/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* Linux PRO/1000 Ethernet Driver main header file */ - -#ifndef _IGB_H_ -#define _IGB_H_ - -#include - -#ifndef IGB_NO_LRO -#include -#endif - -#undef HAVE_HW_TIME_STAMP -#ifdef HAVE_HW_TIME_STAMP -#include -#include -#include - -#endif -#ifdef SIOCETHTOOL -#include -#endif - -struct igb_adapter; - -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) -//#define IGB_DCA -#endif -#ifdef IGB_DCA -#include -#endif - -#include "kcompat.h" - -#ifdef HAVE_SCTP -#include -#endif - -#include "e1000_api.h" -#include "e1000_82575.h" -#include "e1000_manage.h" -#include "e1000_mbx.h" - -#define IGB_ERR(args...) printk(KERN_ERR "igb: " args) - -#define PFX "igb: " -#define DPRINTK(nlevel, klevel, fmt, args...) \ - (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ - printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ - __FUNCTION__ , ## args)) - -#ifdef HAVE_PTP_1588_CLOCK -#include -#include -#include -#endif /* HAVE_PTP_1588_CLOCK */ - -#ifdef HAVE_I2C_SUPPORT -#include -#include -#endif /* HAVE_I2C_SUPPORT */ - -/* Interrupt defines */ -#define IGB_START_ITR 648 /* ~6000 ints/sec */ -#define IGB_4K_ITR 980 -#define IGB_20K_ITR 196 -#define IGB_70K_ITR 56 - -/* Interrupt modes, as used by the IntMode parameter */ -#define IGB_INT_MODE_LEGACY 0 -#define IGB_INT_MODE_MSI 1 -#define IGB_INT_MODE_MSIX 2 - -/* TX/RX descriptor defines */ -#define IGB_DEFAULT_TXD 256 -#define IGB_DEFAULT_TX_WORK 128 -#define IGB_MIN_TXD 80 -#define IGB_MAX_TXD 4096 - -#define IGB_DEFAULT_RXD 256 -#define IGB_MIN_RXD 80 -#define IGB_MAX_RXD 4096 - -#define IGB_MIN_ITR_USECS 10 /* 100k irq/sec */ -#define IGB_MAX_ITR_USECS 8191 /* 120 irq/sec */ - -#define NON_Q_VECTORS 1 -#define MAX_Q_VECTORS 10 - -/* Transmit and receive queues */ -#define IGB_MAX_RX_QUEUES 16 -#define IGB_MAX_TX_QUEUES 16 - -#define IGB_MAX_VF_MC_ENTRIES 30 -#define IGB_MAX_VF_FUNCTIONS 8 -#define IGB_82576_VF_DEV_ID 0x10CA -#define IGB_I350_VF_DEV_ID 0x1520 -#define IGB_MAX_UTA_ENTRIES 128 -#define MAX_EMULATION_MAC_ADDRS 16 -#define OUI_LEN 3 -#define IGB_MAX_VMDQ_QUEUES 8 - - -struct vf_data_storage { - unsigned char vf_mac_addresses[ETH_ALEN]; - u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; - u16 num_vf_mc_hashes; - u16 default_vf_vlan_id; - u16 vlans_enabled; - unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN]; - u32 uta_table_copy[IGB_MAX_UTA_ENTRIES]; - u32 flags; - unsigned long last_nack; -#ifdef IFLA_VF_MAX - u16 pf_vlan; /* When set, guest VLAN config not allowed. */ - u16 pf_qos; - u16 tx_rate; -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE - bool spoofchk_enabled; -#endif -#endif -}; - -#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ -#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */ -#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */ -#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */ - -/* RX descriptor control thresholds. - * PTHRESH - MAC will consider prefetch if it has fewer than this number of - * descriptors available in its onboard memory. - * Setting this to 0 disables RX descriptor prefetch. 
- * HTHRESH - MAC will only prefetch if there are at least this many descriptors - * available in host memory. - * If PTHRESH is 0, this should also be 0. - * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back - * descriptors until either it has this many to write back, or the - * ITR timer expires. - */ -#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) -#define IGB_RX_HTHRESH 8 -#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) -#define IGB_TX_HTHRESH 1 -#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ - adapter->msix_entries) ? 1 : 4) - -/* this is the size past which hardware will drop packets when setting LPE=0 */ -#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 - -/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 more, - * this adds roughly 448 bytes of extra data meaning the smallest - * allocation we could have is 1K. - * i.e. RXBUFFER_512 --> size-1024 slab - */ -/* Supported Rx Buffer Sizes */ -#define IGB_RXBUFFER_256 256 -#define IGB_RXBUFFER_2048 2048 -#define IGB_RXBUFFER_16384 16384 -#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 -#if MAX_SKB_FRAGS < 8 -#define IGB_RX_BUFSZ ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024) -#else -#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 -#endif - - -/* Packet Buffer allocations */ -#define IGB_PBA_BYTES_SHIFT 0xA -#define IGB_TX_HEAD_ADDR_SHIFT 7 -#define IGB_PBA_TX_MASK 0xFFFF0000 - -#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */ - -/* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ - -#define IGB_EEPROM_APME 0x0400 -#define AUTO_ALL_MODES 0 - -#ifndef IGB_MASTER_SLAVE -/* Switch to override PHY master/slave setting */ -#define IGB_MASTER_SLAVE e1000_ms_hw_default -#endif - -#define IGB_MNG_VLAN_NONE -1 - -#ifndef IGB_NO_LRO -#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/ -struct igb_lro_stats { - u32 flushed; - u32 coal; -}; - -/* - * igb_lro_header - header format to be aggregated by LRO - * @iph: IP header without options - * @tcp: TCP header - * @ts: Optional TCP timestamp data in TCP options - * - * This structure relies on the check above that verifies that the header - * is IPv4 and does not contain any options. - */ -struct igb_lrohdr { - struct iphdr iph; - struct tcphdr th; - __be32 ts[0]; -}; - -struct igb_lro_list { - struct sk_buff_head active; - struct igb_lro_stats stats; -}; - -#endif /* IGB_NO_LRO */ -struct igb_cb { -#ifndef IGB_NO_LRO -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - union { /* Union defining head/tail partner */ - struct sk_buff *head; - struct sk_buff *tail; - }; -#endif - __be32 tsecr; /* timestamp echo response */ - u32 tsval; /* timestamp value in host order */ - u32 next_seq; /* next expected sequence number */ - u16 free; /* 65521 minus total size */ - u16 mss; /* size of data portion of packet */ - u16 append_cnt; /* number of skb's appended */ -#endif /* IGB_NO_LRO */ -#ifdef HAVE_VLAN_RX_REGISTER - u16 vid; /* VLAN tag */ -#endif -}; -#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb) - -enum igb_tx_flags { - /* cmd_type flags */ - IGB_TX_FLAGS_VLAN = 0x01, - IGB_TX_FLAGS_TSO = 0x02, - IGB_TX_FLAGS_TSTAMP = 0x04, - - /* olinfo flags */ - IGB_TX_FLAGS_IPV4 = 0x10, - IGB_TX_FLAGS_CSUM = 0x20, -}; - -/* VLAN info */ -#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 -#define IGB_TX_FLAGS_VLAN_SHIFT 16 - -/* - * The largest size we can write to the descriptor is 65535. 
In order to - * maintain a power of two alignment we have to limit ourselves to 32K. - */ -#define IGB_MAX_TXD_PWR 15 -#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) - -/* Tx Descriptors needed, worst case */ -#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) -#ifndef MAX_SKB_FRAGS -#define DESC_NEEDED 4 -#elif (MAX_SKB_FRAGS < 16) -#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) -#else -#define DESC_NEEDED (MAX_SKB_FRAGS + 4) -#endif - -/* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer */ -struct igb_tx_buffer { - union e1000_adv_tx_desc *next_to_watch; - unsigned long time_stamp; - struct sk_buff *skb; - unsigned int bytecount; - u16 gso_segs; - __be16 protocol; - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - u32 tx_flags; -}; - -struct igb_rx_buffer { - dma_addr_t dma; -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - struct sk_buff *skb; -#else - struct page *page; - u32 page_offset; -#endif -}; - -struct igb_tx_queue_stats { - u64 packets; - u64 bytes; - u64 restart_queue; -}; - -struct igb_rx_queue_stats { - u64 packets; - u64 bytes; - u64 drops; - u64 csum_err; - u64 alloc_failed; - u64 ipv4_packets; /* IPv4 headers processed */ - u64 ipv4e_packets; /* IPv4E headers with extensions processed */ - u64 ipv6_packets; /* IPv6 headers processed */ - u64 ipv6e_packets; /* IPv6E headers with extensions processed */ - u64 tcp_packets; /* TCP headers processed */ - u64 udp_packets; /* UDP headers processed */ - u64 sctp_packets; /* SCTP headers processed */ - u64 nfs_packets; /* NFS headers processe */ -}; - -struct igb_ring_container { - struct igb_ring *ring; /* pointer to linked list of rings */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_packets; /* total packets processed this int */ - u16 work_limit; /* total work allowed per interrupt */ - u8 count; /* total number of rings in vector */ - u8 itr; /* current ITR setting for ring */ -}; - -struct igb_ring { - struct igb_q_vector *q_vector; /* backlink to q_vector */ - struct net_device *netdev; /* back pointer to net_device */ - struct device *dev; /* device for dma mapping */ - union { /* array of buffer info structs */ - struct igb_tx_buffer *tx_buffer_info; - struct igb_rx_buffer *rx_buffer_info; - }; -#ifdef HAVE_PTP_1588_CLOCK - unsigned long last_rx_timestamp; -#endif /* HAVE_PTP_1588_CLOCK */ - void *desc; /* descriptor ring memory */ - unsigned long flags; /* ring specific flags */ - void __iomem *tail; /* pointer to ring tail register */ - dma_addr_t dma; /* phys address of the ring */ - unsigned int size; /* length of desc. ring in bytes */ - - u16 count; /* number of desc. 
in the ring */ - u8 queue_index; /* logical index of the ring*/ - u8 reg_idx; /* physical index of the ring */ - - /* everything past this point are written often */ - u16 next_to_clean; - u16 next_to_use; - u16 next_to_alloc; - - union { - /* TX */ - struct { - struct igb_tx_queue_stats tx_stats; - }; - /* RX */ - struct { - struct igb_rx_queue_stats rx_stats; -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - u16 rx_buffer_len; -#else - struct sk_buff *skb; -#endif - }; - }; -#ifdef CONFIG_IGB_VMDQ_NETDEV - struct net_device *vmdq_netdev; - int vqueue_index; /* queue index for virtual netdev */ -#endif -} ____cacheline_internodealigned_in_smp; - -struct igb_q_vector { - struct igb_adapter *adapter; /* backlink */ - int cpu; /* CPU for DCA */ - u32 eims_value; /* EIMS mask value */ - - u16 itr_val; - u8 set_itr; - void __iomem *itr_register; - - struct igb_ring_container rx, tx; - - struct napi_struct napi; -#ifndef IGB_NO_LRO - struct igb_lro_list lrolist; /* LRO list for queue vector*/ -#endif - char name[IFNAMSIZ + 9]; -#ifndef HAVE_NETDEV_NAPI_LIST - struct net_device poll_dev; -#endif - - /* for dynamic allocation of rings associated with this q_vector */ - struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; -}; - -enum e1000_ring_flags_t { -#ifndef HAVE_NDO_SET_FEATURES - IGB_RING_FLAG_RX_CSUM, -#endif - IGB_RING_FLAG_RX_SCTP_CSUM, - IGB_RING_FLAG_RX_LB_VLAN_BSWAP, - IGB_RING_FLAG_TX_CTX_IDX, - IGB_RING_FLAG_TX_DETECT_HANG, -}; - -struct igb_mac_addr { - u8 addr[ETH_ALEN]; - u16 queue; - u16 state; /* bitmask */ -}; -#define IGB_MAC_STATE_DEFAULT 0x1 -#define IGB_MAC_STATE_MODIFIED 0x2 -#define IGB_MAC_STATE_IN_USE 0x4 - -#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) - -#define IGB_RX_DESC(R, i) \ - (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) -#define IGB_TX_DESC(R, i) \ - (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) -#define IGB_TX_CTXTDESC(R, i) \ - (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) - -#ifdef CONFIG_IGB_VMDQ_NETDEV -#define netdev_ring(ring) \ - ((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev)) -#define ring_queue_index(ring) \ - ((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index)) -#else -#define netdev_ring(ring) (ring->netdev) -#define ring_queue_index(ring) (ring->queue_index) -#endif /* CONFIG_IGB_VMDQ_NETDEV */ - -/* igb_test_staterr - tests bits within Rx descriptor status and error fields */ -static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc, - const u32 stat_err_bits) -{ - return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); -} - -/* igb_desc_unused - calculate if we have unused descriptors */ -static inline u16 igb_desc_unused(const struct igb_ring *ring) -{ - u16 ntc = ring->next_to_clean; - u16 ntu = ring->next_to_use; - - return ((ntc > ntu) ? 
0 : ring->count) + ntc - ntu - 1; -} - -#ifdef CONFIG_BQL -static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring) -{ - return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); -} -#endif /* CONFIG_BQL */ - -// #ifdef EXT_THERMAL_SENSOR_SUPPORT -// #ifdef IGB_PROCFS -struct igb_therm_proc_data -{ - struct e1000_hw *hw; - struct e1000_thermal_diode_data *sensor_data; -}; - -// #endif /* IGB_PROCFS */ -// #endif /* EXT_THERMAL_SENSOR_SUPPORT */ - -#ifdef IGB_HWMON -#define IGB_HWMON_TYPE_LOC 0 -#define IGB_HWMON_TYPE_TEMP 1 -#define IGB_HWMON_TYPE_CAUTION 2 -#define IGB_HWMON_TYPE_MAX 3 - -struct hwmon_attr { - struct device_attribute dev_attr; - struct e1000_hw *hw; - struct e1000_thermal_diode_data *sensor; - char name[12]; - }; - -struct hwmon_buff { - struct device *device; - struct hwmon_attr *hwmon_list; - unsigned int n_hwmon; - }; -#endif /* IGB_HWMON */ - -/* board specific private data structure */ -struct igb_adapter { -#ifdef HAVE_VLAN_RX_REGISTER - /* vlgrp must be first member of structure */ - struct vlan_group *vlgrp; -#else - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; -#endif - struct net_device *netdev; - - unsigned long state; - unsigned int flags; - - unsigned int num_q_vectors; - struct msix_entry *msix_entries; - - - /* TX */ - u16 tx_work_limit; - u32 tx_timeout_count; - int num_tx_queues; - struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES]; - - /* RX */ - int num_rx_queues; - struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES]; - - struct timer_list watchdog_timer; - struct timer_list dma_err_timer; - struct timer_list phy_info_timer; - u16 mng_vlan_id; - u32 bd_number; - u32 wol; - u32 en_mng_pt; - u16 link_speed; - u16 link_duplex; - u8 port_num; - - /* Interrupt Throttle Rate */ - u32 rx_itr_setting; - u32 tx_itr_setting; - - struct work_struct reset_task; - struct work_struct watchdog_task; - struct work_struct dma_err_task; - bool fc_autoneg; - u8 tx_timeout_factor; - -#ifdef DEBUG - bool tx_hang_detected; - bool disable_hw_reset; -#endif - u32 max_frame_size; - - /* OS defined structs */ - struct pci_dev *pdev; -#ifndef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats net_stats; -#endif -#ifndef IGB_NO_LRO - struct igb_lro_stats lro_stats; -#endif - - /* structs defined in e1000_hw.h */ - struct e1000_hw hw; - struct e1000_hw_stats stats; - struct e1000_phy_info phy_info; - struct e1000_phy_stats phy_stats; - -#ifdef ETHTOOL_TEST - u32 test_icr; - struct igb_ring test_tx_ring; - struct igb_ring test_rx_ring; -#endif - - int msg_enable; - - struct igb_q_vector *q_vector[MAX_Q_VECTORS]; - u32 eims_enable_mask; - u32 eims_other; - - /* to not mess up cache alignment, always add to the bottom */ - u32 *config_space; - u16 tx_ring_count; - u16 rx_ring_count; - struct vf_data_storage *vf_data; -#ifdef IFLA_VF_MAX - int vf_rate_link_speed; -#endif - u32 lli_port; - u32 lli_size; - unsigned int vfs_allocated_count; - /* Malicious Driver Detection flag. 
Valid only when SR-IOV is enabled */ - bool mdd; - int int_mode; - u32 rss_queues; - u32 vmdq_pools; - char fw_version[43]; - u32 wvbr; - struct igb_mac_addr *mac_table; -#ifdef CONFIG_IGB_VMDQ_NETDEV - struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES]; -#endif - int vferr_refcount; - int dmac; - u32 *shadow_vfta; - - /* External Thermal Sensor support flag */ - bool ets; -#ifdef IGB_HWMON - struct hwmon_buff igb_hwmon_buff; -#else /* IGB_HWMON */ -#ifdef IGB_PROCFS - struct proc_dir_entry *eth_dir; - struct proc_dir_entry *info_dir; - struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS]; - struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS]; - bool old_lsc; -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ - u32 etrack_id; - -#ifdef HAVE_PTP_1588_CLOCK - struct ptp_clock *ptp_clock; - struct ptp_clock_info ptp_caps; - struct delayed_work ptp_overflow_work; - struct work_struct ptp_tx_work; - struct sk_buff *ptp_tx_skb; - unsigned long ptp_tx_start; - unsigned long last_rx_ptp_check; - spinlock_t tmreg_lock; - struct cyclecounter cc; - struct timecounter tc; - u32 tx_hwtstamp_timeouts; - u32 rx_hwtstamp_cleared; -#endif /* HAVE_PTP_1588_CLOCK */ - -#ifdef HAVE_I2C_SUPPORT - struct i2c_algo_bit_data i2c_algo; - struct i2c_adapter i2c_adap; - struct i2c_client *i2c_client; -#endif /* HAVE_I2C_SUPPORT */ - unsigned long link_check_timeout; - - - int devrc; - - int copper_tries; - u16 eee_advert; -}; - -#ifdef CONFIG_IGB_VMDQ_NETDEV -struct igb_vmdq_adapter { -#ifdef HAVE_VLAN_RX_REGISTER - /* vlgrp must be first member of structure */ - struct vlan_group *vlgrp; -#else - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; -#endif - struct igb_adapter *real_adapter; - struct net_device *vnetdev; - struct net_device_stats net_stats; - struct igb_ring *tx_ring; - struct igb_ring *rx_ring; -}; -#endif - -#define IGB_FLAG_HAS_MSI (1 << 0) -#define IGB_FLAG_DCA_ENABLED (1 << 1) -#define IGB_FLAG_LLI_PUSH (1 << 2) -#define IGB_FLAG_QUAD_PORT_A (1 << 3) -#define IGB_FLAG_QUEUE_PAIRS (1 << 4) -#define IGB_FLAG_EEE (1 << 5) -#define IGB_FLAG_DMAC (1 << 6) -#define IGB_FLAG_DETECT_BAD_DMA (1 << 7) -#define IGB_FLAG_PTP (1 << 8) -#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 9) -#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 10) -#define IGB_FLAG_WOL_SUPPORTED (1 << 11) -#define IGB_FLAG_NEED_LINK_UPDATE (1 << 12) -#define IGB_FLAG_LOOPBACK_ENABLE (1 << 13) -#define IGB_FLAG_MEDIA_RESET (1 << 14) -#define IGB_FLAG_MAS_ENABLE (1 << 15) - -/* Media Auto Sense */ -#define IGB_MAS_ENABLE_0 0X0001 -#define IGB_MAS_ENABLE_1 0X0002 -#define IGB_MAS_ENABLE_2 0X0004 -#define IGB_MAS_ENABLE_3 0X0008 - -#define IGB_MIN_TXPBSIZE 20408 -#define IGB_TX_BUF_4096 4096 - -#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ - -/* DMA Coalescing defines */ -#define IGB_DMAC_DISABLE 0 -#define IGB_DMAC_MIN 250 -#define IGB_DMAC_500 500 -#define IGB_DMAC_EN_DEFAULT 1000 -#define IGB_DMAC_2000 2000 -#define IGB_DMAC_3000 3000 -#define IGB_DMAC_4000 4000 -#define IGB_DMAC_5000 5000 -#define IGB_DMAC_6000 6000 -#define IGB_DMAC_7000 7000 -#define IGB_DMAC_8000 8000 -#define IGB_DMAC_9000 9000 -#define IGB_DMAC_MAX 10000 - -#define IGB_82576_TSYNC_SHIFT 19 -#define IGB_82580_TSYNC_SHIFT 24 -#define IGB_TS_HDR_LEN 16 - -/* CEM Support */ -#define FW_HDR_LEN 0x4 -#define FW_CMD_DRV_INFO 0xDD -#define FW_CMD_DRV_INFO_LEN 0x5 -#define FW_CMD_RESERVED 0X0 -#define FW_RESP_SUCCESS 0x1 -#define FW_UNUSED_VER 0x0 -#define FW_MAX_RETRIES 3 -#define FW_STATUS_SUCCESS 0x1 -#define FW_FAMILY_DRV_VER 0Xffffffff - -#define 
IGB_MAX_LINK_TRIES 20 - -struct e1000_fw_hdr { - u8 cmd; - u8 buf_len; - union - { - u8 cmd_resv; - u8 ret_status; - } cmd_or_resp; - u8 checksum; -}; - -#pragma pack(push,1) -struct e1000_fw_drv_info { - struct e1000_fw_hdr hdr; - u8 port_num; - u32 drv_version; - u16 pad; /* end spacing to ensure length is mult. of dword */ - u8 pad2; /* end spacing to ensure length is mult. of dword2 */ -}; -#pragma pack(pop) - -enum e1000_state_t { - __IGB_TESTING, - __IGB_RESETTING, - __IGB_DOWN -}; - -extern char igb_driver_name[]; -extern char igb_driver_version[]; - -extern int igb_up(struct igb_adapter *); -extern void igb_down(struct igb_adapter *); -extern void igb_reinit_locked(struct igb_adapter *); -extern void igb_reset(struct igb_adapter *); -extern int igb_set_spd_dplx(struct igb_adapter *, u16); -extern int igb_setup_tx_resources(struct igb_ring *); -extern int igb_setup_rx_resources(struct igb_ring *); -extern void igb_free_tx_resources(struct igb_ring *); -extern void igb_free_rx_resources(struct igb_ring *); -extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); -extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); -extern void igb_setup_tctl(struct igb_adapter *); -extern void igb_setup_rctl(struct igb_adapter *); -extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); -extern void igb_unmap_and_free_tx_resource(struct igb_ring *, - struct igb_tx_buffer *); -extern void igb_alloc_rx_buffers(struct igb_ring *, u16); -extern void igb_clean_rx_ring(struct igb_ring *); -extern void igb_update_stats(struct igb_adapter *); -extern bool igb_has_link(struct igb_adapter *adapter); -extern void igb_set_ethtool_ops(struct net_device *); -extern void igb_check_options(struct igb_adapter *); -extern void igb_power_up_link(struct igb_adapter *); -#ifdef HAVE_PTP_1588_CLOCK -extern void igb_ptp_init(struct igb_adapter *adapter); -extern void igb_ptp_stop(struct igb_adapter *adapter); -extern void igb_ptp_reset(struct igb_adapter *adapter); -extern void igb_ptp_tx_work(struct work_struct *work); -extern void igb_ptp_rx_hang(struct igb_adapter *adapter); -extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); -extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb); -extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, - unsigned char *va, - struct sk_buff *skb); -static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - skb_pull(skb, IGB_TS_HDR_LEN); -#endif - return; - } - - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS)) - igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); - - /* Update the last_rx_timestamp timer in order to enable watchdog check - * for error case of latched timestamp on a dropped packet. 
- */ - rx_ring->last_rx_timestamp = jiffies; -} - -extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd); -#endif /* HAVE_PTP_1588_CLOCK */ -#ifdef ETHTOOL_OPS_COMPAT -extern int ethtool_ioctl(struct ifreq *); -#endif -extern int igb_write_mc_addr_list(struct net_device *netdev); -extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue); -extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue); -extern int igb_available_rars(struct igb_adapter *adapter); -extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32); -extern void igb_configure_vt_default_pool(struct igb_adapter *adapter); -extern void igb_enable_vlan_tags(struct igb_adapter *adapter); -#ifndef HAVE_VLAN_RX_REGISTER -extern void igb_vlan_mode(struct net_device *, u32); -#endif - -#define E1000_PCS_CFG_IGN_SD 1 - -#ifdef IGB_HWMON -void igb_sysfs_exit(struct igb_adapter *adapter); -int igb_sysfs_init(struct igb_adapter *adapter); -#else -#ifdef IGB_PROCFS -int igb_procfs_init(struct igb_adapter* adapter); -void igb_procfs_exit(struct igb_adapter* adapter); -int igb_procfs_topdir_init(void); -void igb_procfs_topdir_exit(void); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ - - - -#endif /* _IGB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c deleted file mode 100644 index 064528bc..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ /dev/null @@ -1,2842 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* ethtool support for igb */ - -#include -#include - -#ifdef SIOCETHTOOL -#include -#ifdef CONFIG_PM_RUNTIME -#include -#endif /* CONFIG_PM_RUNTIME */ -#include - -#include "igb.h" -#include "igb_regtest.h" -#include -#ifdef ETHTOOL_GEEE -#include -#endif - -#ifdef ETHTOOL_OPS_COMPAT -#include "kcompat_ethtool.c" -#endif -#ifdef ETHTOOL_GSTATS -struct igb_stats { - char stat_string[ETH_GSTRING_LEN]; - int sizeof_stat; - int stat_offset; -}; - -#define IGB_STAT(_name, _stat) { \ - .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \ - .stat_offset = offsetof(struct igb_adapter, _stat) \ -} -static const struct igb_stats igb_gstrings_stats[] = { - IGB_STAT("rx_packets", stats.gprc), - IGB_STAT("tx_packets", stats.gptc), - IGB_STAT("rx_bytes", stats.gorc), - IGB_STAT("tx_bytes", stats.gotc), - IGB_STAT("rx_broadcast", stats.bprc), - IGB_STAT("tx_broadcast", stats.bptc), - IGB_STAT("rx_multicast", stats.mprc), - IGB_STAT("tx_multicast", stats.mptc), - IGB_STAT("multicast", stats.mprc), - IGB_STAT("collisions", stats.colc), - IGB_STAT("rx_crc_errors", stats.crcerrs), - IGB_STAT("rx_no_buffer_count", stats.rnbc), - IGB_STAT("rx_missed_errors", stats.mpc), - IGB_STAT("tx_aborted_errors", stats.ecol), - IGB_STAT("tx_carrier_errors", stats.tncrs), - IGB_STAT("tx_window_errors", stats.latecol), - IGB_STAT("tx_abort_late_coll", stats.latecol), - IGB_STAT("tx_deferred_ok", stats.dc), - IGB_STAT("tx_single_coll_ok", stats.scc), - IGB_STAT("tx_multi_coll_ok", stats.mcc), - IGB_STAT("tx_timeout_count", tx_timeout_count), - IGB_STAT("rx_long_length_errors", stats.roc), - IGB_STAT("rx_short_length_errors", stats.ruc), - IGB_STAT("rx_align_errors", stats.algnerrc), - IGB_STAT("tx_tcp_seg_good", stats.tsctc), - IGB_STAT("tx_tcp_seg_failed", stats.tsctfc), - IGB_STAT("rx_flow_control_xon", stats.xonrxc), - IGB_STAT("rx_flow_control_xoff", stats.xoffrxc), - IGB_STAT("tx_flow_control_xon", stats.xontxc), - IGB_STAT("tx_flow_control_xoff", stats.xofftxc), - IGB_STAT("rx_long_byte_count", stats.gorc), - IGB_STAT("tx_dma_out_of_sync", stats.doosync), -#ifndef IGB_NO_LRO - IGB_STAT("lro_aggregated", lro_stats.coal), - IGB_STAT("lro_flushed", lro_stats.flushed), -#endif /* IGB_LRO */ - IGB_STAT("tx_smbus", stats.mgptc), - IGB_STAT("rx_smbus", stats.mgprc), - IGB_STAT("dropped_smbus", stats.mgpdc), - IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), - IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc), - IGB_STAT("os2bmc_tx_by_host", stats.o2bspc), - IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc), -#ifdef HAVE_PTP_1588_CLOCK - IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), - IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), -#endif /* HAVE_PTP_1588_CLOCK */ -}; - -#define IGB_NETDEV_STAT(_net_stat) { \ - .stat_string = #_net_stat, \ - .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ - .stat_offset = offsetof(struct net_device_stats, _net_stat) \ -} -static const struct igb_stats igb_gstrings_net_stats[] = { - IGB_NETDEV_STAT(rx_errors), - IGB_NETDEV_STAT(tx_errors), - IGB_NETDEV_STAT(tx_dropped), - IGB_NETDEV_STAT(rx_length_errors), - IGB_NETDEV_STAT(rx_over_errors), - IGB_NETDEV_STAT(rx_frame_errors), - IGB_NETDEV_STAT(rx_fifo_errors), - IGB_NETDEV_STAT(tx_fifo_errors), - IGB_NETDEV_STAT(tx_heartbeat_errors) -}; - -#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats) -#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats) 
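The IGB_STAT and IGB_NETDEV_STAT macros above record, for each ethtool string, the size and byte offset of the matching counter inside struct igb_adapter or struct net_device_stats. Below is a minimal userspace sketch of how such an offset table is typically walked to fill an ethtool-style output buffer; the struct, table and helper names (demo_adapter, demo_table, demo_fill_stats) are illustrative stand-ins, not the removed driver's igb_get_ethtool_stats(), and the sizeof() expression merely mimics the kernel's FIELD_SIZEOF.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for struct igb_adapter and its stats block. */
struct demo_stats { uint64_t gprc, gptc; };
struct demo_adapter { struct demo_stats stats; uint32_t tx_timeout_count; };

struct demo_stat_entry {
	char name[32];
	int size;	/* sizeof() of the counter field */
	int offset;	/* offsetof() the counter inside demo_adapter */
};

#define DEMO_STAT(_name, _field) {				\
	.name = _name,						\
	.size = sizeof(((struct demo_adapter *)0)->_field),	\
	.offset = offsetof(struct demo_adapter, _field)		\
}

static const struct demo_stat_entry demo_table[] = {
	DEMO_STAT("rx_packets", stats.gprc),
	DEMO_STAT("tx_packets", stats.gptc),
	DEMO_STAT("tx_timeout_count", tx_timeout_count),
};

/* Walk the table and widen every counter to u64, the way an ethtool
 * get_ethtool_stats handler fills its output buffer. */
static void demo_fill_stats(const struct demo_adapter *a, uint64_t *out)
{
	for (size_t i = 0; i < sizeof(demo_table) / sizeof(demo_table[0]); i++) {
		const char *p = (const char *)a + demo_table[i].offset;

		out[i] = (demo_table[i].size == sizeof(uint64_t)) ?
			 *(const uint64_t *)p : *(const uint32_t *)p;
	}
}

int main(void)
{
	struct demo_adapter a = { .stats = { .gprc = 12, .gptc = 34 },
				  .tx_timeout_count = 2 };
	uint64_t buf[3];

	demo_fill_stats(&a, buf);
	for (size_t i = 0; i < 3; i++)
		printf("%-20s %llu\n", demo_table[i].name,
		       (unsigned long long)buf[i]);
	return 0;
}

Keeping the strings and counters paired in one table like this avoids a long hand-written switch and guarantees the name array and the value array stay in sync when counters are added or removed.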
-#define IGB_RX_QUEUE_STATS_LEN \ - (sizeof(struct igb_rx_queue_stats) / sizeof(u64)) -#define IGB_TX_QUEUE_STATS_LEN \ - (sizeof(struct igb_tx_queue_stats) / sizeof(u64)) -#define IGB_QUEUE_STATS_LEN \ - ((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \ - IGB_RX_QUEUE_STATS_LEN) + \ - (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \ - IGB_TX_QUEUE_STATS_LEN)) -#define IGB_STATS_LEN \ - (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN) - -#endif /* ETHTOOL_GSTATS */ -#ifdef ETHTOOL_TEST -static const char igb_gstrings_test[][ETH_GSTRING_LEN] = { - "Register test (offline)", "Eeprom test (offline)", - "Interrupt test (offline)", "Loopback test (offline)", - "Link test (on/offline)" -}; -#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN) -#endif /* ETHTOOL_TEST */ - -static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 status; - - if (hw->phy.media_type == e1000_media_type_copper) { - - ecmd->supported = (SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Full| - SUPPORTED_Autoneg | - SUPPORTED_TP | - SUPPORTED_Pause); - ecmd->advertising = ADVERTISED_TP; - - if (hw->mac.autoneg == 1) { - ecmd->advertising |= ADVERTISED_Autoneg; - /* the e1000 autoneg seems to match ethtool nicely */ - ecmd->advertising |= hw->phy.autoneg_advertised; - } - - ecmd->port = PORT_TP; - ecmd->phy_address = hw->phy.addr; - ecmd->transceiver = XCVR_INTERNAL; - - } else { - ecmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full | - SUPPORTED_FIBRE | - SUPPORTED_Autoneg | - SUPPORTED_Pause); - if (hw->mac.type == e1000_i354) - ecmd->supported |= (SUPPORTED_2500baseX_Full); - - ecmd->advertising = ADVERTISED_FIBRE; - - switch (adapter->link_speed) { - case SPEED_2500: - ecmd->advertising = ADVERTISED_2500baseX_Full; - break; - case SPEED_1000: - ecmd->advertising = ADVERTISED_1000baseT_Full; - break; - case SPEED_100: - ecmd->advertising = ADVERTISED_100baseT_Full; - break; - default: - break; - } - - if (hw->mac.autoneg == 1) - ecmd->advertising |= ADVERTISED_Autoneg; - - ecmd->port = PORT_FIBRE; - ecmd->transceiver = XCVR_EXTERNAL; - } - - if (hw->mac.autoneg != 1) - ecmd->advertising &= ~(ADVERTISED_Pause | - ADVERTISED_Asym_Pause); - - if (hw->fc.requested_mode == e1000_fc_full) - ecmd->advertising |= ADVERTISED_Pause; - else if (hw->fc.requested_mode == e1000_fc_rx_pause) - ecmd->advertising |= (ADVERTISED_Pause | - ADVERTISED_Asym_Pause); - else if (hw->fc.requested_mode == e1000_fc_tx_pause) - ecmd->advertising |= ADVERTISED_Asym_Pause; - else - ecmd->advertising &= ~(ADVERTISED_Pause | - ADVERTISED_Asym_Pause); - - status = E1000_READ_REG(hw, E1000_STATUS); - - if (status & E1000_STATUS_LU) { - if ((hw->mac.type == e1000_i354) && - (status & E1000_STATUS_2P5_SKU) && - !(status & E1000_STATUS_2P5_SKU_OVER)) - ecmd->speed = SPEED_2500; - else if (status & E1000_STATUS_SPEED_1000) - ecmd->speed = SPEED_1000; - else if (status & E1000_STATUS_SPEED_100) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - - if ((status & E1000_STATUS_FD) || - hw->phy.media_type != e1000_media_type_copper) - ecmd->duplex = DUPLEX_FULL; - else - ecmd->duplex = DUPLEX_HALF; - - } else { - ecmd->speed = -1; - ecmd->duplex = -1; - } - - if ((hw->phy.media_type == e1000_media_type_fiber) || - hw->mac.autoneg) - ecmd->autoneg = AUTONEG_ENABLE; - 
else - ecmd->autoneg = AUTONEG_DISABLE; -#ifdef ETH_TP_MDI_X - - /* MDI-X => 2; MDI =>1; Invalid =>0 */ - if (hw->phy.media_type == e1000_media_type_copper) - ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : - ETH_TP_MDI; - else - ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; - -#ifdef ETH_TP_MDI_AUTO - if (hw->phy.mdix == AUTO_ALL_MODES) - ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; - else - ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; - -#endif -#endif /* ETH_TP_MDI_X */ - return 0; -} - -static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - - if (ecmd->duplex == DUPLEX_HALF) { - if (!hw->dev_spec._82575.eee_disable) - dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n"); - hw->dev_spec._82575.eee_disable = true; - } else { - if (hw->dev_spec._82575.eee_disable) - dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n"); - hw->dev_spec._82575.eee_disable = false; - } - - /* When SoL/IDER sessions are active, autoneg/speed/duplex - * cannot be changed */ - if (e1000_check_reset_block(hw)) { - dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link " - "characteristics when SoL/IDER is active.\n"); - return -EINVAL; - } - -#ifdef ETH_TP_MDI_AUTO - /* - * MDI setting is only allowed when autoneg enabled because - * some hardware doesn't allow MDI setting when speed or - * duplex is forced. - */ - if (ecmd->eth_tp_mdix_ctrl) { - if (hw->phy.media_type != e1000_media_type_copper) - return -EOPNOTSUPP; - - if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && - (ecmd->autoneg != AUTONEG_ENABLE)) { - dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); - return -EINVAL; - } - } - -#endif /* ETH_TP_MDI_AUTO */ - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - if (ecmd->autoneg == AUTONEG_ENABLE) { - hw->mac.autoneg = 1; - if (hw->phy.media_type == e1000_media_type_fiber) { - hw->phy.autoneg_advertised = ecmd->advertising | - ADVERTISED_FIBRE | - ADVERTISED_Autoneg; - switch (adapter->link_speed) { - case SPEED_2500: - hw->phy.autoneg_advertised = - ADVERTISED_2500baseX_Full; - break; - case SPEED_1000: - hw->phy.autoneg_advertised = - ADVERTISED_1000baseT_Full; - break; - case SPEED_100: - hw->phy.autoneg_advertised = - ADVERTISED_100baseT_Full; - break; - default: - break; - } - } else { - hw->phy.autoneg_advertised = ecmd->advertising | - ADVERTISED_TP | - ADVERTISED_Autoneg; - } - ecmd->advertising = hw->phy.autoneg_advertised; - if (adapter->fc_autoneg) - hw->fc.requested_mode = e1000_fc_default; - } else { - if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) { - clear_bit(__IGB_RESETTING, &adapter->state); - return -EINVAL; - } - } - -#ifdef ETH_TP_MDI_AUTO - /* MDI-X => 2; MDI => 1; Auto => 3 */ - if (ecmd->eth_tp_mdix_ctrl) { - /* fix up the value for auto (3 => 0) as zero is mapped - * internally to auto - */ - if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) - hw->phy.mdix = AUTO_ALL_MODES; - else - hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; - } - -#endif /* ETH_TP_MDI_AUTO */ - /* reset the link */ - if (netif_running(adapter->netdev)) { - igb_down(adapter); - igb_up(adapter); - } else - igb_reset(adapter); - - clear_bit(__IGB_RESETTING, &adapter->state); - return 0; -} - -static u32 igb_get_link(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_mac_info *mac = &adapter->hw.mac; - - 
/* - * If the link is not reported up to netdev, interrupts are disabled, - * and so the physical link state may have changed since we last - * looked. Set get_link_status to make sure that the true link - * state is interrogated, rather than pulling a cached and possibly - * stale link state from the driver. - */ - if (!netif_carrier_ok(netdev)) - mac->get_link_status = 1; - - return igb_has_link(adapter); -} - -static void igb_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - - pause->autoneg = - (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); - - if (hw->fc.current_mode == e1000_fc_rx_pause) - pause->rx_pause = 1; - else if (hw->fc.current_mode == e1000_fc_tx_pause) - pause->tx_pause = 1; - else if (hw->fc.current_mode == e1000_fc_full) { - pause->rx_pause = 1; - pause->tx_pause = 1; - } -} - -static int igb_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - int retval = 0; - - adapter->fc_autoneg = pause->autoneg; - - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - if (adapter->fc_autoneg == AUTONEG_ENABLE) { - hw->fc.requested_mode = e1000_fc_default; - if (netif_running(adapter->netdev)) { - igb_down(adapter); - igb_up(adapter); - } else { - igb_reset(adapter); - } - } else { - if (pause->rx_pause && pause->tx_pause) - hw->fc.requested_mode = e1000_fc_full; - else if (pause->rx_pause && !pause->tx_pause) - hw->fc.requested_mode = e1000_fc_rx_pause; - else if (!pause->rx_pause && pause->tx_pause) - hw->fc.requested_mode = e1000_fc_tx_pause; - else if (!pause->rx_pause && !pause->tx_pause) - hw->fc.requested_mode = e1000_fc_none; - - hw->fc.current_mode = hw->fc.requested_mode; - - if (hw->phy.media_type == e1000_media_type_fiber) { - retval = hw->mac.ops.setup_link(hw); - /* implicit goto out */ - } else { - retval = e1000_force_mac_fc(hw); - if (retval) - goto out; - e1000_set_fc_watermarks_generic(hw); - } - } - -out: - clear_bit(__IGB_RESETTING, &adapter->state); - return retval; -} - -static u32 igb_get_msglevel(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - return adapter->msg_enable; -} - -static void igb_set_msglevel(struct net_device *netdev, u32 data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - adapter->msg_enable = data; -} - -static int igb_get_regs_len(struct net_device *netdev) -{ -#define IGB_REGS_LEN 555 - return IGB_REGS_LEN * sizeof(u32); -} - -static void igb_get_regs(struct net_device *netdev, - struct ethtool_regs *regs, void *p) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 *regs_buff = p; - u8 i; - - memset(p, 0, IGB_REGS_LEN * sizeof(u32)); - - regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; - - /* General Registers */ - regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); - regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); - regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); - regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC); - regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL); - regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW); - regs_buff[6] = E1000_READ_REG(hw, E1000_VET); - regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL); - regs_buff[8] = E1000_READ_REG(hw, E1000_PBA); - regs_buff[9] = E1000_READ_REG(hw, E1000_PBS); - regs_buff[10] = E1000_READ_REG(hw, 
E1000_FRTIMER); - regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER); - - /* NVM Register */ - regs_buff[12] = E1000_READ_REG(hw, E1000_EECD); - - /* Interrupt */ - /* Reading EICS for EICR because they read the - * same but EICS does not clear on read */ - regs_buff[13] = E1000_READ_REG(hw, E1000_EICS); - regs_buff[14] = E1000_READ_REG(hw, E1000_EICS); - regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS); - regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC); - regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC); - regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM); - /* Reading ICS for ICR because they read the - * same but ICS does not clear on read */ - regs_buff[19] = E1000_READ_REG(hw, E1000_ICS); - regs_buff[20] = E1000_READ_REG(hw, E1000_ICS); - regs_buff[21] = E1000_READ_REG(hw, E1000_IMS); - regs_buff[22] = E1000_READ_REG(hw, E1000_IMC); - regs_buff[23] = E1000_READ_REG(hw, E1000_IAC); - regs_buff[24] = E1000_READ_REG(hw, E1000_IAM); - regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP); - - /* Flow Control */ - regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL); - regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH); - regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV); - regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL); - regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH); - regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV); - - /* Receive */ - regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL); - regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM); - regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML); - regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL); - regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC); - regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL); - - /* Transmit */ - regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL); - regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT); - regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG); - regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL); - - /* Wake Up */ - regs_buff[42] = E1000_READ_REG(hw, E1000_WUC); - regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC); - regs_buff[44] = E1000_READ_REG(hw, E1000_WUS); - regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV); - regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL); - - /* MAC */ - regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0); - regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL); - regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT); - regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV); - regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB); - regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX); - regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP); - - /* Statistics */ - regs_buff[54] = adapter->stats.crcerrs; - regs_buff[55] = adapter->stats.algnerrc; - regs_buff[56] = adapter->stats.symerrs; - regs_buff[57] = adapter->stats.rxerrc; - regs_buff[58] = adapter->stats.mpc; - regs_buff[59] = adapter->stats.scc; - regs_buff[60] = adapter->stats.ecol; - regs_buff[61] = adapter->stats.mcc; - regs_buff[62] = adapter->stats.latecol; - regs_buff[63] = adapter->stats.colc; - regs_buff[64] = adapter->stats.dc; - regs_buff[65] = adapter->stats.tncrs; - regs_buff[66] = adapter->stats.sec; - regs_buff[67] = adapter->stats.htdpmc; - regs_buff[68] = adapter->stats.rlec; - regs_buff[69] = adapter->stats.xonrxc; - regs_buff[70] = adapter->stats.xontxc; - regs_buff[71] = adapter->stats.xoffrxc; - regs_buff[72] = adapter->stats.xofftxc; - regs_buff[73] = adapter->stats.fcruc; - regs_buff[74] = adapter->stats.prc64; - regs_buff[75] = adapter->stats.prc127; - regs_buff[76] = adapter->stats.prc255; - regs_buff[77] = adapter->stats.prc511; - regs_buff[78] = 
adapter->stats.prc1023; - regs_buff[79] = adapter->stats.prc1522; - regs_buff[80] = adapter->stats.gprc; - regs_buff[81] = adapter->stats.bprc; - regs_buff[82] = adapter->stats.mprc; - regs_buff[83] = adapter->stats.gptc; - regs_buff[84] = adapter->stats.gorc; - regs_buff[86] = adapter->stats.gotc; - regs_buff[88] = adapter->stats.rnbc; - regs_buff[89] = adapter->stats.ruc; - regs_buff[90] = adapter->stats.rfc; - regs_buff[91] = adapter->stats.roc; - regs_buff[92] = adapter->stats.rjc; - regs_buff[93] = adapter->stats.mgprc; - regs_buff[94] = adapter->stats.mgpdc; - regs_buff[95] = adapter->stats.mgptc; - regs_buff[96] = adapter->stats.tor; - regs_buff[98] = adapter->stats.tot; - regs_buff[100] = adapter->stats.tpr; - regs_buff[101] = adapter->stats.tpt; - regs_buff[102] = adapter->stats.ptc64; - regs_buff[103] = adapter->stats.ptc127; - regs_buff[104] = adapter->stats.ptc255; - regs_buff[105] = adapter->stats.ptc511; - regs_buff[106] = adapter->stats.ptc1023; - regs_buff[107] = adapter->stats.ptc1522; - regs_buff[108] = adapter->stats.mptc; - regs_buff[109] = adapter->stats.bptc; - regs_buff[110] = adapter->stats.tsctc; - regs_buff[111] = adapter->stats.iac; - regs_buff[112] = adapter->stats.rpthc; - regs_buff[113] = adapter->stats.hgptc; - regs_buff[114] = adapter->stats.hgorc; - regs_buff[116] = adapter->stats.hgotc; - regs_buff[118] = adapter->stats.lenerrs; - regs_buff[119] = adapter->stats.scvpc; - regs_buff[120] = adapter->stats.hrmpc; - - for (i = 0; i < 4; i++) - regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i)); - for (i = 0; i < 4; i++) - regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i)); - for (i = 0; i < 4; i++) - regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i)); - for (i = 0; i < 4; i++) - regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i)); - for (i = 0; i < 4; i++) - regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i)); - for (i = 0; i < 4; i++) - regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i)); - for (i = 0; i < 4; i++) - regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i)); - for (i = 0; i < 4; i++) - regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); - - for (i = 0; i < 10; i++) - regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i)); - for (i = 0; i < 8; i++) - regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i)); - for (i = 0; i < 8; i++) - regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i)); - for (i = 0; i < 16; i++) - regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i)); - for (i = 0; i < 16; i++) - regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i)); - - for (i = 0; i < 4; i++) - regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i)); - for (i = 0; i < 4; i++) - regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i)); - for (i = 0; i < 4; i++) - regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i)); - for (i = 0; i < 4; i++) - regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i)); - for (i = 0; i < 4; i++) - regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i)); - for (i = 0; i < 4; i++) - regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i)); - for (i = 0; i < 4; i++) - regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i)); - for (i = 0; i < 4; i++) - regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i)); - for (i = 0; i < 4; i++) - regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); - - for (i = 0; i < 4; i++) - regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i)); - for (i = 0; i < 4; i++) - regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i)); - for (i = 0; i < 32; i++) - 
regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i)); - for (i = 0; i < 128; i++) - regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i)); - for (i = 0; i < 128; i++) - regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i)); - for (i = 0; i < 4; i++) - regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i)); - - regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH); - regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT); - regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS); - regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC); - if (hw->mac.type > e1000_82580) { - regs_buff[551] = adapter->stats.o2bgptc; - regs_buff[552] = adapter->stats.b2ospc; - regs_buff[553] = adapter->stats.o2bspc; - regs_buff[554] = adapter->stats.b2ogprc; - } -} - -static int igb_get_eeprom_len(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - return adapter->hw.nvm.word_size * 2; -} - -static int igb_get_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u16 *eeprom_buff; - int first_word, last_word; - int ret_val = 0; - u16 i; - - if (eeprom->len == 0) - return -EINVAL; - - eeprom->magic = hw->vendor_id | (hw->device_id << 16); - - first_word = eeprom->offset >> 1; - last_word = (eeprom->offset + eeprom->len - 1) >> 1; - - eeprom_buff = kmalloc(sizeof(u16) * - (last_word - first_word + 1), GFP_KERNEL); - if (!eeprom_buff) - return -ENOMEM; - - if (hw->nvm.type == e1000_nvm_eeprom_spi) - ret_val = e1000_read_nvm(hw, first_word, - last_word - first_word + 1, - eeprom_buff); - else { - for (i = 0; i < last_word - first_word + 1; i++) { - ret_val = e1000_read_nvm(hw, first_word + i, 1, - &eeprom_buff[i]); - if (ret_val) - break; - } - } - - /* Device's eeprom is always little-endian, word addressable */ - for (i = 0; i < last_word - first_word + 1; i++) - eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]); - - memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), - eeprom->len); - kfree(eeprom_buff); - - return ret_val; -} - -static int igb_set_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u16 *eeprom_buff; - void *ptr; - int max_len, first_word, last_word, ret_val = 0; - u16 i; - - if (eeprom->len == 0) - return -EOPNOTSUPP; - - if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) - return -EFAULT; - - max_len = hw->nvm.word_size * 2; - - first_word = eeprom->offset >> 1; - last_word = (eeprom->offset + eeprom->len - 1) >> 1; - eeprom_buff = kmalloc(max_len, GFP_KERNEL); - if (!eeprom_buff) - return -ENOMEM; - - ptr = (void *)eeprom_buff; - - if (eeprom->offset & 1) { - /* need read/modify/write of first changed EEPROM word */ - /* only the second byte of the word is being modified */ - ret_val = e1000_read_nvm(hw, first_word, 1, - &eeprom_buff[0]); - ptr++; - } - if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { - /* need read/modify/write of last changed EEPROM word */ - /* only the first byte of the word is being modified */ - ret_val = e1000_read_nvm(hw, last_word, 1, - &eeprom_buff[last_word - first_word]); - } - - /* Device's eeprom is always little-endian, word addressable */ - for (i = 0; i < last_word - first_word + 1; i++) - le16_to_cpus(&eeprom_buff[i]); - - memcpy(ptr, bytes, eeprom->len); - - for (i = 0; i < last_word - first_word + 1; i++) - cpu_to_le16s(&eeprom_buff[i]); - - ret_val = 
e1000_write_nvm(hw, first_word, - last_word - first_word + 1, eeprom_buff); - - /* Update the checksum if write succeeded. - * and flush shadow RAM for 82573 controllers */ - if (ret_val == 0) - e1000_update_nvm_checksum(hw); - - kfree(eeprom_buff); - return ret_val; -} - -static void igb_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1); - strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1); - - strncpy(drvinfo->fw_version, adapter->fw_version, - sizeof(drvinfo->fw_version) - 1); - strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1); - drvinfo->n_stats = IGB_STATS_LEN; - drvinfo->testinfo_len = IGB_TEST_LEN; - drvinfo->regdump_len = igb_get_regs_len(netdev); - drvinfo->eedump_len = igb_get_eeprom_len(netdev); -} - -static void igb_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - ring->rx_max_pending = IGB_MAX_RXD; - ring->tx_max_pending = IGB_MAX_TXD; - ring->rx_mini_max_pending = 0; - ring->rx_jumbo_max_pending = 0; - ring->rx_pending = adapter->rx_ring_count; - ring->tx_pending = adapter->tx_ring_count; - ring->rx_mini_pending = 0; - ring->rx_jumbo_pending = 0; -} - -static int igb_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct igb_ring *temp_ring; - int i, err = 0; - u16 new_rx_count, new_tx_count; - - if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) - return -EINVAL; - - new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD); - new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD); - new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE); - - new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD); - new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD); - new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE); - - if ((new_tx_count == adapter->tx_ring_count) && - (new_rx_count == adapter->rx_ring_count)) { - /* nothing to do */ - return 0; - } - - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - if (!netif_running(adapter->netdev)) { - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i]->count = new_tx_count; - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->count = new_rx_count; - adapter->tx_ring_count = new_tx_count; - adapter->rx_ring_count = new_rx_count; - goto clear_reset; - } - - if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring)); - else - temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring)); - - if (!temp_ring) { - err = -ENOMEM; - goto clear_reset; - } - - igb_down(adapter); - - /* - * We can't just free everything and then setup again, - * because the ISRs in MSI-X mode get passed pointers - * to the tx and rx ring structs. 
- */ - if (new_tx_count != adapter->tx_ring_count) { - for (i = 0; i < adapter->num_tx_queues; i++) { - memcpy(&temp_ring[i], adapter->tx_ring[i], - sizeof(struct igb_ring)); - - temp_ring[i].count = new_tx_count; - err = igb_setup_tx_resources(&temp_ring[i]); - if (err) { - while (i) { - i--; - igb_free_tx_resources(&temp_ring[i]); - } - goto err_setup; - } - } - - for (i = 0; i < adapter->num_tx_queues; i++) { - igb_free_tx_resources(adapter->tx_ring[i]); - - memcpy(adapter->tx_ring[i], &temp_ring[i], - sizeof(struct igb_ring)); - } - - adapter->tx_ring_count = new_tx_count; - } - - if (new_rx_count != adapter->rx_ring_count) { - for (i = 0; i < adapter->num_rx_queues; i++) { - memcpy(&temp_ring[i], adapter->rx_ring[i], - sizeof(struct igb_ring)); - - temp_ring[i].count = new_rx_count; - err = igb_setup_rx_resources(&temp_ring[i]); - if (err) { - while (i) { - i--; - igb_free_rx_resources(&temp_ring[i]); - } - goto err_setup; - } - - } - - for (i = 0; i < adapter->num_rx_queues; i++) { - igb_free_rx_resources(adapter->rx_ring[i]); - - memcpy(adapter->rx_ring[i], &temp_ring[i], - sizeof(struct igb_ring)); - } - - adapter->rx_ring_count = new_rx_count; - } -err_setup: - igb_up(adapter); - vfree(temp_ring); -clear_reset: - clear_bit(__IGB_RESETTING, &adapter->state); - return err; -} -static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data, - int reg, u32 mask, u32 write) -{ - struct e1000_hw *hw = &adapter->hw; - u32 pat, val; - static const u32 _test[] = - {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; - for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { - E1000_WRITE_REG(hw, reg, (_test[pat] & write)); - val = E1000_READ_REG(hw, reg) & mask; - if (val != (_test[pat] & write & mask)) { - dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X " - "failed: got 0x%08X expected 0x%08X\n", - E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask)); - *data = E1000_REGISTER(hw, reg); - return 1; - } - } - - return 0; -} - -static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data, - int reg, u32 mask, u32 write) -{ - struct e1000_hw *hw = &adapter->hw; - u32 val; - E1000_WRITE_REG(hw, reg, write & mask); - val = E1000_READ_REG(hw, reg); - if ((write & mask) != (val & mask)) { - dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:" - " got 0x%08X expected 0x%08X\n", reg, - (val & mask), (write & mask)); - *data = E1000_REGISTER(hw, reg); - return 1; - } - - return 0; -} - -#define REG_PATTERN_TEST(reg, mask, write) \ - do { \ - if (reg_pattern_test(adapter, data, reg, mask, write)) \ - return 1; \ - } while (0) - -#define REG_SET_AND_CHECK(reg, mask, write) \ - do { \ - if (reg_set_and_check(adapter, data, reg, mask, write)) \ - return 1; \ - } while (0) - -static int igb_reg_test(struct igb_adapter *adapter, u64 *data) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_reg_test *test; - u32 value, before, after; - u32 i, toggle; - - switch (adapter->hw.mac.type) { - case e1000_i350: - case e1000_i354: - test = reg_test_i350; - toggle = 0x7FEFF3FF; - break; - case e1000_i210: - case e1000_i211: - test = reg_test_i210; - toggle = 0x7FEFF3FF; - break; - case e1000_82580: - test = reg_test_82580; - toggle = 0x7FEFF3FF; - break; - case e1000_82576: - test = reg_test_82576; - toggle = 0x7FFFF3FF; - break; - default: - test = reg_test_82575; - toggle = 0x7FFFF3FF; - break; - } - - /* Because the status register is such a special case, - * we handle it separately from the rest of the register - * tests. 
Some bits are read-only, some toggle, and some - * are writable on newer MACs. - */ - before = E1000_READ_REG(hw, E1000_STATUS); - value = (E1000_READ_REG(hw, E1000_STATUS) & toggle); - E1000_WRITE_REG(hw, E1000_STATUS, toggle); - after = E1000_READ_REG(hw, E1000_STATUS) & toggle; - if (value != after) { - dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test " - "got: 0x%08X expected: 0x%08X\n", after, value); - *data = 1; - return 1; - } - /* restore previous status */ - E1000_WRITE_REG(hw, E1000_STATUS, before); - - /* Perform the remainder of the register test, looping through - * the test table until we either fail or reach the null entry. - */ - while (test->reg) { - for (i = 0; i < test->array_len; i++) { - switch (test->test_type) { - case PATTERN_TEST: - REG_PATTERN_TEST(test->reg + - (i * test->reg_offset), - test->mask, - test->write); - break; - case SET_READ_TEST: - REG_SET_AND_CHECK(test->reg + - (i * test->reg_offset), - test->mask, - test->write); - break; - case WRITE_NO_TEST: - writel(test->write, - (adapter->hw.hw_addr + test->reg) - + (i * test->reg_offset)); - break; - case TABLE32_TEST: - REG_PATTERN_TEST(test->reg + (i * 4), - test->mask, - test->write); - break; - case TABLE64_TEST_LO: - REG_PATTERN_TEST(test->reg + (i * 8), - test->mask, - test->write); - break; - case TABLE64_TEST_HI: - REG_PATTERN_TEST((test->reg + 4) + (i * 8), - test->mask, - test->write); - break; - } - } - test++; - } - - *data = 0; - return 0; -} - -static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data) -{ - *data = 0; - - /* Validate NVM checksum */ - if (e1000_validate_nvm_checksum(&adapter->hw) < 0) - *data = 2; - - return *data; -} - -static irqreturn_t igb_test_intr(int irq, void *data) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - - adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR); - - return IRQ_HANDLED; -} - -static int igb_intr_test(struct igb_adapter *adapter, u64 *data) -{ - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - u32 mask, ics_mask, i = 0, shared_int = TRUE; - u32 irq = adapter->pdev->irq; - - *data = 0; - - /* Hook up test interrupt handler just for this test */ - if (adapter->msix_entries) { - if (request_irq(adapter->msix_entries[0].vector, - &igb_test_intr, 0, netdev->name, adapter)) { - *data = 1; - return -1; - } - } else if (adapter->flags & IGB_FLAG_HAS_MSI) { - shared_int = FALSE; - if (request_irq(irq, - igb_test_intr, 0, netdev->name, adapter)) { - *data = 1; - return -1; - } - } else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED, - netdev->name, adapter)) { - shared_int = FALSE; - } else if (request_irq(irq, &igb_test_intr, IRQF_SHARED, - netdev->name, adapter)) { - *data = 1; - return -1; - } - dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n", - (shared_int ? 
"shared" : "unshared")); - - /* Disable all the interrupts */ - E1000_WRITE_REG(hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - /* Define all writable bits for ICS */ - switch (hw->mac.type) { - case e1000_82575: - ics_mask = 0x37F47EDD; - break; - case e1000_82576: - ics_mask = 0x77D4FBFD; - break; - case e1000_82580: - ics_mask = 0x77DCFED5; - break; - case e1000_i350: - case e1000_i354: - ics_mask = 0x77DCFED5; - break; - case e1000_i210: - case e1000_i211: - ics_mask = 0x774CFED5; - break; - default: - ics_mask = 0x7FFFFFFF; - break; - } - - /* Test each interrupt */ - for (; i < 31; i++) { - /* Interrupt to test */ - mask = 1 << i; - - if (!(mask & ics_mask)) - continue; - - if (!shared_int) { - /* Disable the interrupt to be reported in - * the cause register and then force the same - * interrupt and see if one gets posted. If - * an interrupt was posted to the bus, the - * test failed. - */ - adapter->test_icr = 0; - - /* Flush any pending interrupts */ - E1000_WRITE_REG(hw, E1000_ICR, ~0); - - E1000_WRITE_REG(hw, E1000_IMC, mask); - E1000_WRITE_REG(hw, E1000_ICS, mask); - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - if (adapter->test_icr & mask) { - *data = 3; - break; - } - } - - /* Enable the interrupt to be reported in - * the cause register and then force the same - * interrupt and see if one gets posted. If - * an interrupt was not posted to the bus, the - * test failed. - */ - adapter->test_icr = 0; - - /* Flush any pending interrupts */ - E1000_WRITE_REG(hw, E1000_ICR, ~0); - - E1000_WRITE_REG(hw, E1000_IMS, mask); - E1000_WRITE_REG(hw, E1000_ICS, mask); - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - if (!(adapter->test_icr & mask)) { - *data = 4; - break; - } - - if (!shared_int) { - /* Disable the other interrupts to be reported in - * the cause register and then force the other - * interrupts and see if any get posted. If - * an interrupt was posted to the bus, the - * test failed. 
- */ - adapter->test_icr = 0; - - /* Flush any pending interrupts */ - E1000_WRITE_REG(hw, E1000_ICR, ~0); - - E1000_WRITE_REG(hw, E1000_IMC, ~mask); - E1000_WRITE_REG(hw, E1000_ICS, ~mask); - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - if (adapter->test_icr & mask) { - *data = 5; - break; - } - } - } - - /* Disable all the interrupts */ - E1000_WRITE_REG(hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - /* Unhook test interrupt handler */ - if (adapter->msix_entries) - free_irq(adapter->msix_entries[0].vector, adapter); - else - free_irq(irq, adapter); - - return *data; -} - -static void igb_free_desc_rings(struct igb_adapter *adapter) -{ - igb_free_tx_resources(&adapter->test_tx_ring); - igb_free_rx_resources(&adapter->test_rx_ring); -} - -static int igb_setup_desc_rings(struct igb_adapter *adapter) -{ - struct igb_ring *tx_ring = &adapter->test_tx_ring; - struct igb_ring *rx_ring = &adapter->test_rx_ring; - struct e1000_hw *hw = &adapter->hw; - int ret_val; - - /* Setup Tx descriptor ring and Tx buffers */ - tx_ring->count = IGB_DEFAULT_TXD; - tx_ring->dev = pci_dev_to_dev(adapter->pdev); - tx_ring->netdev = adapter->netdev; - tx_ring->reg_idx = adapter->vfs_allocated_count; - - if (igb_setup_tx_resources(tx_ring)) { - ret_val = 1; - goto err_nomem; - } - - igb_setup_tctl(adapter); - igb_configure_tx_ring(adapter, tx_ring); - - /* Setup Rx descriptor ring and Rx buffers */ - rx_ring->count = IGB_DEFAULT_RXD; - rx_ring->dev = pci_dev_to_dev(adapter->pdev); - rx_ring->netdev = adapter->netdev; -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - rx_ring->rx_buffer_len = IGB_RX_HDR_LEN; -#endif - rx_ring->reg_idx = adapter->vfs_allocated_count; - - if (igb_setup_rx_resources(rx_ring)) { - ret_val = 2; - goto err_nomem; - } - - /* set the default queue to queue 0 of PF */ - E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3); - - /* enable receive ring */ - igb_setup_rctl(adapter); - igb_configure_rx_ring(adapter, rx_ring); - - igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring)); - - return 0; - -err_nomem: - igb_free_desc_rings(adapter); - return ret_val; -} - -static void igb_phy_disable_receiver(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - /* Write out to PHY registers 29 and 30 to disable the Receiver. */ - e1000_write_phy_reg(hw, 29, 0x001F); - e1000_write_phy_reg(hw, 30, 0x8FFC); - e1000_write_phy_reg(hw, 29, 0x001A); - e1000_write_phy_reg(hw, 30, 0x8FF0); -} - -static int igb_integrated_phy_loopback(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ctrl_reg = 0; - - hw->mac.autoneg = FALSE; - - if (hw->phy.type == e1000_phy_m88) { - if (hw->phy.id != I210_I_PHY_ID) { - /* Auto-MDI/MDIX Off */ - e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); - /* reset to update Auto-MDI/MDIX */ - e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140); - /* autoneg off */ - e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140); - } else { - /* force 1000, set loopback */ - e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); - e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); - } - } else { - /* enable MII loopback */ - if (hw->phy.type == e1000_phy_82580) - e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041); - } - - /* force 1000, set loopback */ - e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); - - /* Now set up the MAC to the same speed/duplex as the PHY. 
*/ - ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); - ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ - ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ - E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ - E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ - E1000_CTRL_FD | /* Force Duplex to FULL */ - E1000_CTRL_SLU); /* Set link up enable bit */ - - if (hw->phy.type == e1000_phy_m88) - ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ - - E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); - - /* Disable the receiver on the PHY so when a cable is plugged in, the - * PHY does not begin to autoneg when a cable is reconnected to the NIC. - */ - if (hw->phy.type == e1000_phy_m88) - igb_phy_disable_receiver(adapter); - - mdelay(500); - return 0; -} - -static int igb_set_phy_loopback(struct igb_adapter *adapter) -{ - return igb_integrated_phy_loopback(adapter); -} - -static int igb_setup_loopback_test(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - reg = E1000_READ_REG(hw, E1000_CTRL_EXT); - - /* use CTRL_EXT to identify link type as SGMII can appear as copper */ - if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) { - if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { - - /* Enable DH89xxCC MPHY for near end loopback */ - reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); - reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) | - E1000_MPHY_PCS_CLK_REG_OFFSET; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); - - reg = E1000_READ_REG(hw, E1000_MPHY_DATA); - reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN; - E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); - } - - reg = E1000_READ_REG(hw, E1000_RCTL); - reg |= E1000_RCTL_LBM_TCVR; - E1000_WRITE_REG(hw, E1000_RCTL, reg); - - E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK); - - reg = E1000_READ_REG(hw, E1000_CTRL); - reg &= ~(E1000_CTRL_RFCE | - E1000_CTRL_TFCE | - E1000_CTRL_LRST); - reg |= E1000_CTRL_SLU | - E1000_CTRL_FD; - E1000_WRITE_REG(hw, E1000_CTRL, reg); - - /* Unset switch control to serdes energy detect */ - reg = E1000_READ_REG(hw, E1000_CONNSW); - reg &= ~E1000_CONNSW_ENRGSRC; - E1000_WRITE_REG(hw, E1000_CONNSW, reg); - - /* Unset sigdetect for SERDES loopback on - * 82580 and newer devices - */ - if (hw->mac.type >= e1000_82580) { - reg = E1000_READ_REG(hw, E1000_PCS_CFG0); - reg |= E1000_PCS_CFG_IGN_SD; - E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); - } - - /* Set PCS register for forced speed */ - reg = E1000_READ_REG(hw, E1000_PCS_LCTL); - reg &= ~E1000_PCS_LCTL_AN_ENABLE; /* Disable Autoneg*/ - reg |= E1000_PCS_LCTL_FLV_LINK_UP | /* Force link up */ - E1000_PCS_LCTL_FSV_1000 | /* Force 1000 */ - E1000_PCS_LCTL_FDV_FULL | /* SerDes Full duplex */ - E1000_PCS_LCTL_FSD | /* Force Speed */ - E1000_PCS_LCTL_FORCE_LINK; /* Force Link */ - E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); - - return 0; - } - - return igb_set_phy_loopback(adapter); -} - -static void igb_loopback_cleanup(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 rctl; - u16 phy_reg; - - if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { - u32 reg; - - /* Disable near end loopback on DH89xxCC */ - reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); - reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK ) 
| - E1000_MPHY_PCS_CLK_REG_OFFSET; - E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); - - reg = E1000_READ_REG(hw, E1000_MPHY_DATA); - reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN; - E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); - } - - rctl = E1000_READ_REG(hw, E1000_RCTL); - rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - - hw->mac.autoneg = TRUE; - e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg); - if (phy_reg & MII_CR_LOOPBACK) { - phy_reg &= ~MII_CR_LOOPBACK; - if (hw->phy.type == I210_I_PHY_ID) - e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); - e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg); - e1000_phy_commit(hw); - } -} -static void igb_create_lbtest_frame(struct sk_buff *skb, - unsigned int frame_size) -{ - memset(skb->data, 0xFF, frame_size); - frame_size /= 2; - memset(&skb->data[frame_size], 0xAA, frame_size - 1); - memset(&skb->data[frame_size + 10], 0xBE, 1); - memset(&skb->data[frame_size + 12], 0xAF, 1); -} - -static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer, - unsigned int frame_size) -{ - unsigned char *data; - bool match = true; - - frame_size >>= 1; - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - data = rx_buffer->skb->data; -#else - data = kmap(rx_buffer->page); -#endif - - if (data[3] != 0xFF || - data[frame_size + 10] != 0xBE || - data[frame_size + 12] != 0xAF) - match = false; - -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - kunmap(rx_buffer->page); - -#endif - return match; -} - -static u16 igb_clean_test_rings(struct igb_ring *rx_ring, - struct igb_ring *tx_ring, - unsigned int size) -{ - union e1000_adv_rx_desc *rx_desc; - struct igb_rx_buffer *rx_buffer_info; - struct igb_tx_buffer *tx_buffer_info; - u16 rx_ntc, tx_ntc, count = 0; - - /* initialize next to clean and descriptor values */ - rx_ntc = rx_ring->next_to_clean; - tx_ntc = tx_ring->next_to_clean; - rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); - - while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { - /* check rx buffer */ - rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; - - /* sync Rx buffer for CPU read */ - dma_sync_single_for_cpu(rx_ring->dev, - rx_buffer_info->dma, -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - IGB_RX_HDR_LEN, -#else - IGB_RX_BUFSZ, -#endif - DMA_FROM_DEVICE); - - /* verify contents of skb */ - if (igb_check_lbtest_frame(rx_buffer_info, size)) - count++; - - /* sync Rx buffer for device write */ - dma_sync_single_for_device(rx_ring->dev, - rx_buffer_info->dma, -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - IGB_RX_HDR_LEN, -#else - IGB_RX_BUFSZ, -#endif - DMA_FROM_DEVICE); - - /* unmap buffer on tx side */ - tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - - /* increment rx/tx next to clean counters */ - rx_ntc++; - if (rx_ntc == rx_ring->count) - rx_ntc = 0; - tx_ntc++; - if (tx_ntc == tx_ring->count) - tx_ntc = 0; - - /* fetch next descriptor */ - rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); - } - - /* re-map buffers to ring, store next to clean values */ - igb_alloc_rx_buffers(rx_ring, count); - rx_ring->next_to_clean = rx_ntc; - tx_ring->next_to_clean = tx_ntc; - - return count; -} - -static int igb_run_loopback_test(struct igb_adapter *adapter) -{ - struct igb_ring *tx_ring = &adapter->test_tx_ring; - struct igb_ring *rx_ring = &adapter->test_rx_ring; - u16 i, j, lc, good_cnt; - int ret_val = 0; - unsigned int size = IGB_RX_HDR_LEN; - netdev_tx_t tx_ret_val; - struct sk_buff *skb; - - /* allocate test skb */ - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return 11; - - /* place 
data into test skb */ - igb_create_lbtest_frame(skb, size); - skb_put(skb, size); - - /* - * Calculate the loop count based on the largest descriptor ring - * The idea is to wrap the largest ring a number of times using 64 - * send/receive pairs during each loop - */ - - if (rx_ring->count <= tx_ring->count) - lc = ((tx_ring->count / 64) * 2) + 1; - else - lc = ((rx_ring->count / 64) * 2) + 1; - - for (j = 0; j <= lc; j++) { /* loop count loop */ - /* reset count of good packets */ - good_cnt = 0; - - /* place 64 packets on the transmit queue*/ - for (i = 0; i < 64; i++) { - skb_get(skb); - tx_ret_val = igb_xmit_frame_ring(skb, tx_ring); - if (tx_ret_val == NETDEV_TX_OK) - good_cnt++; - } - - if (good_cnt != 64) { - ret_val = 12; - break; - } - - /* allow 200 milliseconds for packets to go from tx to rx */ - msleep(200); - - good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size); - if (good_cnt != 64) { - ret_val = 13; - break; - } - } /* end loop count loop */ - - /* free the original skb */ - kfree_skb(skb); - - return ret_val; -} - -static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) -{ - /* PHY loopback cannot be performed if SoL/IDER - * sessions are active */ - if (e1000_check_reset_block(&adapter->hw)) { - dev_err(pci_dev_to_dev(adapter->pdev), - "Cannot do PHY loopback test " - "when SoL/IDER is active.\n"); - *data = 0; - goto out; - } - if (adapter->hw.mac.type == e1000_i354) { - dev_info(&adapter->pdev->dev, - "Loopback test not supported on i354.\n"); - *data = 0; - goto out; - } - *data = igb_setup_desc_rings(adapter); - if (*data) - goto out; - *data = igb_setup_loopback_test(adapter); - if (*data) - goto err_loopback; - *data = igb_run_loopback_test(adapter); - - igb_loopback_cleanup(adapter); - -err_loopback: - igb_free_desc_rings(adapter); -out: - return *data; -} - -static int igb_link_test(struct igb_adapter *adapter, u64 *data) -{ - u32 link; - int i, time; - - *data = 0; - time = 0; - if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { - int i = 0; - adapter->hw.mac.serdes_has_link = FALSE; - - /* On some blade server designs, link establishment - * could take as long as 2-3 minutes */ - do { - e1000_check_for_link(&adapter->hw); - if (adapter->hw.mac.serdes_has_link) - goto out; - msleep(20); - } while (i++ < 3750); - - *data = 1; - } else { - for (i=0; i < IGB_MAX_LINK_TRIES; i++) { - link = igb_has_link(adapter); - if (link) - goto out; - else { - time++; - msleep(1000); - } - } - if (!link) - *data = 1; - } - out: - return *data; -} - -static void igb_diag_test(struct net_device *netdev, - struct ethtool_test *eth_test, u64 *data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - u16 autoneg_advertised; - u8 forced_speed_duplex, autoneg; - bool if_running = netif_running(netdev); - - set_bit(__IGB_TESTING, &adapter->state); - if (eth_test->flags == ETH_TEST_FL_OFFLINE) { - /* Offline tests */ - - /* save speed, duplex, autoneg settings */ - autoneg_advertised = adapter->hw.phy.autoneg_advertised; - forced_speed_duplex = adapter->hw.mac.forced_speed_duplex; - autoneg = adapter->hw.mac.autoneg; - - dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n"); - - /* power up link for link test */ - igb_power_up_link(adapter); - - /* Link test performed before hardware reset so autoneg doesn't - * interfere with test result */ - if (igb_link_test(adapter, &data[4])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - if (if_running) - /* indicate we're in test mode */ - dev_close(netdev); - else - igb_reset(adapter); - - 
if (igb_reg_test(adapter, &data[0])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - igb_reset(adapter); - if (igb_eeprom_test(adapter, &data[1])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - igb_reset(adapter); - if (igb_intr_test(adapter, &data[2])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - igb_reset(adapter); - - /* power up link for loopback test */ - igb_power_up_link(adapter); - - if (igb_loopback_test(adapter, &data[3])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - /* restore speed, duplex, autoneg settings */ - adapter->hw.phy.autoneg_advertised = autoneg_advertised; - adapter->hw.mac.forced_speed_duplex = forced_speed_duplex; - adapter->hw.mac.autoneg = autoneg; - - /* force this routine to wait until autoneg complete/timeout */ - adapter->hw.phy.autoneg_wait_to_complete = TRUE; - igb_reset(adapter); - adapter->hw.phy.autoneg_wait_to_complete = FALSE; - - clear_bit(__IGB_TESTING, &adapter->state); - if (if_running) - dev_open(netdev); - } else { - dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n"); - - /* PHY is powered down when interface is down */ - if (if_running && igb_link_test(adapter, &data[4])) - eth_test->flags |= ETH_TEST_FL_FAILED; - else - data[4] = 0; - - /* Online tests aren't run; pass by default */ - data[0] = 0; - data[1] = 0; - data[2] = 0; - data[3] = 0; - - clear_bit(__IGB_TESTING, &adapter->state); - } - msleep_interruptible(4 * 1000); -} - -static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC | - WAKE_PHY; - wol->wolopts = 0; - - if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) - return; - - /* apply any specific unsupported masks here */ - switch (adapter->hw.device_id) { - default: - break; - } - - if (adapter->wol & E1000_WUFC_EX) - wol->wolopts |= WAKE_UCAST; - if (adapter->wol & E1000_WUFC_MC) - wol->wolopts |= WAKE_MCAST; - if (adapter->wol & E1000_WUFC_BC) - wol->wolopts |= WAKE_BCAST; - if (adapter->wol & E1000_WUFC_MAG) - wol->wolopts |= WAKE_MAGIC; - if (adapter->wol & E1000_WUFC_LNKC) - wol->wolopts |= WAKE_PHY; -} - -static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE)) - return -EOPNOTSUPP; - - if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) - return wol->wolopts ? 
-EOPNOTSUPP : 0; - - /* these settings will always override what we currently have */ - adapter->wol = 0; - - if (wol->wolopts & WAKE_UCAST) - adapter->wol |= E1000_WUFC_EX; - if (wol->wolopts & WAKE_MCAST) - adapter->wol |= E1000_WUFC_MC; - if (wol->wolopts & WAKE_BCAST) - adapter->wol |= E1000_WUFC_BC; - if (wol->wolopts & WAKE_MAGIC) - adapter->wol |= E1000_WUFC_MAG; - if (wol->wolopts & WAKE_PHY) - adapter->wol |= E1000_WUFC_LNKC; - device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); - - return 0; -} - -/* bit defines for adapter->led_status */ -#ifdef HAVE_ETHTOOL_SET_PHYS_ID -static int igb_set_phys_id(struct net_device *netdev, - enum ethtool_phys_id_state state) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - - switch (state) { - case ETHTOOL_ID_ACTIVE: - e1000_blink_led(hw); - return 2; - case ETHTOOL_ID_ON: - e1000_led_on(hw); - break; - case ETHTOOL_ID_OFF: - e1000_led_off(hw); - break; - case ETHTOOL_ID_INACTIVE: - e1000_led_off(hw); - e1000_cleanup_led(hw); - break; - } - - return 0; -} -#else -static int igb_phys_id(struct net_device *netdev, u32 data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - unsigned long timeout; - - timeout = data * 1000; - - /* - * msleep_interruptable only accepts unsigned int so we are limited - * in how long a duration we can wait - */ - if (!timeout || timeout > UINT_MAX) - timeout = UINT_MAX; - - e1000_blink_led(hw); - msleep_interruptible(timeout); - - e1000_led_off(hw); - e1000_cleanup_led(hw); - - return 0; -} -#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ - -static int igb_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - int i; - - if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) || - ((ec->rx_coalesce_usecs > 3) && - (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) || - (ec->rx_coalesce_usecs == 2)) - { - printk("set_coalesce:invalid parameter.."); - return -EINVAL; - } - - if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) || - ((ec->tx_coalesce_usecs > 3) && - (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) || - (ec->tx_coalesce_usecs == 2)) - return -EINVAL; - - if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) - return -EINVAL; - - if (ec->tx_max_coalesced_frames_irq) - adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; - - /* If ITR is disabled, disable DMAC */ - if (ec->rx_coalesce_usecs == 0) { - adapter->dmac = IGB_DMAC_DISABLE; - } - - /* convert to rate of irq's per second */ - if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) - adapter->rx_itr_setting = ec->rx_coalesce_usecs; - else - adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; - - /* convert to rate of irq's per second */ - if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) - adapter->tx_itr_setting = adapter->rx_itr_setting; - else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3) - adapter->tx_itr_setting = ec->tx_coalesce_usecs; - else - adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; - - for (i = 0; i < adapter->num_q_vectors; i++) { - struct igb_q_vector *q_vector = adapter->q_vector[i]; - q_vector->tx.work_limit = adapter->tx_work_limit; - if (q_vector->rx.ring) - q_vector->itr_val = adapter->rx_itr_setting; - else - q_vector->itr_val = adapter->tx_itr_setting; - if (q_vector->itr_val && q_vector->itr_val <= 3) - q_vector->itr_val = IGB_START_ITR; - q_vector->set_itr = 1; - } - - return 0; -} - -static int igb_get_coalesce(struct net_device *netdev, - struct 
ethtool_coalesce *ec) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - if (adapter->rx_itr_setting <= 3) - ec->rx_coalesce_usecs = adapter->rx_itr_setting; - else - ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; - - ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; - - if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) { - if (adapter->tx_itr_setting <= 3) - ec->tx_coalesce_usecs = adapter->tx_itr_setting; - else - ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; - } - - return 0; -} - -static int igb_nway_reset(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - if (netif_running(netdev)) - igb_reinit_locked(adapter); - return 0; -} - -#ifdef HAVE_ETHTOOL_GET_SSET_COUNT -static int igb_get_sset_count(struct net_device *netdev, int sset) -{ - switch (sset) { - case ETH_SS_STATS: - return IGB_STATS_LEN; - case ETH_SS_TEST: - return IGB_TEST_LEN; - default: - return -ENOTSUPP; - } -} -#else -static int igb_get_stats_count(struct net_device *netdev) -{ - return IGB_STATS_LEN; -} - -static int igb_diag_test_count(struct net_device *netdev) -{ - return IGB_TEST_LEN; -} -#endif - -static void igb_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, u64 *data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats *net_stats = &netdev->stats; -#else - struct net_device_stats *net_stats = &adapter->net_stats; -#endif - u64 *queue_stat; - int i, j, k; - char *p; - - igb_update_stats(adapter); - - for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { - p = (char *)adapter + igb_gstrings_stats[i].stat_offset; - data[i] = (igb_gstrings_stats[i].sizeof_stat == - sizeof(u64)) ? *(u64 *)p : *(u32 *)p; - } - for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) { - p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset; - data[i] = (igb_gstrings_net_stats[j].sizeof_stat == - sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; - } - for (j = 0; j < adapter->num_tx_queues; j++) { - queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats; - for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++) - data[i] = queue_stat[k]; - } - for (j = 0; j < adapter->num_rx_queues; j++) { - queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats; - for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++) - data[i] = queue_stat[k]; - } -} - -static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - u8 *p = data; - int i; - - switch (stringset) { - case ETH_SS_TEST: - memcpy(data, *igb_gstrings_test, - IGB_TEST_LEN*ETH_GSTRING_LEN); - break; - case ETH_SS_STATS: - for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { - memcpy(p, igb_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) { - memcpy(p, igb_gstrings_net_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_restart", i); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_drops", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_csum_err", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_alloc_failed", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_ipv4_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_ipv4e_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_ipv6_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_ipv6e_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_tcp_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_udp_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_sctp_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_nfs_packets", i); - p += ETH_GSTRING_LEN; - } -/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ - break; - } -} - -#ifdef HAVE_ETHTOOL_GET_TS_INFO -static int igb_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) -{ - struct igb_adapter *adapter = netdev_priv(dev); - - switch (adapter->hw.mac.type) { -#ifdef HAVE_PTP_1588_CLOCK - case e1000_82575: - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - return 0; - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; - - if (adapter->ptp_clock) - info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; - - info->tx_types = - (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); - - info->rx_filters = 1 << HWTSTAMP_FILTER_NONE; - - /* 82576 does not support timestamping all packets. 
*/ - if (adapter->hw.mac.type >= e1000_82580) - info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL; - else - info->rx_filters |= - (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); - - return 0; -#endif /* HAVE_PTP_1588_CLOCK */ - default: - return -EOPNOTSUPP; - } -} -#endif /* HAVE_ETHTOOL_GET_TS_INFO */ - -#ifdef CONFIG_PM_RUNTIME -static int igb_ethtool_begin(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - pm_runtime_get_sync(&adapter->pdev->dev); - - return 0; -} - -static void igb_ethtool_complete(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - pm_runtime_put(&adapter->pdev->dev); -} -#endif /* CONFIG_PM_RUNTIME */ - -#ifndef HAVE_NDO_SET_FEATURES -static u32 igb_get_rx_csum(struct net_device *netdev) -{ - return !!(netdev->features & NETIF_F_RXCSUM); -} - -static int igb_set_rx_csum(struct net_device *netdev, u32 data) -{ - const u32 feature_list = NETIF_F_RXCSUM; - - if (data) - netdev->features |= feature_list; - else - netdev->features &= ~feature_list; - - return 0; -} - -static int igb_set_tx_csum(struct net_device *netdev, u32 data) -{ - struct igb_adapter *adapter = netdev_priv(netdev); -#ifdef NETIF_F_IPV6_CSUM - u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; -#else - u32 feature_list = NETIF_F_IP_CSUM; -#endif - - if (adapter->hw.mac.type >= e1000_82576) - feature_list |= NETIF_F_SCTP_CSUM; - - if (data) - netdev->features |= feature_list; - else - netdev->features &= ~feature_list; - - return 0; -} - -#ifdef NETIF_F_TSO -static int igb_set_tso(struct net_device *netdev, u32 data) -{ -#ifdef NETIF_F_TSO6 - const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6; -#else - const u32 feature_list = NETIF_F_TSO; -#endif - - if (data) - netdev->features |= feature_list; - else - netdev->features &= ~feature_list; - -#ifndef HAVE_NETDEV_VLAN_FEATURES - if (!data) { - struct igb_adapter *adapter = netdev_priv(netdev); - struct net_device *v_netdev; - int i; - - /* disable TSO on all VLANs if they're present */ - if (!adapter->vlgrp) - goto tso_out; - - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - v_netdev = vlan_group_get_device(adapter->vlgrp, i); - if (!v_netdev) - continue; - - v_netdev->features &= ~feature_list; - vlan_group_set_device(adapter->vlgrp, i, v_netdev); - } - } - -tso_out: - -#endif /* HAVE_NETDEV_VLAN_FEATURES */ - return 0; -} - -#endif /* NETIF_F_TSO */ -#ifdef ETHTOOL_GFLAGS -static int igb_set_flags(struct net_device *netdev, u32 data) -{ - u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | - ETH_FLAG_RXHASH; -#ifndef HAVE_VLAN_RX_REGISTER - u32 changed = netdev->features ^ data; -#endif - int rc; -#ifndef IGB_NO_LRO - - supported_flags |= ETH_FLAG_LRO; -#endif - /* - * Since there is no support for separate tx vlan accel - * enabled make sure tx flag is cleared if rx is. 
- */ - if (!(data & ETH_FLAG_RXVLAN)) - data &= ~ETH_FLAG_TXVLAN; - - rc = ethtool_op_set_flags(netdev, data, supported_flags); - if (rc) - return rc; -#ifndef HAVE_VLAN_RX_REGISTER - - if (changed & ETH_FLAG_RXVLAN) - igb_vlan_mode(netdev, data); -#endif - - return 0; -} - -#endif /* ETHTOOL_GFLAGS */ -#endif /* HAVE_NDO_SET_FEATURES */ -#ifdef ETHTOOL_SADV_COAL -static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - switch (edata->data) { - case IGB_DMAC_DISABLE: - adapter->dmac = edata->data; - break; - case IGB_DMAC_MIN: - adapter->dmac = edata->data; - break; - case IGB_DMAC_500: - adapter->dmac = edata->data; - break; - case IGB_DMAC_EN_DEFAULT: - adapter->dmac = edata->data; - break; - case IGB_DMAC_2000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_3000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_4000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_5000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_6000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_7000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_8000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_9000: - adapter->dmac = edata->data; - break; - case IGB_DMAC_MAX: - adapter->dmac = edata->data; - break; - default: - adapter->dmac = IGB_DMAC_DISABLE; - printk("set_dmac: invalid setting, setting DMAC to %d\n", - adapter->dmac); - } - printk("%s: setting DMAC to %d\n", netdev->name, adapter->dmac); - return 0; -} -#endif /* ETHTOOL_SADV_COAL */ -#ifdef ETHTOOL_GADV_COAL -static void igb_get_dmac(struct net_device *netdev, - struct ethtool_value *edata) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - edata->data = adapter->dmac; - - return; -} -#endif - -#ifdef ETHTOOL_GEEE -static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 ret_val; - u16 phy_data; - - if ((hw->mac.type < e1000_i350) || - (hw->phy.media_type != e1000_media_type_copper)) - return -EOPNOTSUPP; - - edata->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full); - - if (!hw->dev_spec._82575.eee_disable) - edata->advertised = - mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); - - /* The IPCNFG and EEER registers are not supported on I354. 
*/ - if (hw->mac.type == e1000_i354) { - e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active); - } else { - u32 eeer; - - eeer = E1000_READ_REG(hw, E1000_EEER); - - /* EEE status on negotiated link */ - if (eeer & E1000_EEER_EEE_NEG) - edata->eee_active = true; - - if (eeer & E1000_EEER_TX_LPI_EN) - edata->tx_lpi_enabled = true; - } - - /* EEE Link Partner Advertised */ - switch (hw->mac.type) { - case e1000_i350: - ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350, - &phy_data); - if (ret_val) - return -ENODATA; - - edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); - - break; - case e1000_i354: - case e1000_i210: - case e1000_i211: - ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210, - E1000_EEE_LP_ADV_DEV_I210, - &phy_data); - if (ret_val) - return -ENODATA; - - edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); - - break; - default: - break; - } - - edata->eee_enabled = !hw->dev_spec._82575.eee_disable; - - if ((hw->mac.type == e1000_i354) && - (edata->eee_enabled)) - edata->tx_lpi_enabled = true; - - /* - * report correct negotiated EEE status for devices that - * wrongly report EEE at half-duplex - */ - if (adapter->link_duplex == HALF_DUPLEX) { - edata->eee_enabled = false; - edata->eee_active = false; - edata->tx_lpi_enabled = false; - edata->advertised &= ~edata->advertised; - } - - return 0; -} -#endif - -#ifdef ETHTOOL_SEEE -static int igb_set_eee(struct net_device *netdev, - struct ethtool_eee *edata) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct ethtool_eee eee_curr; - s32 ret_val; - - if ((hw->mac.type < e1000_i350) || - (hw->phy.media_type != e1000_media_type_copper)) - return -EOPNOTSUPP; - - ret_val = igb_get_eee(netdev, &eee_curr); - if (ret_val) - return ret_val; - - if (eee_curr.eee_enabled) { - if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) { - dev_err(pci_dev_to_dev(adapter->pdev), - "Setting EEE tx-lpi is not supported\n"); - return -EINVAL; - } - - /* Tx LPI time is not implemented currently */ - if (edata->tx_lpi_timer) { - dev_err(pci_dev_to_dev(adapter->pdev), - "Setting EEE Tx LPI timer is not supported\n"); - return -EINVAL; - } - - if (edata->advertised & - ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { - dev_err(pci_dev_to_dev(adapter->pdev), - "EEE Advertisement supports only 100Tx and or 100T full duplex\n"); - return -EINVAL; - } - - } else if (!edata->eee_enabled) { - dev_err(pci_dev_to_dev(adapter->pdev), - "Setting EEE options is not supported with EEE disabled\n"); - return -EINVAL; - } - - adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); - - if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) { - hw->dev_spec._82575.eee_disable = !edata->eee_enabled; - - /* reset link */ - if (netif_running(netdev)) - igb_reinit_locked(adapter); - else - igb_reset(adapter); - } - - return 0; -} -#endif /* ETHTOOL_SEEE */ - -#ifdef ETHTOOL_GRXRINGS -static int igb_get_rss_hash_opts(struct igb_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - cmd->data = 0; - - /* Report default options for RSS on igb */ - switch (cmd->flow_type) { - case TCP_V4_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case UDP_V4_FLOW: - if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case SCTP_V4_FLOW: - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case IPV4_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - case TCP_V6_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case 
UDP_V6_FLOW: - if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case SCTP_V6_FLOW: - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case IPV6_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, -#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS - void *rule_locs) -#else - u32 *rule_locs) -#endif -{ - struct igb_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = adapter->num_rx_queues; - ret = 0; - break; - case ETHTOOL_GRXFH: - ret = igb_get_rss_hash_opts(adapter, cmd); - break; - default: - break; - } - - return ret; -} - -#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \ - IGB_FLAG_RSS_FIELD_IPV6_UDP) -static int igb_set_rss_hash_opt(struct igb_adapter *adapter, - struct ethtool_rxnfc *nfc) -{ - u32 flags = adapter->flags; - - /* - * RSS does not support anything other than hashing - * to queues on src and dst IPs and ports - */ - if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) - return -EINVAL; - - switch (nfc->flow_type) { - case TCP_V4_FLOW: - case TCP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST) || - !(nfc->data & RXH_L4_B_0_1) || - !(nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - break; - case UDP_V4_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP; - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP; - break; - default: - return -EINVAL; - } - break; - case UDP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP; - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP; - break; - default: - return -EINVAL; - } - break; - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case SCTP_V4_FLOW: - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case SCTP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST) || - (nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - break; - default: - return -EINVAL; - } - - /* if we changed something we need to update flags */ - if (flags != adapter->flags) { - struct e1000_hw *hw = &adapter->hw; - u32 mrqc = E1000_READ_REG(hw, E1000_MRQC); - - if ((flags & UDP_RSS_FLAGS) && - !(adapter->flags & UDP_RSS_FLAGS)) - DPRINTK(DRV, WARNING, - "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); - - adapter->flags = flags; - - /* Perform hash on these packet types */ - mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP | - E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP; - - mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP | - E1000_MRQC_RSS_FIELD_IPV6_UDP); - - if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) - mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; - - if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) - mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; - - E1000_WRITE_REG(hw, E1000_MRQC, mrqc); - } - - return 0; -} - -static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) -{ - struct igb_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case 
ETHTOOL_SRXFH: - ret = igb_set_rss_hash_opt(adapter, cmd); - break; - default: - break; - } - - return ret; -} -#endif /* ETHTOOL_GRXRINGS */ - -static const struct ethtool_ops igb_ethtool_ops = { - .get_settings = igb_get_settings, - .set_settings = igb_set_settings, - .get_drvinfo = igb_get_drvinfo, - .get_regs_len = igb_get_regs_len, - .get_regs = igb_get_regs, - .get_wol = igb_get_wol, - .set_wol = igb_set_wol, - .get_msglevel = igb_get_msglevel, - .set_msglevel = igb_set_msglevel, - .nway_reset = igb_nway_reset, - .get_link = igb_get_link, - .get_eeprom_len = igb_get_eeprom_len, - .get_eeprom = igb_get_eeprom, - .set_eeprom = igb_set_eeprom, - .get_ringparam = igb_get_ringparam, - .set_ringparam = igb_set_ringparam, - .get_pauseparam = igb_get_pauseparam, - .set_pauseparam = igb_set_pauseparam, - .self_test = igb_diag_test, - .get_strings = igb_get_strings, -#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT -#ifdef HAVE_ETHTOOL_SET_PHYS_ID - .set_phys_id = igb_set_phys_id, -#else - .phys_id = igb_phys_id, -#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ -#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ -#ifdef HAVE_ETHTOOL_GET_SSET_COUNT - .get_sset_count = igb_get_sset_count, -#else - .get_stats_count = igb_get_stats_count, - .self_test_count = igb_diag_test_count, -#endif - .get_ethtool_stats = igb_get_ethtool_stats, -#ifdef HAVE_ETHTOOL_GET_PERM_ADDR - .get_perm_addr = ethtool_op_get_perm_addr, -#endif - .get_coalesce = igb_get_coalesce, - .set_coalesce = igb_set_coalesce, -#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT -#ifdef HAVE_ETHTOOL_GET_TS_INFO - .get_ts_info = igb_get_ts_info, -#endif /* HAVE_ETHTOOL_GET_TS_INFO */ -#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ -#ifdef CONFIG_PM_RUNTIME - .begin = igb_ethtool_begin, - .complete = igb_ethtool_complete, -#endif /* CONFIG_PM_RUNTIME */ -#ifndef HAVE_NDO_SET_FEATURES - .get_rx_csum = igb_get_rx_csum, - .set_rx_csum = igb_set_rx_csum, - .get_tx_csum = ethtool_op_get_tx_csum, - .set_tx_csum = igb_set_tx_csum, - .get_sg = ethtool_op_get_sg, - .set_sg = ethtool_op_set_sg, -#ifdef NETIF_F_TSO - .get_tso = ethtool_op_get_tso, - .set_tso = igb_set_tso, -#endif -#ifdef ETHTOOL_GFLAGS - .get_flags = ethtool_op_get_flags, - .set_flags = igb_set_flags, -#endif /* ETHTOOL_GFLAGS */ -#endif /* HAVE_NDO_SET_FEATURES */ -#ifdef ETHTOOL_GADV_COAL - .get_advcoal = igb_get_adv_coal, - .set_advcoal = igb_set_dmac_coal, -#endif /* ETHTOOL_GADV_COAL */ -#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT -#ifdef ETHTOOL_GEEE - .get_eee = igb_get_eee, -#endif -#ifdef ETHTOOL_SEEE - .set_eee = igb_set_eee, -#endif -#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ -#ifdef ETHTOOL_GRXRINGS - .get_rxnfc = igb_get_rxnfc, - .set_rxnfc = igb_set_rxnfc, -#endif -}; - -#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT -static const struct ethtool_ops_ext igb_ethtool_ops_ext = { - .size = sizeof(struct ethtool_ops_ext), - .get_ts_info = igb_get_ts_info, - .set_phys_id = igb_set_phys_id, - .get_eee = igb_get_eee, - .set_eee = igb_set_eee, -}; - -void igb_set_ethtool_ops(struct net_device *netdev) -{ - SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops); - set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext); -} -#else -void igb_set_ethtool_ops(struct net_device *netdev) -{ - /* have to "undeclare" const on this struct to remove warnings */ - SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops); -} -#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ -#endif /* SIOCETHTOOL */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c deleted file mode 
100644 index af378d2f..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ /dev/null @@ -1,10344 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#ifdef NETIF_F_TSO -#include -#ifdef NETIF_F_TSO6 -#include -#include -#endif -#endif -#ifdef SIOCGMIIPHY -#include -#endif -#ifdef SIOCETHTOOL -#include -#endif -#include -#ifdef CONFIG_PM_RUNTIME -#include -#endif /* CONFIG_PM_RUNTIME */ - -#include -#include "igb.h" -#include "igb_vmdq.h" - -#include - -#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR) -#define DRV_DEBUG "_debug" -#else -#define DRV_DEBUG -#endif -#define DRV_HW_PERF -#define VERSION_SUFFIX - -#define MAJ 5 -#define MIN 0 -#define BUILD 6 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF - -char igb_driver_name[] = "igb"; -char igb_driver_version[] = DRV_VERSION; -static const char igb_driver_string[] = - "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = - "Copyright (c) 2007-2013 Intel Corporation."; - -const struct pci_device_id igb_pci_tbl[] = { - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) }, - /* required last entry */ - {0, } -}; - 
-//MODULE_DEVICE_TABLE(pci, igb_pci_tbl); -static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__)); -void igb_reset(struct igb_adapter *); -static int igb_setup_all_tx_resources(struct igb_adapter *); -static int igb_setup_all_rx_resources(struct igb_adapter *); -static void igb_free_all_tx_resources(struct igb_adapter *); -static void igb_free_all_rx_resources(struct igb_adapter *); -static void igb_setup_mrqc(struct igb_adapter *); -void igb_update_stats(struct igb_adapter *); -static int igb_probe(struct pci_dev *, const struct pci_device_id *); -static void __devexit igb_remove(struct pci_dev *pdev); -static int igb_sw_init(struct igb_adapter *); -static int igb_open(struct net_device *); -static int igb_close(struct net_device *); -static void igb_configure(struct igb_adapter *); -static void igb_configure_tx(struct igb_adapter *); -static void igb_configure_rx(struct igb_adapter *); -static void igb_clean_all_tx_rings(struct igb_adapter *); -static void igb_clean_all_rx_rings(struct igb_adapter *); -static void igb_clean_tx_ring(struct igb_ring *); -static void igb_set_rx_mode(struct net_device *); -#ifdef HAVE_TIMER_SETUP -static void igb_update_phy_info(struct timer_list *); -static void igb_watchdog(struct timer_list *); -#else -static void igb_update_phy_info(unsigned long); -static void igb_watchdog(unsigned long); -#endif -static void igb_watchdog_task(struct work_struct *); -static void igb_dma_err_task(struct work_struct *); -#ifdef HAVE_TIMER_SETUP -static void igb_dma_err_timer(struct timer_list *); -#else -static void igb_dma_err_timer(unsigned long data); -#endif -static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static struct net_device_stats *igb_get_stats(struct net_device *); -static int igb_change_mtu(struct net_device *, int); -void igb_full_sync_mac_table(struct igb_adapter *adapter); -static int igb_set_mac(struct net_device *, void *); -static void igb_set_uta(struct igb_adapter *adapter); -static irqreturn_t igb_intr(int irq, void *); -static irqreturn_t igb_intr_msi(int irq, void *); -static irqreturn_t igb_msix_other(int irq, void *); -static irqreturn_t igb_msix_ring(int irq, void *); -#ifdef IGB_DCA -static void igb_update_dca(struct igb_q_vector *); -static void igb_setup_dca(struct igb_adapter *); -#endif /* IGB_DCA */ -static int igb_poll(struct napi_struct *, int); -static bool igb_clean_tx_irq(struct igb_q_vector *); -static bool igb_clean_rx_irq(struct igb_q_vector *, int); -static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); -static void igb_reset_task(struct work_struct *); -#ifdef HAVE_VLAN_RX_REGISTER -static void igb_vlan_mode(struct net_device *, struct vlan_group *); -#endif -#ifdef HAVE_VLAN_PROTOCOL -static int igb_vlan_rx_add_vid(struct net_device *, - __be16 proto, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, - __be16 proto, u16); -#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID -#ifdef NETIF_F_HW_VLAN_CTAG_RX -static int igb_vlan_rx_add_vid(struct net_device *, - __always_unused __be16 proto, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, - __always_unused __be16 proto, u16); -#else -static int igb_vlan_rx_add_vid(struct net_device *, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, u16); -#endif -#else -static void igb_vlan_rx_add_vid(struct net_device *, u16); -static void igb_vlan_rx_kill_vid(struct net_device *, u16); -#endif -static void igb_restore_vlan(struct igb_adapter 
*); -void igb_rar_set(struct igb_adapter *adapter, u32 index); -static void igb_ping_all_vfs(struct igb_adapter *); -static void igb_msg_task(struct igb_adapter *); -static void igb_vmm_control(struct igb_adapter *); -static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); -static void igb_restore_vf_multicasts(struct igb_adapter *adapter); -static void igb_process_mdd_event(struct igb_adapter *); -#ifdef IFLA_VF_MAX -static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac); -static int igb_ndo_set_vf_vlan(struct net_device *netdev, -#ifdef HAVE_VF_VLAN_PROTO - int vf, u16 vlan, u8 qos, __be16 vlan_proto); -#else - int vf, u16 vlan, u8 qos); -#endif -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE -static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, - bool setting); -#endif -#ifdef HAVE_VF_MIN_MAX_TXRATE -static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); -#else /* HAVE_VF_MIN_MAX_TXRATE */ -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); -#endif /* HAVE_VF_MIN_MAX_TXRATE */ -static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, - struct ifla_vf_info *ivi); -static void igb_check_vf_rate_limit(struct igb_adapter *); -#endif -static int igb_vf_configure(struct igb_adapter *adapter, int vf); -#ifdef CONFIG_PM -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS -static int igb_suspend(struct device *dev); -static int igb_resume(struct device *dev); -#ifdef CONFIG_PM_RUNTIME -static int igb_runtime_suspend(struct device *dev); -static int igb_runtime_resume(struct device *dev); -static int igb_runtime_idle(struct device *dev); -#endif /* CONFIG_PM_RUNTIME */ -static const struct dev_pm_ops igb_pm_ops = { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) - .suspend = igb_suspend, - .resume = igb_resume, - .freeze = igb_suspend, - .thaw = igb_resume, - .poweroff = igb_suspend, - .restore = igb_resume, -#ifdef CONFIG_PM_RUNTIME - .runtime_suspend = igb_runtime_suspend, - .runtime_resume = igb_runtime_resume, - .runtime_idle = igb_runtime_idle, -#endif -#else /* Linux >= 2.6.34 */ - SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) -#ifdef CONFIG_PM_RUNTIME - SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, - igb_runtime_idle) -#endif /* CONFIG_PM_RUNTIME */ -#endif /* Linux version */ -}; -#else -static int igb_suspend(struct pci_dev *pdev, pm_message_t state); -static int igb_resume(struct pci_dev *pdev); -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ -#endif /* CONFIG_PM */ -#ifndef USE_REBOOT_NOTIFIER -static void igb_shutdown(struct pci_dev *); -#else -static int igb_notify_reboot(struct notifier_block *, unsigned long, void *); -static struct notifier_block igb_notifier_reboot = { - .notifier_call = igb_notify_reboot, - .next = NULL, - .priority = 0 -}; -#endif -#ifdef IGB_DCA -static int igb_notify_dca(struct notifier_block *, unsigned long, void *); -static struct notifier_block dca_notifier = { - .notifier_call = igb_notify_dca, - .next = NULL, - .priority = 0 -}; -#endif -#ifdef CONFIG_NET_POLL_CONTROLLER -/* for netdump / net console */ -static void igb_netpoll(struct net_device *); -#endif - -#ifdef HAVE_PCI_ERS -static pci_ers_result_t igb_io_error_detected(struct pci_dev *, - pci_channel_state_t); -static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); -static void igb_io_resume(struct pci_dev *); - -static struct pci_error_handlers igb_err_handler = { - .error_detected = igb_io_error_detected, - .slot_reset = igb_io_slot_reset, - .resume = igb_io_resume, -}; -#endif - -static void igb_init_fw(struct 
igb_adapter *adapter); -static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); - -static struct pci_driver igb_driver = { - .name = igb_driver_name, - .id_table = igb_pci_tbl, - .probe = igb_probe, - .remove = __devexit_p(igb_remove), -#ifdef CONFIG_PM -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS - .driver.pm = &igb_pm_ops, -#else - .suspend = igb_suspend, - .resume = igb_resume, -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ -#endif /* CONFIG_PM */ -#ifndef USE_REBOOT_NOTIFIER - .shutdown = igb_shutdown, -#endif -#ifdef HAVE_PCI_ERS - .err_handler = &igb_err_handler -#endif -}; - -//MODULE_AUTHOR("Intel Corporation, "); -//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); -//MODULE_LICENSE("GPL"); -//MODULE_VERSION(DRV_VERSION); - -static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie; - u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; - u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); - u32 vfta; - - /* - * if this is the management vlan the only option is to add it in so - * that the management pass through will continue to work - */ - if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && - (vid == mng_cookie->vlan_id)) - add = TRUE; - - vfta = adapter->shadow_vfta[index]; - - if (add) - vfta |= mask; - else - vfta &= ~mask; - - e1000_write_vfta(hw, index, vfta); - adapter->shadow_vfta[index] = vfta; -} - -static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; -//module_param(debug, int, 0); -//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)"); - -/** - * igb_init_module - Driver Registration Routine - * - * igb_init_module is the first routine called when the driver is - * loaded. All it does is register with the PCI subsystem. - **/ -static int __init igb_init_module(void) -{ - int ret; - - printk(KERN_INFO "%s - version %s\n", - igb_driver_string, igb_driver_version); - - printk(KERN_INFO "%s\n", igb_copyright); -#ifdef IGB_HWMON -/* only use IGB_PROCFS if IGB_HWMON is not defined */ -#else -#ifdef IGB_PROCFS - if (igb_procfs_topdir_init()) - printk(KERN_INFO "Procfs failed to initialize topdir\n"); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ - -#ifdef IGB_DCA - dca_register_notify(&dca_notifier); -#endif - ret = pci_register_driver(&igb_driver); -#ifdef USE_REBOOT_NOTIFIER - if (ret >= 0) { - register_reboot_notifier(&igb_notifier_reboot); - } -#endif - return ret; -} - -#undef module_init -#define module_init(x) static int x(void) __attribute__((__unused__)); -module_init(igb_init_module); - -/** - * igb_exit_module - Driver Exit Cleanup Routine - * - * igb_exit_module is called just before the driver is removed - * from memory. 
- **/ -static void __exit igb_exit_module(void) -{ -#ifdef IGB_DCA - dca_unregister_notify(&dca_notifier); -#endif -#ifdef USE_REBOOT_NOTIFIER - unregister_reboot_notifier(&igb_notifier_reboot); -#endif - pci_unregister_driver(&igb_driver); - -#ifdef IGB_HWMON -/* only compile IGB_PROCFS if IGB_HWMON is not defined */ -#else -#ifdef IGB_PROCFS - igb_procfs_topdir_exit(); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ -} - -#undef module_exit -#define module_exit(x) static void x(void) __attribute__((__unused__)); -module_exit(igb_exit_module); - -#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) -/** - * igb_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize - * - * Once we know the feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. - **/ -static void igb_cache_ring_register(struct igb_adapter *adapter) -{ - int i = 0, j = 0; - u32 rbase_offset = adapter->vfs_allocated_count; - - switch (adapter->hw.mac.type) { - case e1000_82576: - /* The queues are allocated for virtualization such that VF 0 - * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. - * In order to avoid collision we start at the first free queue - * and continue consuming queues in the same sequence - */ - if ((adapter->rss_queues > 1) && adapter->vmdq_pools) { - for (; i < adapter->rss_queues; i++) - adapter->rx_ring[i]->reg_idx = rbase_offset + - Q_IDX_82576(i); - } - case e1000_82575: - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - default: - for (; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->reg_idx = rbase_offset + i; - for (; j < adapter->num_tx_queues; j++) - adapter->tx_ring[j]->reg_idx = rbase_offset + j; - break; - } -} - -static void igb_configure_lli(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u16 port; - - /* LLI should only be enabled for MSI-X or MSI interrupts */ - if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI)) - return; - - if (adapter->lli_port) { - /* use filter 0 for port */ - port = htons((u16)adapter->lli_port); - E1000_WRITE_REG(hw, E1000_IMIR(0), - (port | E1000_IMIR_PORT_IM_EN)); - E1000_WRITE_REG(hw, E1000_IMIREXT(0), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - } - - if (adapter->flags & IGB_FLAG_LLI_PUSH) { - /* use filter 1 for push flag */ - E1000_WRITE_REG(hw, E1000_IMIR(1), - (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); - E1000_WRITE_REG(hw, E1000_IMIREXT(1), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH)); - } - - if (adapter->lli_size) { - /* use filter 2 for size */ - E1000_WRITE_REG(hw, E1000_IMIR(2), - (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); - E1000_WRITE_REG(hw, E1000_IMIREXT(2), - (adapter->lli_size | E1000_IMIREXT_CTRL_BP)); - } - -} - -/** - * igb_write_ivar - configure ivar for given MSI-X vector - * @hw: pointer to the HW structure - * @msix_vector: vector number we are allocating to a given ring - * @index: row index of IVAR register to write within IVAR table - * @offset: column offset of in IVAR, should be multiple of 8 - * - * This function is intended to handle the writing of the IVAR register - * for adapters 82576 and newer. The IVAR table consists of 2 columns, - * each containing an cause allocation for an Rx and Tx ring, and a - * variable number of rows depending on the number of queues supported. 
- **/ -static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, - int index, int offset) -{ - u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - - /* clear any bits that are currently set */ - ivar &= ~((u32)0xFF << offset); - - /* write vector and valid bit */ - ivar |= (msix_vector | E1000_IVAR_VALID) << offset; - - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); -} - -#define IGB_N0_QUEUE -1 -static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - int rx_queue = IGB_N0_QUEUE; - int tx_queue = IGB_N0_QUEUE; - u32 msixbm = 0; - - if (q_vector->rx.ring) - rx_queue = q_vector->rx.ring->reg_idx; - if (q_vector->tx.ring) - tx_queue = q_vector->tx.ring->reg_idx; - - switch (hw->mac.type) { - case e1000_82575: - /* The 82575 assigns vectors using a bitmask, which matches the - bitmask for the EICR/EIMS/EIMC registers. To assign one - or more queues to a vector, we write the appropriate bits - into the MSIXBM register for that vector. */ - if (rx_queue > IGB_N0_QUEUE) - msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; - if (tx_queue > IGB_N0_QUEUE) - msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; - if (!adapter->msix_entries && msix_vector == 0) - msixbm |= E1000_EIMS_OTHER; - E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm); - q_vector->eims_value = msixbm; - break; - case e1000_82576: - /* - * 82576 uses a table that essentially consists of 2 columns - * with 8 rows. The ordering is column-major so we use the - * lower 3 bits as the row index, and the 4th bit as the - * column offset. - */ - if (rx_queue > IGB_N0_QUEUE) - igb_write_ivar(hw, msix_vector, - rx_queue & 0x7, - (rx_queue & 0x8) << 1); - if (tx_queue > IGB_N0_QUEUE) - igb_write_ivar(hw, msix_vector, - tx_queue & 0x7, - ((tx_queue & 0x8) << 1) + 8); - q_vector->eims_value = 1 << msix_vector; - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - /* - * On 82580 and newer adapters the scheme is similar to 82576 - * however instead of ordering column-major we have things - * ordered row-major. So we traverse the table by using - * bit 0 as the column offset, and the remaining bits as the - * row index. - */ - if (rx_queue > IGB_N0_QUEUE) - igb_write_ivar(hw, msix_vector, - rx_queue >> 1, - (rx_queue & 0x1) << 4); - if (tx_queue > IGB_N0_QUEUE) - igb_write_ivar(hw, msix_vector, - tx_queue >> 1, - ((tx_queue & 0x1) << 4) + 8); - q_vector->eims_value = 1 << msix_vector; - break; - default: - BUG(); - break; - } - - /* add q_vector eims value to global eims_enable_mask */ - adapter->eims_enable_mask |= q_vector->eims_value; - - /* configure q_vector to set itr on first interrupt */ - q_vector->set_itr = 1; -} - -/** - * igb_configure_msix - Configure MSI-X hardware - * - * igb_configure_msix sets up the hardware to properly - * generate MSI-X interrupts. - **/ -static void igb_configure_msix(struct igb_adapter *adapter) -{ - u32 tmp; - int i, vector = 0; - struct e1000_hw *hw = &adapter->hw; - - adapter->eims_enable_mask = 0; - - /* set vector for other causes, i.e. link changes */ - switch (hw->mac.type) { - case e1000_82575: - tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); - /* enable MSI-X PBA support*/ - tmp |= E1000_CTRL_EXT_PBA_CLR; - - /* Auto-Mask interrupts upon ICR read. 
*/ - tmp |= E1000_CTRL_EXT_EIAME; - tmp |= E1000_CTRL_EXT_IRCA; - - E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); - - /* enable msix_other interrupt */ - E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++, - E1000_EIMS_OTHER); - adapter->eims_other = E1000_EIMS_OTHER; - - break; - - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - /* Turn on MSI-X capability first, or our settings - * won't stick. And it will take days to debug. */ - E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | - E1000_GPIE_PBA | E1000_GPIE_EIAME | - E1000_GPIE_NSICR); - - /* enable msix_other interrupt */ - adapter->eims_other = 1 << vector; - tmp = (vector++ | E1000_IVAR_VALID) << 8; - - E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp); - break; - default: - /* do nothing, since nothing else supports MSI-X */ - break; - } /* switch (hw->mac.type) */ - - adapter->eims_enable_mask |= adapter->eims_other; - - for (i = 0; i < adapter->num_q_vectors; i++) - igb_assign_vector(adapter->q_vector[i], vector++); - - E1000_WRITE_FLUSH(hw); -} - -/** - * igb_request_msix - Initialize MSI-X interrupts - * - * igb_request_msix allocates MSI-X vectors and requests interrupts from the - * kernel. - **/ -static int igb_request_msix(struct igb_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct e1000_hw *hw = &adapter->hw; - int i, err = 0, vector = 0, free_vector = 0; - - err = request_irq(adapter->msix_entries[vector].vector, - &igb_msix_other, 0, netdev->name, adapter); - if (err) - goto err_out; - - for (i = 0; i < adapter->num_q_vectors; i++) { - struct igb_q_vector *q_vector = adapter->q_vector[i]; - - vector++; - - q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); - - if (q_vector->rx.ring && q_vector->tx.ring) - sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, - q_vector->rx.ring->queue_index); - else if (q_vector->tx.ring) - sprintf(q_vector->name, "%s-tx-%u", netdev->name, - q_vector->tx.ring->queue_index); - else if (q_vector->rx.ring) - sprintf(q_vector->name, "%s-rx-%u", netdev->name, - q_vector->rx.ring->queue_index); - else - sprintf(q_vector->name, "%s-unused", netdev->name); - - err = request_irq(adapter->msix_entries[vector].vector, - igb_msix_ring, 0, q_vector->name, - q_vector); - if (err) - goto err_free; - } - - igb_configure_msix(adapter); - return 0; - -err_free: - /* free already assigned IRQs */ - free_irq(adapter->msix_entries[free_vector++].vector, adapter); - - vector--; - for (i = 0; i < vector; i++) { - free_irq(adapter->msix_entries[free_vector++].vector, - adapter->q_vector[i]); - } -err_out: - return err; -} - -static void igb_reset_interrupt_capability(struct igb_adapter *adapter) -{ - if (adapter->msix_entries) { - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IGB_FLAG_HAS_MSI) { - pci_disable_msi(adapter->pdev); - } -} - -/** - * igb_free_q_vector - Free memory allocated for specific interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be freed - * - * This function frees the memory allocated to the q_vector. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. 
- **/ -static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) -{ - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; - - if (q_vector->tx.ring) - adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; - - if (q_vector->rx.ring) - adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; - - adapter->q_vector[v_idx] = NULL; - netif_napi_del(&q_vector->napi); -#ifndef IGB_NO_LRO - __skb_queue_purge(&q_vector->lrolist.active); -#endif - kfree(q_vector); -} - -/** - * igb_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize - * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. - **/ -static void igb_free_q_vectors(struct igb_adapter *adapter) -{ - int v_idx = adapter->num_q_vectors; - - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) - igb_free_q_vector(adapter, v_idx); -} - -/** - * igb_clear_interrupt_scheme - reset the device to a state of no interrupts - * - * This function resets the device so that it has 0 rx queues, tx queues, and - * MSI-X interrupts allocated. - */ -static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) -{ - igb_free_q_vectors(adapter); - igb_reset_interrupt_capability(adapter); -} - -/** - * igb_process_mdd_event - * @adapter - board private structure - * - * Identify a malicious VF, disable the VF TX/RX queues and log a message. - */ -static void igb_process_mdd_event(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 lvmmc, vfte, vfre, mdfb; - u8 vf_queue; - - lvmmc = E1000_READ_REG(hw, E1000_LVMMC); - vf_queue = lvmmc >> 29; - - /* VF index cannot be bigger or equal to VFs allocated */ - if (vf_queue >= adapter->vfs_allocated_count) - return; - - netdev_info(adapter->netdev, - "VF %d misbehaved. VF queues are disabled. " - "VM misbehavior code is 0x%x\n", vf_queue, lvmmc); - - /* Disable VFTE and VFRE related bits */ - vfte = E1000_READ_REG(hw, E1000_VFTE); - vfte &= ~(1 << vf_queue); - E1000_WRITE_REG(hw, E1000_VFTE, vfte); - - vfre = E1000_READ_REG(hw, E1000_VFRE); - vfre &= ~(1 << vf_queue); - E1000_WRITE_REG(hw, E1000_VFRE, vfre); - - /* Disable MDFB related bit. Clear on write */ - mdfb = E1000_READ_REG(hw, E1000_MDFB); - mdfb |= (1 << vf_queue); - E1000_WRITE_REG(hw, E1000_MDFB, mdfb); - - /* Reset the specific VF */ - E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST); -} - -/** - * igb_disable_mdd - * @adapter - board private structure - * - * Disable MDD behavior in the HW - **/ -static void igb_disable_mdd(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - if ((hw->mac.type != e1000_i350) || - (hw->mac.type != e1000_i354)) - return; - - reg = E1000_READ_REG(hw, E1000_DTXCTL); - reg &= (~E1000_DTXCTL_MDP_EN); - E1000_WRITE_REG(hw, E1000_DTXCTL, reg); -} - -/** - * igb_enable_mdd - * @adapter - board private structure - * - * Enable the HW to detect malicious driver and sends an interrupt to - * the driver. 
- **/ -static void igb_enable_mdd(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - /* Only available on i350 device */ - if (hw->mac.type != e1000_i350) - return; - - reg = E1000_READ_REG(hw, E1000_DTXCTL); - reg |= E1000_DTXCTL_MDP_EN; - E1000_WRITE_REG(hw, E1000_DTXCTL, reg); -} - -/** - * igb_reset_sriov_capability - disable SR-IOV if enabled - * - * Attempt to disable single root IO virtualization capabilites present in the - * kernel. - **/ -static void igb_reset_sriov_capability(struct igb_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - - /* reclaim resources allocated to VFs */ - if (adapter->vf_data) { - if (!pci_vfs_assigned(pdev)) { - /* - * disable iov and allow time for transactions to - * clear - */ - pci_disable_sriov(pdev); - msleep(500); - - dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n"); - } else { - dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n " - "VF(s) are assigned to guests!\n"); - } - /* Disable Malicious Driver Detection */ - igb_disable_mdd(adapter); - - /* free vf data storage */ - kfree(adapter->vf_data); - adapter->vf_data = NULL; - - /* switch rings back to PF ownership */ - E1000_WRITE_REG(hw, E1000_IOVCTL, - E1000_IOVCTL_REUSE_VFQ); - E1000_WRITE_FLUSH(hw); - msleep(100); - } - - adapter->vfs_allocated_count = 0; -} - -/** - * igb_set_sriov_capability - setup SR-IOV if supported - * - * Attempt to enable single root IO virtualization capabilites present in the - * kernel. - **/ -static void igb_set_sriov_capability(struct igb_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - int old_vfs = 0; - int i; - - old_vfs = pci_num_vf(pdev); - if (old_vfs) { - dev_info(pci_dev_to_dev(pdev), - "%d pre-allocated VFs found - override " - "max_vfs setting of %d\n", old_vfs, - adapter->vfs_allocated_count); - adapter->vfs_allocated_count = old_vfs; - } - /* no VFs requested, do nothing */ - if (!adapter->vfs_allocated_count) - return; - - /* allocate vf data storage */ - adapter->vf_data = kcalloc(adapter->vfs_allocated_count, - sizeof(struct vf_data_storage), - GFP_KERNEL); - - if (adapter->vf_data) { - if (!old_vfs) { - if (pci_enable_sriov(pdev, - adapter->vfs_allocated_count)) - goto err_out; - } - for (i = 0; i < adapter->vfs_allocated_count; i++) - igb_vf_configure(adapter, i); - - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_i350: - /* Enable VM to VM loopback by default */ - adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; - break; - default: - /* Currently no other hardware supports loopback */ - break; - } - - /* DMA Coalescing is not supported in IOV mode. */ - if (adapter->hw.mac.type >= e1000_i350) - adapter->dmac = IGB_DMAC_DISABLE; - if (adapter->hw.mac.type < e1000_i350) - adapter->flags |= IGB_FLAG_DETECT_BAD_DMA; - return; - - } - -err_out: - kfree(adapter->vf_data); - adapter->vf_data = NULL; - adapter->vfs_allocated_count = 0; - dev_warn(pci_dev_to_dev(pdev), - "Failed to initialize SR-IOV virtualization\n"); -} - -/** - * igb_set_interrupt_capability - set MSI or MSI-X if supported - * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. - **/ -static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) -{ - struct pci_dev *pdev = adapter->pdev; - int err; - int numvecs, i; - - if (!msix) - adapter->int_mode = IGB_INT_MODE_MSI; - - /* Number of supported queues. 
*/ - adapter->num_rx_queues = adapter->rss_queues; - - if (adapter->vmdq_pools > 1) - adapter->num_rx_queues += adapter->vmdq_pools - 1; - -#ifdef HAVE_TX_MQ - if (adapter->vmdq_pools) - adapter->num_tx_queues = adapter->vmdq_pools; - else - adapter->num_tx_queues = adapter->num_rx_queues; -#else - adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools); -#endif - - switch (adapter->int_mode) { - case IGB_INT_MODE_MSIX: - /* start with one vector for every rx queue */ - numvecs = adapter->num_rx_queues; - - /* if tx handler is separate add 1 for every tx queue */ - if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) - numvecs += adapter->num_tx_queues; - - /* store the number of vectors reserved for queues */ - adapter->num_q_vectors = numvecs; - - /* add 1 vector for link status interrupts */ - numvecs++; - adapter->msix_entries = kcalloc(numvecs, - sizeof(struct msix_entry), - GFP_KERNEL); - if (adapter->msix_entries) { - for (i = 0; i < numvecs; i++) - adapter->msix_entries[i].entry = i; - -#ifdef HAVE_PCI_ENABLE_MSIX - err = pci_enable_msix(pdev, - adapter->msix_entries, numvecs); -#else - err = pci_enable_msix_range(pdev, - adapter->msix_entries, - numvecs, - numvecs); -#endif - if (err == 0) - break; - } - /* MSI-X failed, so fall through and try MSI */ - dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. " - "Falling back to MSI interrupts.\n"); - igb_reset_interrupt_capability(adapter); - case IGB_INT_MODE_MSI: - if (!pci_enable_msi(pdev)) - adapter->flags |= IGB_FLAG_HAS_MSI; - else - dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI " - "interrupts. Falling back to legacy " - "interrupts.\n"); - /* Fall through */ - case IGB_INT_MODE_LEGACY: - /* disable advanced features and set number of queues to 1 */ - igb_reset_sriov_capability(adapter); - adapter->vmdq_pools = 0; - adapter->rss_queues = 1; - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_q_vectors = 1; - /* Don't do anything; this is system default */ - break; - } -} - -static void igb_add_ring(struct igb_ring *ring, - struct igb_ring_container *head) -{ - head->ring = ring; - head->count++; -} - -/** - * igb_alloc_q_vector - Allocate memory for a single interrupt vector - * @adapter: board private structure to initialize - * @v_count: q_vectors allocated on adapter, used for ring interleaving - * @v_idx: index of vector in adapter struct - * @txr_count: total number of Tx rings to allocate - * @txr_idx: index of first Tx ring to allocate - * @rxr_count: total number of Rx rings to allocate - * @rxr_idx: index of first Rx ring to allocate - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. 
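Editor's note: the MSI-X branch above budgets one vector per Rx queue, separate Tx vectors only when queues are not paired, plus one vector for link/other causes. A small illustrative sketch of that count (names are illustrative, not the driver's):

	#include <stdio.h>
	#include <stdbool.h>

	static int msix_vectors_needed(int rxq, int txq, bool paired, int *q_vectors)
	{
		int numvecs = rxq;            /* one vector per Rx queue */

		if (!paired)
			numvecs += txq;       /* separate Tx vectors if unpaired */
		*q_vectors = numvecs;         /* vectors that service queues */
		return numvecs + 1;           /* plus the link-status vector */
	}

	int main(void)
	{
		int qv;
		int total = msix_vectors_needed(4, 4, false, &qv);

		printf("unpaired: %d queue vectors, %d MSI-X entries\n", qv, total);
		total = msix_vectors_needed(4, 4, true, &qv);
		printf("paired:   %d queue vectors, %d MSI-X entries\n", qv, total);
		return 0;
	}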
- **/ -static int igb_alloc_q_vector(struct igb_adapter *adapter, - unsigned int v_count, unsigned int v_idx, - unsigned int txr_count, unsigned int txr_idx, - unsigned int rxr_count, unsigned int rxr_idx) -{ - struct igb_q_vector *q_vector; - struct igb_ring *ring; - int ring_count, size; - - /* igb only supports 1 Tx and/or 1 Rx queue per vector */ - if (txr_count > 1 || rxr_count > 1) - return -ENOMEM; - - ring_count = txr_count + rxr_count; - size = sizeof(struct igb_q_vector) + - (sizeof(struct igb_ring) * ring_count); - - /* allocate q_vector and rings */ - q_vector = kzalloc(size, GFP_KERNEL); - if (!q_vector) - return -ENOMEM; - -#ifndef IGB_NO_LRO - /* initialize LRO */ - __skb_queue_head_init(&q_vector->lrolist.active); - -#endif - /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, - igb_poll, 64); - - /* tie q_vector and adapter together */ - adapter->q_vector[v_idx] = q_vector; - q_vector->adapter = adapter; - - /* initialize work limits */ - q_vector->tx.work_limit = adapter->tx_work_limit; - - /* initialize ITR configuration */ - q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); - q_vector->itr_val = IGB_START_ITR; - - /* initialize pointer to rings */ - ring = q_vector->ring; - - /* initialize ITR */ - if (rxr_count) { - /* rx or rx/tx vector */ - if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) - q_vector->itr_val = adapter->rx_itr_setting; - } else { - /* tx only vector */ - if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) - q_vector->itr_val = adapter->tx_itr_setting; - } - - if (txr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Tx values */ - igb_add_ring(ring, &q_vector->tx); - - /* For 82575, context index must be unique per ring. */ - if (adapter->hw.mac.type == e1000_82575) - set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); - - /* apply Tx specific ring traits */ - ring->count = adapter->tx_ring_count; - ring->queue_index = txr_idx; - - /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; - - /* push pointer to next ring */ - ring++; - } - - if (rxr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Rx values */ - igb_add_ring(ring, &q_vector->rx); - -#ifndef HAVE_NDO_SET_FEATURES - /* enable rx checksum */ - set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags); - -#endif - /* set flag indicating ring supports SCTP checksum offload */ - if (adapter->hw.mac.type >= e1000_82576) - set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); - - if ((adapter->hw.mac.type == e1000_i350) || - (adapter->hw.mac.type == e1000_i354)) - set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); - - /* apply Rx specific ring traits */ - ring->count = adapter->rx_ring_count; - ring->queue_index = rxr_idx; - - /* assign ring to adapter */ - adapter->rx_ring[rxr_idx] = ring; - } - - return 0; -} - -/** - * igb_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. 
- **/ -static int igb_alloc_q_vectors(struct igb_adapter *adapter) -{ - int q_vectors = adapter->num_q_vectors; - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int rxr_idx = 0, txr_idx = 0, v_idx = 0; - int err; - - if (q_vectors >= (rxr_remaining + txr_remaining)) { - for (; rxr_remaining; v_idx++) { - err = igb_alloc_q_vector(adapter, q_vectors, v_idx, - 0, 0, 1, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining--; - rxr_idx++; - } - } - - for (; v_idx < q_vectors; v_idx++) { - int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); - int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); - err = igb_alloc_q_vector(adapter, q_vectors, v_idx, - tqpv, txr_idx, rqpv, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining -= rqpv; - txr_remaining -= tqpv; - rxr_idx++; - txr_idx++; - } - - return 0; - -err_out: - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) - igb_free_q_vector(adapter, v_idx); - - return -ENOMEM; -} - -/** - * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors - * - * This function initializes the interrupts and allocates all of the queues. - **/ -static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) -{ - struct pci_dev *pdev = adapter->pdev; - int err; - - igb_set_interrupt_capability(adapter, msix); - - err = igb_alloc_q_vectors(adapter); - if (err) { - dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n"); - goto err_alloc_q_vectors; - } - - igb_cache_ring_register(adapter); - - return 0; - -err_alloc_q_vectors: - igb_reset_interrupt_capability(adapter); - return err; -} - -/** - * igb_request_irq - initialize interrupts - * - * Attempts to configure interrupts using the best available - * capabilities of the hardware and kernel. 
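Editor's note: igb_alloc_q_vectors() above spreads the remaining rings over the remaining vectors with DIV_ROUND_UP, recomputing the ceiling at each step so earlier vectors absorb the remainder. A standalone sketch of just that distribution:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* Illustrative only: spreads `rings` rings over `vectors` vectors the
	 * same way the loop above does. */
	static void spread_rings(int rings, int vectors)
	{
		int v;

		for (v = 0; v < vectors; v++) {
			int per_vec = DIV_ROUND_UP(rings, vectors - v);

			printf("vector %d services %d ring(s)\n", v, per_vec);
			rings -= per_vec;
		}
	}

	int main(void)
	{
		spread_rings(10, 4);   /* 10 rings onto 4 vectors: 3, 3, 2, 2 */
		return 0;
	}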
- **/ -static int igb_request_irq(struct igb_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct pci_dev *pdev = adapter->pdev; - int err = 0; - - if (adapter->msix_entries) { - err = igb_request_msix(adapter); - if (!err) - goto request_done; - /* fall back to MSI */ - igb_free_all_tx_resources(adapter); - igb_free_all_rx_resources(adapter); - - igb_clear_interrupt_scheme(adapter); - igb_reset_sriov_capability(adapter); - err = igb_init_interrupt_scheme(adapter, false); - if (err) - goto request_done; - igb_setup_all_tx_resources(adapter); - igb_setup_all_rx_resources(adapter); - igb_configure(adapter); - } - - igb_assign_vector(adapter->q_vector[0], 0); - - if (adapter->flags & IGB_FLAG_HAS_MSI) { - err = request_irq(pdev->irq, &igb_intr_msi, 0, - netdev->name, adapter); - if (!err) - goto request_done; - - /* fall back to legacy interrupts */ - igb_reset_interrupt_capability(adapter); - adapter->flags &= ~IGB_FLAG_HAS_MSI; - } - - err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED, - netdev->name, adapter); - - if (err) - dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n", - err); - -request_done: - return err; -} - -static void igb_free_irq(struct igb_adapter *adapter) -{ - if (adapter->msix_entries) { - int vector = 0, i; - - free_irq(adapter->msix_entries[vector++].vector, adapter); - - for (i = 0; i < adapter->num_q_vectors; i++) - free_irq(adapter->msix_entries[vector++].vector, - adapter->q_vector[i]); - } else { - free_irq(adapter->pdev->irq, adapter); - } -} - -/** - * igb_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure - **/ -static void igb_irq_disable(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - /* - * we need to be careful when disabling interrupts. 
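Editor's note: igb_request_irq() above tries MSI-X first, tears the scheme down and retries with MSI on failure, and finally falls back to a shared legacy interrupt. A hedged sketch of that fallback ladder with hypothetical probe functions (here MSI-X is made to fail so the ladder is exercised):

	#include <stdio.h>

	enum irq_mode { MODE_MSIX, MODE_MSI, MODE_LEGACY };

	static int try_msix(void)   { return -1; }   /* pretend MSI-X fails */
	static int try_msi(void)    { return 0;  }
	static int try_legacy(void) { return 0;  }

	static enum irq_mode pick_irq_mode(void)
	{
		if (try_msix() == 0)
			return MODE_MSIX;
		/* in the driver, queues/vectors are torn down and rebuilt here
		 * before the simpler mode is attempted */
		if (try_msi() == 0)
			return MODE_MSI;
		try_legacy();
		return MODE_LEGACY;
	}

	int main(void)
	{
		static const char *names[] = { "MSI-X", "MSI", "legacy" };

		printf("using %s interrupts\n", names[pick_irq_mode()]);
		return 0;
	}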
The VFs are also - * mapped into these registers and so clearing the bits can cause - * issues on the VF drivers so we only need to clear what we set - */ - if (adapter->msix_entries) { - u32 regval = E1000_READ_REG(hw, E1000_EIAM); - E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask); - E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask); - regval = E1000_READ_REG(hw, E1000_EIAC); - E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask); - } - - E1000_WRITE_REG(hw, E1000_IAM, 0); - E1000_WRITE_REG(hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(hw); - - if (adapter->msix_entries) { - int vector = 0, i; - - synchronize_irq(adapter->msix_entries[vector++].vector); - - for (i = 0; i < adapter->num_q_vectors; i++) - synchronize_irq(adapter->msix_entries[vector++].vector); - } else { - synchronize_irq(adapter->pdev->irq); - } -} - -/** - * igb_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure - **/ -static void igb_irq_enable(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - if (adapter->msix_entries) { - u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; - u32 regval = E1000_READ_REG(hw, E1000_EIAC); - E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask); - regval = E1000_READ_REG(hw, E1000_EIAM); - E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask); - E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask); - if (adapter->vfs_allocated_count) { - E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF); - ims |= E1000_IMS_VMMB; - if (adapter->mdd) - if ((adapter->hw.mac.type == e1000_i350) || - (adapter->hw.mac.type == e1000_i354)) - ims |= E1000_IMS_MDDET; - } - E1000_WRITE_REG(hw, E1000_IMS, ims); - } else { - E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK | - E1000_IMS_DRSTA); - E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK | - E1000_IMS_DRSTA); - } -} - -static void igb_update_mng_vlan(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u16 vid = adapter->hw.mng_cookie.vlan_id; - u16 old_vid = adapter->mng_vlan_id; - - if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { - /* add VID to filter table */ - igb_vfta_set(adapter, vid, TRUE); - adapter->mng_vlan_id = vid; - } else { - adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; - } - - if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && - (vid != old_vid) && -#ifdef HAVE_VLAN_RX_REGISTER - !vlan_group_get_device(adapter->vlgrp, old_vid)) { -#else - !test_bit(old_vid, adapter->active_vlans)) { -#endif - /* remove VID from filter table */ - igb_vfta_set(adapter, old_vid, FALSE); - } -} - -/** - * igb_release_hw_control - release control of the h/w to f/w - * @adapter: address of board private structure - * - * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that the - * driver is no longer loaded. - * - **/ -static void igb_release_hw_control(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ctrl_ext; - - /* Let firmware take over control of h/w */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - E1000_WRITE_REG(hw, E1000_CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); -} - -/** - * igb_get_hw_control - get control of the h/w from f/w - * @adapter: address of board private structure - * - * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. 
- * - **/ -static void igb_get_hw_control(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ctrl_ext; - - /* Let firmware know the driver has taken over */ - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - E1000_WRITE_REG(hw, E1000_CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); -} - -/** - * igb_configure - configure the hardware for RX and TX - * @adapter: private board structure - **/ -static void igb_configure(struct igb_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i; - - igb_get_hw_control(adapter); - igb_set_rx_mode(netdev); - - igb_restore_vlan(adapter); - - igb_setup_tctl(adapter); - igb_setup_mrqc(adapter); - igb_setup_rctl(adapter); - - igb_configure_tx(adapter); - igb_configure_rx(adapter); - - e1000_rx_fifo_flush_82575(&adapter->hw); -#ifdef CONFIG_NETDEVICES_MULTIQUEUE - if (adapter->num_tx_queues > 1) - netdev->features |= NETIF_F_MULTI_QUEUE; - else - netdev->features &= ~NETIF_F_MULTI_QUEUE; -#endif - - /* call igb_desc_unused which always leaves - * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *ring = adapter->rx_ring[i]; - igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); - } -} - -/** - * igb_power_up_link - Power up the phy/serdes link - * @adapter: address of board private structure - **/ -void igb_power_up_link(struct igb_adapter *adapter) -{ - e1000_phy_hw_reset(&adapter->hw); - - if (adapter->hw.phy.media_type == e1000_media_type_copper) - e1000_power_up_phy(&adapter->hw); - else - e1000_power_up_fiber_serdes_link(&adapter->hw); -} - -/** - * igb_power_down_link - Power down the phy/serdes link - * @adapter: address of board private structure - */ -static void igb_power_down_link(struct igb_adapter *adapter) -{ - if (adapter->hw.phy.media_type == e1000_media_type_copper) - e1000_power_down_phy(&adapter->hw); - else - e1000_shutdown_fiber_serdes_link(&adapter->hw); -} - -/* Detect and switch function for Media Auto Sense */ -static void igb_check_swap_media(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ctrl_ext, connsw; - bool swap_now = false; - bool link; - - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - connsw = E1000_READ_REG(hw, E1000_CONNSW); - link = igb_has_link(adapter); - (void) link; - - /* need to live swap if current media is copper and we have fiber/serdes - * to go to. 
- */ - - if ((hw->phy.media_type == e1000_media_type_copper) && - (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { - swap_now = true; - } else if (!(connsw & E1000_CONNSW_SERDESD)) { - /* copper signal takes time to appear */ - if (adapter->copper_tries < 2) { - adapter->copper_tries++; - connsw |= E1000_CONNSW_AUTOSENSE_CONF; - E1000_WRITE_REG(hw, E1000_CONNSW, connsw); - return; - } else { - adapter->copper_tries = 0; - if ((connsw & E1000_CONNSW_PHYSD) && - (!(connsw & E1000_CONNSW_PHY_PDN))) { - swap_now = true; - connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; - E1000_WRITE_REG(hw, E1000_CONNSW, connsw); - } - } - } - - if (swap_now) { - switch (hw->phy.media_type) { - case e1000_media_type_copper: - dev_info(pci_dev_to_dev(adapter->pdev), - "%s:MAS: changing media to fiber/serdes\n", - adapter->netdev->name); - ctrl_ext |= - E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; - adapter->flags |= IGB_FLAG_MEDIA_RESET; - adapter->copper_tries = 0; - break; - case e1000_media_type_internal_serdes: - case e1000_media_type_fiber: - dev_info(pci_dev_to_dev(adapter->pdev), - "%s:MAS: changing media to copper\n", - adapter->netdev->name); - ctrl_ext &= - ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; - adapter->flags |= IGB_FLAG_MEDIA_RESET; - break; - default: - /* shouldn't get here during regular operation */ - dev_err(pci_dev_to_dev(adapter->pdev), - "%s:AMS: Invalid media type found, returning\n", - adapter->netdev->name); - break; - } - E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); - } -} - -#ifdef HAVE_I2C_SUPPORT -/* igb_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Returns the I2C data bit value - */ -static int igb_get_i2c_data(void *data) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - return (i2cctl & E1000_I2C_DATA_IN) != 0; -} - -/* igb_set_i2c_data - Sets the I2C data bit - * @data: pointer to hardware structure - * @state: I2C data value (0 or 1) to set - * - * Sets the I2C data bit - */ -static void igb_set_i2c_data(void *data, int state) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - if (state) - i2cctl |= E1000_I2C_DATA_OUT; - else - i2cctl &= ~E1000_I2C_DATA_OUT; - - i2cctl &= ~E1000_I2C_DATA_OE_N; - i2cctl |= E1000_I2C_CLK_OE_N; - - E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); - E1000_WRITE_FLUSH(hw); - -} - -/* igb_set_i2c_clk - Sets the I2C SCL clock - * @data: pointer to hardware structure - * @state: state to set clock - * - * Sets the I2C clock line to state - */ -static void igb_set_i2c_clk(void *data, int state) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - if (state) { - i2cctl |= E1000_I2C_CLK_OUT; - i2cctl &= ~E1000_I2C_CLK_OE_N; - } else { - i2cctl &= ~E1000_I2C_CLK_OUT; - i2cctl &= ~E1000_I2C_CLK_OE_N; - } - E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); - E1000_WRITE_FLUSH(hw); -} - -/* igb_get_i2c_clk - Gets the I2C SCL clock state - * @data: pointer to hardware structure - * - * Gets the I2C clock state - */ -static int igb_get_i2c_clk(void *data) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); - - return (i2cctl & E1000_I2C_CLK_IN) != 0; -} - -static const struct i2c_algo_bit_data igb_i2c_algo = { - .setsda = igb_set_i2c_data, - .setscl = igb_set_i2c_clk, - 
.getsda = igb_get_i2c_data, - .getscl = igb_get_i2c_clk, - .udelay = 5, - .timeout = 20, -}; - -/* igb_init_i2c - Init I2C interface - * @adapter: pointer to adapter structure - * - */ -static s32 igb_init_i2c(struct igb_adapter *adapter) -{ - s32 status = E1000_SUCCESS; - - /* I2C interface supported on i350 devices */ - if (adapter->hw.mac.type != e1000_i350) - return E1000_SUCCESS; - - /* Initialize the i2c bus which is controlled by the registers. - * This bus will use the i2c_algo_bit structue that implements - * the protocol through toggling of the 4 bits in the register. - */ - adapter->i2c_adap.owner = THIS_MODULE; - adapter->i2c_algo = igb_i2c_algo; - adapter->i2c_algo.data = adapter; - adapter->i2c_adap.algo_data = &adapter->i2c_algo; - adapter->i2c_adap.dev.parent = &adapter->pdev->dev; - strlcpy(adapter->i2c_adap.name, "igb BB", - sizeof(adapter->i2c_adap.name)); - status = i2c_bit_add_bus(&adapter->i2c_adap); - return status; -} - -#endif /* HAVE_I2C_SUPPORT */ -/** - * igb_up - Open the interface and prepare it to handle traffic - * @adapter: board private structure - **/ -int igb_up(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - - /* hardware has been reset, we need to reload some things */ - igb_configure(adapter); - - clear_bit(__IGB_DOWN, &adapter->state); - - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); - - if (adapter->msix_entries) - igb_configure_msix(adapter); - else - igb_assign_vector(adapter->q_vector[0], 0); - - igb_configure_lli(adapter); - - /* Clear any pending interrupts. */ - E1000_READ_REG(hw, E1000_ICR); - igb_irq_enable(adapter); - - /* notify VFs that reset has been completed */ - if (adapter->vfs_allocated_count) { - u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); - reg_data |= E1000_CTRL_EXT_PFRSTD; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); - } - - netif_tx_start_all_queues(adapter->netdev); - - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - schedule_work(&adapter->dma_err_task); - /* start the watchdog. 
*/ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); - - if ((adapter->flags & IGB_FLAG_EEE) && - (!hw->dev_spec._82575.eee_disable)) - adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; - - return 0; -} - -void igb_down(struct igb_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct e1000_hw *hw = &adapter->hw; - u32 tctl, rctl; - int i; - - /* signal that we're down so the interrupt handler does not - * reschedule our watchdog timer */ - set_bit(__IGB_DOWN, &adapter->state); - - /* disable receives in the hardware */ - rctl = E1000_READ_REG(hw, E1000_RCTL); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - /* flush and sleep below */ - - netif_tx_stop_all_queues(netdev); - - /* disable transmits in the hardware */ - tctl = E1000_READ_REG(hw, E1000_TCTL); - tctl &= ~E1000_TCTL_EN; - E1000_WRITE_REG(hw, E1000_TCTL, tctl); - /* flush both disables and wait for them to finish */ - E1000_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - for (i = 0; i < adapter->num_q_vectors; i++) - napi_disable(&(adapter->q_vector[i]->napi)); - - igb_irq_disable(adapter); - - adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; - - del_timer_sync(&adapter->watchdog_timer); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - del_timer_sync(&adapter->dma_err_timer); - del_timer_sync(&adapter->phy_info_timer); - - netif_carrier_off(netdev); - - /* record the stats before reset*/ - igb_update_stats(adapter); - - adapter->link_speed = 0; - adapter->link_duplex = 0; - -#ifdef HAVE_PCI_ERS - if (!pci_channel_offline(adapter->pdev)) - igb_reset(adapter); -#else - igb_reset(adapter); -#endif - igb_clean_all_tx_rings(adapter); - igb_clean_all_rx_rings(adapter); -#ifdef IGB_DCA - /* since we reset the hardware DCA settings were cleared */ - igb_setup_dca(adapter); -#endif -} - -void igb_reinit_locked(struct igb_adapter *adapter) -{ - WARN_ON(in_interrupt()); - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - igb_down(adapter); - igb_up(adapter); - clear_bit(__IGB_RESETTING, &adapter->state); -} - -/** - * igb_enable_mas - Media Autosense re-enable after swap - * - * @adapter: adapter struct - **/ -static s32 igb_enable_mas(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 connsw; - s32 ret_val = E1000_SUCCESS; - - connsw = E1000_READ_REG(hw, E1000_CONNSW); - if (hw->phy.media_type == e1000_media_type_copper) { - /* configure for SerDes media detect */ - if (!(connsw & E1000_CONNSW_SERDESD)) { - connsw |= E1000_CONNSW_ENRGSRC; - connsw |= E1000_CONNSW_AUTOSENSE_EN; - E1000_WRITE_REG(hw, E1000_CONNSW, connsw); - E1000_WRITE_FLUSH(hw); - } else if (connsw & E1000_CONNSW_SERDESD) { - /* already SerDes, no need to enable anything */ - return ret_val; - } else { - dev_info(pci_dev_to_dev(adapter->pdev), - "%s:MAS: Unable to configure feature, disabling..\n", - adapter->netdev->name); - adapter->flags &= ~IGB_FLAG_MAS_ENABLE; - } - } - return ret_val; -} - -void igb_reset(struct igb_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - struct e1000_mac_info *mac = &hw->mac; - struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; - - /* Repartition Pba for greater than 9k mtu - * To take effect CTRL.RST is required. 
- */ - switch (mac->type) { - case e1000_i350: - case e1000_82580: - case e1000_i354: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba = e1000_rxpbs_adjust_82580(pba); - break; - case e1000_82576: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba &= E1000_RXPBS_SIZE_MASK_82576; - break; - case e1000_82575: - case e1000_i210: - case e1000_i211: - default: - pba = E1000_PBA_34K; - break; - } - - if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && - (mac->type < e1000_82576)) { - /* adjust PBA for jumbo frames */ - E1000_WRITE_REG(hw, E1000_PBA, pba); - - /* To maintain wire speed transmits, the Tx FIFO should be - * large enough to accommodate two full transmit packets, - * rounded up to the next 1KB and expressed in KB. Likewise, - * the Rx FIFO should be large enough to accommodate at least - * one full receive packet and is similarly rounded up and - * expressed in KB. */ - pba = E1000_READ_REG(hw, E1000_PBA); - /* upper 16 bits has Tx packet buffer allocation size in KB */ - tx_space = pba >> 16; - /* lower 16 bits has Rx packet buffer allocation size in KB */ - pba &= 0xffff; - /* the tx fifo also stores 16 bytes of information about the tx - * but don't include ethernet FCS because hardware appends it */ - min_tx_space = (adapter->max_frame_size + - sizeof(union e1000_adv_tx_desc) - - ETH_FCS_LEN) * 2; - min_tx_space = ALIGN(min_tx_space, 1024); - min_tx_space >>= 10; - /* software strips receive CRC, so leave room for it */ - min_rx_space = adapter->max_frame_size; - min_rx_space = ALIGN(min_rx_space, 1024); - min_rx_space >>= 10; - - /* If current Tx allocation is less than the min Tx FIFO size, - * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation */ - if (tx_space < min_tx_space && - ((min_tx_space - tx_space) < pba)) { - pba = pba - (min_tx_space - tx_space); - - /* if short on rx space, rx wins and must trump tx - * adjustment */ - if (pba < min_rx_space) - pba = min_rx_space; - } - E1000_WRITE_REG(hw, E1000_PBA, pba); - } - - /* flow control settings */ - /* The high water mark must be low enough to fit one full frame - * (or the size used for early receive) above it in the Rx FIFO. 
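Editor's note: the jumbo-frame branch above resizes the packet buffer so the Tx FIFO can hold two full frames (plus descriptor overhead, minus the FCS the hardware appends) and the Rx FIFO at least one, both rounded up to 1 KB and expressed in KB. A standalone sketch of that arithmetic; the FCS and descriptor sizes are assumptions for illustration:

	#include <stdio.h>

	#define ALIGN_UP(x, a)  (((x) + (a) - 1) / (a) * (a))
	#define FCS_LEN         4    /* Ethernet FCS, appended by hardware (assumed) */
	#define TX_DESC_LEN     16   /* advanced Tx descriptor size in bytes (assumed) */

	static unsigned adjust_rx_pba_kb(unsigned rx_pba_kb, unsigned tx_pba_kb,
					 unsigned max_frame)
	{
		unsigned min_tx_kb = ALIGN_UP((max_frame + TX_DESC_LEN - FCS_LEN) * 2,
					      1024) >> 10;
		unsigned min_rx_kb = ALIGN_UP(max_frame, 1024) >> 10;

		if (tx_pba_kb < min_tx_kb && (min_tx_kb - tx_pba_kb) < rx_pba_kb) {
			rx_pba_kb -= min_tx_kb - tx_pba_kb;   /* borrow from Rx */
			if (rx_pba_kb < min_rx_kb)            /* but Rx still wins */
				rx_pba_kb = min_rx_kb;
		}
		return rx_pba_kb;
	}

	int main(void)
	{
		/* e.g. 9018-byte jumbo frame, 24 KB Rx / 10 KB Tx split */
		printf("Rx PBA becomes %u KB\n", adjust_rx_pba_kb(24, 10, 9018));
		return 0;
	}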
- * Set it to the lower of: - * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); - - fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ - fc->low_water = fc->high_water - 16; - fc->pause_time = 0xFFFF; - fc->send_xon = 1; - fc->current_mode = fc->requested_mode; - - /* disable receive for all VFs and wait one second */ - if (adapter->vfs_allocated_count) { - int i; - /* - * Clear all flags except indication that the PF has set - * the VF MAC addresses administratively - */ - for (i = 0 ; i < adapter->vfs_allocated_count; i++) - adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; - - /* ping all the active vfs to let them know we are going down */ - igb_ping_all_vfs(adapter); - - /* disable transmits and receives */ - E1000_WRITE_REG(hw, E1000_VFRE, 0); - E1000_WRITE_REG(hw, E1000_VFTE, 0); - } - - /* Allow time for pending master requests to run */ - e1000_reset_hw(hw); - E1000_WRITE_REG(hw, E1000_WUC, 0); - - if (adapter->flags & IGB_FLAG_MEDIA_RESET) { - e1000_setup_init_funcs(hw, TRUE); - igb_check_options(adapter); - e1000_get_bus_info(hw); - adapter->flags &= ~IGB_FLAG_MEDIA_RESET; - } - if (adapter->flags & IGB_FLAG_MAS_ENABLE) { - if (igb_enable_mas(adapter)) - dev_err(pci_dev_to_dev(pdev), - "Error enabling Media Auto Sense\n"); - } - if (e1000_init_hw(hw)) - dev_err(pci_dev_to_dev(pdev), "Hardware Error\n"); - - /* - * Flow control settings reset on hardware reset, so guarantee flow - * control is off when forcing speed. - */ - if (!hw->mac.autoneg) - e1000_force_mac_fc(hw); - - igb_init_dmac(adapter, pba); - /* Re-initialize the thermal sensor on i350 devices. */ - if (mac->type == e1000_i350 && hw->bus.func == 0) { - /* - * If present, re-initialize the external thermal sensor - * interface. - */ - if (adapter->ets) - e1000_set_i2c_bb(hw); - e1000_init_thermal_sensor_thresh(hw); - } - - /*Re-establish EEE setting */ - if (hw->phy.media_type == e1000_media_type_copper) { - switch (mac->type) { - case e1000_i350: - case e1000_i210: - case e1000_i211: - e1000_set_eee_i350(hw); - break; - case e1000_i354: - e1000_set_eee_i354(hw); - break; - default: - break; - } - } - - if (!netif_running(adapter->netdev)) - igb_power_down_link(adapter); - - igb_update_mng_vlan(adapter); - - /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ - E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE); - - -#ifdef HAVE_PTP_1588_CLOCK - /* Re-enable PTP, where applicable. */ - igb_ptp_reset(adapter); -#endif /* HAVE_PTP_1588_CLOCK */ - - e1000_get_phy_info(hw); - - adapter->devrc++; -} - -#ifdef HAVE_NDO_SET_FEATURES -static kni_netdev_features_t igb_fix_features(struct net_device *netdev, - kni_netdev_features_t features) -{ - /* - * Since there is no support for separate tx vlan accel - * enabled make sure tx flag is cleared if rx is. 
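Editor's note: the flow-control setup above picks the high water mark as the lower of 90% of the Rx FIFO and the FIFO size minus two full frames, masked to 16-byte granularity, with the low water mark one granule below. A minimal sketch of just that computation:

	#include <stdio.h>

	static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

	static void fc_watermarks(unsigned pba_kb, unsigned max_frame,
				  unsigned *high, unsigned *low)
	{
		unsigned fifo = pba_kb << 10;   /* packet buffer size in bytes */
		unsigned hwm  = min_u(fifo * 9 / 10, fifo - 2 * max_frame);

		*high = hwm & ~0xFu;            /* 16-byte granularity */
		*low  = *high - 16;
	}

	int main(void)
	{
		unsigned hi, lo;

		fc_watermarks(34, 1522, &hi, &lo);   /* 34 KB PBA, standard frame */
		printf("high water %u bytes, low water %u bytes\n", hi, lo);
		return 0;
	}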
- */ -#ifdef NETIF_F_HW_VLAN_CTAG_RX - if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) - features &= ~NETIF_F_HW_VLAN_CTAG_TX; -#else - if (!(features & NETIF_F_HW_VLAN_RX)) - features &= ~NETIF_F_HW_VLAN_TX; -#endif - - /* If Rx checksum is disabled, then LRO should also be disabled */ - if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_LRO; - - return features; -} - -static int igb_set_features(struct net_device *netdev, - kni_netdev_features_t features) -{ - u32 changed = netdev->features ^ features; - -#ifdef NETIF_F_HW_VLAN_CTAG_RX - if (changed & NETIF_F_HW_VLAN_CTAG_RX) -#else - if (changed & NETIF_F_HW_VLAN_RX) -#endif - igb_vlan_mode(netdev, features); - - return 0; -} - -#ifdef NTF_SELF -#ifdef USE_CONST_DEV_UC_CHAR -static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, -#ifdef HAVE_NDO_FDB_ADD_VID - u16 vid, -#endif - u16 flags) -#else -static int igb_ndo_fdb_add(struct ndmsg *ndm, - struct net_device *dev, - unsigned char *addr, - u16 flags) -#endif -{ - struct igb_adapter *adapter = netdev_priv(dev); - struct e1000_hw *hw = &adapter->hw; - int err; - - if (!(adapter->vfs_allocated_count)) - return -EOPNOTSUPP; - - /* Hardware does not support aging addresses so if a - * ndm_state is given only allow permanent addresses - */ - if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { - pr_info("%s: FDB only supports static addresses\n", - igb_driver_name); - return -EINVAL; - } - - if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - u32 rar_uc_entries = hw->mac.rar_entry_count - - (adapter->vfs_allocated_count + 1); - - if (netdev_uc_count(dev) < rar_uc_entries) - err = dev_uc_add_excl(dev, addr); - else - err = -ENOMEM; - } else if (is_multicast_ether_addr(addr)) { - err = dev_mc_add_excl(dev, addr); - } else { - err = -EINVAL; - } - - /* Only return duplicate errors if NLM_F_EXCL is set */ - if (err == -EEXIST && !(flags & NLM_F_EXCL)) - err = 0; - - return err; -} - -#ifndef USE_DEFAULT_FDB_DEL_DUMP -#ifdef USE_CONST_DEV_UC_CHAR -static int igb_ndo_fdb_del(struct ndmsg *ndm, - struct net_device *dev, - const unsigned char *addr) -#else -static int igb_ndo_fdb_del(struct ndmsg *ndm, - struct net_device *dev, - unsigned char *addr) -#endif -{ - struct igb_adapter *adapter = netdev_priv(dev); - int err = -EOPNOTSUPP; - - if (ndm->ndm_state & NUD_PERMANENT) { - pr_info("%s: FDB only supports static addresses\n", - igb_driver_name); - return -EINVAL; - } - - if (adapter->vfs_allocated_count) { - if (is_unicast_ether_addr(addr)) - err = dev_uc_del(dev, addr); - else if (is_multicast_ether_addr(addr)) - err = dev_mc_del(dev, addr); - else - err = -EINVAL; - } - - return err; -} - -static int igb_ndo_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - int idx) -{ - struct igb_adapter *adapter = netdev_priv(dev); - - if (adapter->vfs_allocated_count) - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); - - return idx; -} -#endif /* USE_DEFAULT_FDB_DEL_DUMP */ - -#ifdef HAVE_BRIDGE_ATTRIBS -#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS -static int igb_ndo_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -#else -static int igb_ndo_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh) -#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */ -{ - struct igb_adapter *adapter = netdev_priv(dev); - struct e1000_hw *hw = &adapter->hw; - struct nlattr *attr, *br_spec; - int rem; - - if (!(adapter->vfs_allocated_count)) - return -EOPNOTSUPP; - - switch 
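Editor's note: the FDB add handler above accepts only permanent entries, admits unicast addresses only while receive-address-register slots remain, and tolerates duplicates unless exclusivity was requested. A simplified, self-contained sketch of that decision flow; the inputs are hypothetical stand-ins for the netlink and netdev state:

	#include <stdio.h>
	#include <stdbool.h>
	#include <errno.h>

	struct fdb_req {
		bool state_given, permanent;   /* ndm_state handling */
		bool unicast, multicast;       /* address class */
		bool excl;                     /* NLM_F_EXCL semantics */
	};

	static int fdb_add(const struct fdb_req *req, int uc_used, int uc_slots,
			   bool already_present)
	{
		int err;

		if (req->state_given && !req->permanent)
			return -EINVAL;        /* hardware does not age addresses */

		if (req->unicast)
			err = (uc_used < uc_slots) ?
			      (already_present ? -EEXIST : 0) : -ENOMEM;
		else if (req->multicast)
			err = already_present ? -EEXIST : 0;
		else
			return -EINVAL;

		if (err == -EEXIST && !req->excl)
			err = 0;               /* duplicates are fine without EXCL */
		return err;
	}

	int main(void)
	{
		struct fdb_req req = { .state_given = true, .permanent = true,
				       .unicast = true };

		printf("fdb_add -> %d\n", fdb_add(&req, 2, 8, false));
		return 0;
	}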
(adapter->hw.mac.type) { - case e1000_82576: - case e1000_i350: - case e1000_i354: - break; - default: - return -EOPNOTSUPP; - } - - br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); - - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; - - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - - mode = nla_get_u16(attr); - if (mode == BRIDGE_MODE_VEPA) { - e1000_vmdq_set_loopback_pf(hw, 0); - adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE; - } else if (mode == BRIDGE_MODE_VEB) { - e1000_vmdq_set_loopback_pf(hw, 1); - adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; - } else - return -EINVAL; - - netdev_info(adapter->netdev, "enabling bridge mode: %s\n", - mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); - } - - return 0; -} - -#ifdef HAVE_BRIDGE_FILTER -#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS -static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags) -#else -static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) -#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ -#else -static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) -#endif -{ - struct igb_adapter *adapter = netdev_priv(dev); - u16 mode; - - if (!(adapter->vfs_allocated_count)) - return -EOPNOTSUPP; - - if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE) - mode = BRIDGE_MODE_VEB; - else - mode = BRIDGE_MODE_VEPA; - -#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK -#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS -#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, - nlflags, filter_mask, NULL); -#else - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags); -#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */ -#else - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0); -#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ -#else - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode); -#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */ -} -#endif /* HAVE_BRIDGE_ATTRIBS */ -#endif /* NTF_SELF */ - -#endif /* HAVE_NDO_SET_FEATURES */ -#ifdef HAVE_NET_DEVICE_OPS -static const struct net_device_ops igb_netdev_ops = { - .ndo_open = igb_open, - .ndo_stop = igb_close, - .ndo_start_xmit = igb_xmit_frame, - .ndo_get_stats = igb_get_stats, - .ndo_set_rx_mode = igb_set_rx_mode, - .ndo_set_mac_address = igb_set_mac, - .ndo_change_mtu = igb_change_mtu, - .ndo_do_ioctl = igb_ioctl, - .ndo_tx_timeout = igb_tx_timeout, - .ndo_validate_addr = eth_validate_addr, - .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, -#ifdef IFLA_VF_MAX - .ndo_set_vf_mac = igb_ndo_set_vf_mac, - .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, -#ifdef HAVE_VF_MIN_MAX_TXRATE - .ndo_set_vf_rate = igb_ndo_set_vf_bw, -#else /* HAVE_VF_MIN_MAX_TXRATE */ - .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, -#endif /* HAVE_VF_MIN_MAX_TXRATE */ - .ndo_get_vf_config = igb_ndo_get_vf_config, -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE - .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, -#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ -#endif /* IFLA_VF_MAX */ -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = igb_netpoll, -#endif -#ifdef HAVE_NDO_SET_FEATURES - .ndo_fix_features = igb_fix_features, - .ndo_set_features = igb_set_features, -#endif -#ifdef HAVE_VLAN_RX_REGISTER - .ndo_vlan_rx_register = igb_vlan_mode, -#endif -#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB -#ifdef NTF_SELF - .ndo_fdb_add = igb_ndo_fdb_add, -#ifndef 
USE_DEFAULT_FDB_DEL_DUMP - .ndo_fdb_del = igb_ndo_fdb_del, - .ndo_fdb_dump = igb_ndo_fdb_dump, -#endif -#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */ -#ifdef HAVE_BRIDGE_ATTRIBS - .ndo_bridge_setlink = igb_ndo_bridge_setlink, - .ndo_bridge_getlink = igb_ndo_bridge_getlink, -#endif /* HAVE_BRIDGE_ATTRIBS */ -#endif -}; - -#ifdef CONFIG_IGB_VMDQ_NETDEV -static const struct net_device_ops igb_vmdq_ops = { - .ndo_open = &igb_vmdq_open, - .ndo_stop = &igb_vmdq_close, - .ndo_start_xmit = &igb_vmdq_xmit_frame, - .ndo_get_stats = &igb_vmdq_get_stats, - .ndo_set_rx_mode = &igb_vmdq_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = &igb_vmdq_set_mac, - .ndo_change_mtu = &igb_vmdq_change_mtu, - .ndo_tx_timeout = &igb_vmdq_tx_timeout, - .ndo_vlan_rx_register = &igb_vmdq_vlan_rx_register, - .ndo_vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid, -}; - -#endif /* CONFIG_IGB_VMDQ_NETDEV */ -#endif /* HAVE_NET_DEVICE_OPS */ -#ifdef CONFIG_IGB_VMDQ_NETDEV -void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev) -{ -#ifdef HAVE_NET_DEVICE_OPS - vnetdev->netdev_ops = &igb_vmdq_ops; -#else - dev->open = &igb_vmdq_open; - dev->stop = &igb_vmdq_close; - dev->hard_start_xmit = &igb_vmdq_xmit_frame; - dev->get_stats = &igb_vmdq_get_stats; -#ifdef HAVE_SET_RX_MODE - dev->set_rx_mode = &igb_vmdq_set_rx_mode; -#endif - dev->set_multicast_list = &igb_vmdq_set_rx_mode; - dev->set_mac_address = &igb_vmdq_set_mac; - dev->change_mtu = &igb_vmdq_change_mtu; -#ifdef HAVE_TX_TIMEOUT - dev->tx_timeout = &igb_vmdq_tx_timeout; -#endif -#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX) - dev->vlan_rx_register = &igb_vmdq_vlan_rx_register; - dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid; -#endif -#endif - igb_vmdq_set_ethtool_ops(vnetdev); - vnetdev->watchdog_timeo = 5 * HZ; - -} - -int igb_init_vmdq_netdevs(struct igb_adapter *adapter) -{ - int pool, err = 0, base_queue; - struct net_device *vnetdev; - struct igb_vmdq_adapter *vmdq_adapter; - - for (pool = 1; pool < adapter->vmdq_pools; pool++) { - int qpp = (!adapter->rss_queues ? 
1 : adapter->rss_queues); - base_queue = pool * qpp; - vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter)); - if (!vnetdev) { - err = -ENOMEM; - break; - } - vmdq_adapter = netdev_priv(vnetdev); - vmdq_adapter->vnetdev = vnetdev; - vmdq_adapter->real_adapter = adapter; - vmdq_adapter->rx_ring = adapter->rx_ring[base_queue]; - vmdq_adapter->tx_ring = adapter->tx_ring[base_queue]; - igb_assign_vmdq_netdev_ops(vnetdev); - snprintf(vnetdev->name, IFNAMSIZ, "%sv%d", - adapter->netdev->name, pool); - vnetdev->features = adapter->netdev->features; -#ifdef HAVE_NETDEV_VLAN_FEATURES - vnetdev->vlan_features = adapter->netdev->vlan_features; -#endif - adapter->vmdq_netdev[pool-1] = vnetdev; - err = register_netdev(vnetdev); - if (err) - break; - } - return err; -} - -int igb_remove_vmdq_netdevs(struct igb_adapter *adapter) -{ - int pool, err = 0; - - for (pool = 1; pool < adapter->vmdq_pools; pool++) { - unregister_netdev(adapter->vmdq_netdev[pool-1]); - free_netdev(adapter->vmdq_netdev[pool-1]); - adapter->vmdq_netdev[pool-1] = NULL; - } - return err; -} -#endif /* CONFIG_IGB_VMDQ_NETDEV */ - -/** - * igb_set_fw_version - Configure version string for ethtool - * @adapter: adapter struct - * - **/ -static void igb_set_fw_version(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_fw_version fw; - - e1000_get_fw_version(hw, &fw); - - switch (hw->mac.type) { - case e1000_i210: - case e1000_i211: - if (!(e1000_get_flash_presence_i210(hw))) { - snprintf(adapter->fw_version, - sizeof(adapter->fw_version), - "%2d.%2d-%d", - fw.invm_major, fw.invm_minor, fw.invm_img_type); - break; - } - /* fall through */ - default: - /* if option rom is valid, display its version too*/ - if (fw.or_valid) { - snprintf(adapter->fw_version, - sizeof(adapter->fw_version), - "%d.%d, 0x%08x, %d.%d.%d", - fw.eep_major, fw.eep_minor, fw.etrack_id, - fw.or_major, fw.or_build, fw.or_patch); - /* no option rom */ - } else { - if (fw.etrack_id != 0X0000) { - snprintf(adapter->fw_version, - sizeof(adapter->fw_version), - "%d.%d, 0x%08x", - fw.eep_major, fw.eep_minor, fw.etrack_id); - } else { - snprintf(adapter->fw_version, - sizeof(adapter->fw_version), - "%d.%d.%d", - fw.eep_major, fw.eep_minor, fw.eep_build); - } - } - break; - } - - return; -} - -/** - * igb_init_mas - init Media Autosense feature if enabled in the NVM - * - * @adapter: adapter struct - **/ -static void igb_init_mas(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u16 eeprom_data; - - e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data); - switch (hw->bus.func) { - case E1000_FUNC_0: - if (eeprom_data & IGB_MAS_ENABLE_0) - adapter->flags |= IGB_FLAG_MAS_ENABLE; - break; - case E1000_FUNC_1: - if (eeprom_data & IGB_MAS_ENABLE_1) - adapter->flags |= IGB_FLAG_MAS_ENABLE; - break; - case E1000_FUNC_2: - if (eeprom_data & IGB_MAS_ENABLE_2) - adapter->flags |= IGB_FLAG_MAS_ENABLE; - break; - case E1000_FUNC_3: - if (eeprom_data & IGB_MAS_ENABLE_3) - adapter->flags |= IGB_FLAG_MAS_ENABLE; - break; - default: - /* Shouldn't get here */ - dev_err(pci_dev_to_dev(adapter->pdev), - "%s:AMS: Invalid port configuration, returning\n", - adapter->netdev->name); - break; - } -} - -/** - * igb_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in igb_pci_tbl - * - * Returns 0 on success, negative on failure - * - * igb_probe initializes an adapter identified by a pci_dev structure. 
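Editor's note: igb_init_mas() above maps the PCI function number (0-3) to one of four enable bits read from the NVM compatibility word. A compact, table-free sketch of the same lookup; the one-bit-per-function layout is an assumption for illustration (the driver uses named IGB_MAS_ENABLE_n constants):

	#include <stdio.h>
	#include <stdbool.h>

	static bool mas_enabled(unsigned bus_func, unsigned nvm_compat_word)
	{
		if (bus_func > 3)
			return false;                    /* only functions 0-3 defined */
		return nvm_compat_word & (1u << bus_func);   /* assumed bit layout */
	}

	int main(void)
	{
		unsigned word = 0x0005;                  /* functions 0 and 2 enabled */
		unsigned f;

		for (f = 0; f < 4; f++)
			printf("func %u: MAS %s\n", f,
			       mas_enabled(f, word) ? "enabled" : "disabled");
		return 0;
	}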
- * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. - **/ -static int __devinit igb_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct net_device *netdev; - struct igb_adapter *adapter; - struct e1000_hw *hw; - u16 eeprom_data = 0; - u8 pba_str[E1000_PBANUM_LENGTH]; - s32 ret_val; - static int global_quad_port_a; /* global quad port a indication */ - int i, err, pci_using_dac; - static int cards_found; - - err = pci_enable_device_mem(pdev); - if (err) - return err; - - pci_using_dac = 0; - err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); - if (!err) { - err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; - } else { - err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); - if (err) { - err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); - if (err) { - IGB_ERR("No usable DMA configuration, " - "aborting\n"); - goto err_dma; - } - } - } - -#ifndef HAVE_ASPM_QUIRKS - /* 82575 requires that the pci-e link partner disable the L0s state */ - switch (pdev->device) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); - default: - break; - } - -#endif /* HAVE_ASPM_QUIRKS */ - err = pci_request_selected_regions(pdev, - pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); - if (err) - goto err_pci_reg; - - pci_enable_pcie_error_reporting(pdev); - - pci_set_master(pdev); - - err = -ENOMEM; -#ifdef HAVE_TX_MQ - netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), - IGB_MAX_TX_QUEUES); -#else - netdev = alloc_etherdev(sizeof(struct igb_adapter)); -#endif /* HAVE_TX_MQ */ - if (!netdev) - goto err_alloc_etherdev; - - SET_MODULE_OWNER(netdev); - SET_NETDEV_DEV(netdev, &pdev->dev); - - pci_set_drvdata(pdev, netdev); - adapter = netdev_priv(netdev); - adapter->netdev = netdev; - adapter->pdev = pdev; - hw = &adapter->hw; - hw->back = adapter; - adapter->port_num = hw->bus.func; - adapter->msg_enable = (1 << debug) - 1; - -#ifdef HAVE_PCI_ERS - err = pci_save_state(pdev); - if (err) - goto err_ioremap; -#endif - err = -EIO; - hw->hw_addr = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!hw->hw_addr) - goto err_ioremap; - -#ifdef HAVE_NET_DEVICE_OPS - netdev->netdev_ops = &igb_netdev_ops; -#else /* HAVE_NET_DEVICE_OPS */ - netdev->open = &igb_open; - netdev->stop = &igb_close; - netdev->get_stats = &igb_get_stats; -#ifdef HAVE_SET_RX_MODE - netdev->set_rx_mode = &igb_set_rx_mode; -#endif - netdev->set_multicast_list = &igb_set_rx_mode; - netdev->set_mac_address = &igb_set_mac; - netdev->change_mtu = &igb_change_mtu; - netdev->do_ioctl = &igb_ioctl; -#ifdef HAVE_TX_TIMEOUT - netdev->tx_timeout = &igb_tx_timeout; -#endif - netdev->vlan_rx_register = igb_vlan_mode; - netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; - netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - netdev->poll_controller = igb_netpoll; -#endif - netdev->hard_start_xmit = &igb_xmit_frame; -#endif /* HAVE_NET_DEVICE_OPS */ - igb_set_ethtool_ops(netdev); -#ifdef HAVE_TX_TIMEOUT - netdev->watchdog_timeo = 5 * HZ; -#endif - - strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); - - adapter->bd_number = cards_found; - - /* setup the private structure */ - err = igb_sw_init(adapter); - if (err) - goto err_sw_init; - - e1000_get_bus_info(hw); - - hw->phy.autoneg_wait_to_complete = FALSE; - 
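Editor's note: the probe above first tries a 64-bit DMA mask (recording that in pci_using_dac), falls back to 32-bit, and aborts only if neither sticks. A hedged sketch of that fallback with a hypothetical stand-in for the kernel's DMA-mask calls:

	#include <stdio.h>
	#include <stdbool.h>

	/* Pretend the platform only supports 32-bit DMA addressing. */
	static int set_dma_mask(unsigned bits) { return bits <= 32 ? 0 : -1; }

	static int configure_dma(bool *using_dac)
	{
		*using_dac = false;

		if (set_dma_mask(64) == 0) {
			*using_dac = true;     /* full 64-bit addressing */
			return 0;
		}
		if (set_dma_mask(32) == 0)
			return 0;              /* 32-bit fallback */

		fprintf(stderr, "No usable DMA configuration\n");
		return -1;
	}

	int main(void)
	{
		bool dac;

		if (configure_dma(&dac) == 0)
			printf("DMA configured, 64-bit addressing: %s\n",
			       dac ? "yes" : "no");
		return 0;
	}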
hw->mac.adaptive_ifs = FALSE; - - /* Copper options */ - if (hw->phy.media_type == e1000_media_type_copper) { - hw->phy.mdix = AUTO_ALL_MODES; - hw->phy.disable_polarity_correction = FALSE; - hw->phy.ms_type = e1000_ms_hw_default; - } - - if (e1000_check_reset_block(hw)) - dev_info(pci_dev_to_dev(pdev), - "PHY reset is blocked due to SOL/IDER session.\n"); - - /* - * features is initialized to 0 in allocation, it might have bits - * set by igb_sw_init so we should use an or instead of an - * assignment. - */ - netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM | -#ifdef NETIF_F_IPV6_CSUM - NETIF_F_IPV6_CSUM | -#endif -#ifdef NETIF_F_TSO - NETIF_F_TSO | -#ifdef NETIF_F_TSO6 - NETIF_F_TSO6 | -#endif -#endif /* NETIF_F_TSO */ -#ifdef NETIF_F_RXHASH - NETIF_F_RXHASH | -#endif - NETIF_F_RXCSUM | -#ifdef NETIF_F_HW_VLAN_CTAG_RX - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_TX; -#else - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX; -#endif - - if (hw->mac.type >= e1000_82576) - netdev->features |= NETIF_F_SCTP_CSUM; - -#ifdef HAVE_NDO_SET_FEATURES - /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; -#ifndef IGB_NO_LRO - - /* give us the option of enabling LRO later */ - netdev->hw_features |= NETIF_F_LRO; -#endif -#else -#ifdef NETIF_F_GRO - - /* this is only needed on kernels prior to 2.6.39 */ - netdev->features |= NETIF_F_GRO; -#endif -#endif - - /* set this bit last since it cannot be part of hw_features */ -#ifdef NETIF_F_HW_VLAN_CTAG_FILTER - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#else - netdev->features |= NETIF_F_HW_VLAN_FILTER; -#endif - -#ifdef HAVE_NETDEV_VLAN_FEATURES - netdev->vlan_features |= NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; - -#endif - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; - - adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); -#ifdef DEBUG - if (adapter->dmac != IGB_DMAC_DISABLE) - printk("%s: DMA Coalescing is enabled..\n", netdev->name); -#endif - - /* before reading the NVM, reset the controller to put the device in a - * known good starting state */ - e1000_reset_hw(hw); - - /* make sure the NVM is good */ - if (e1000_validate_nvm_checksum(hw) < 0) { - dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" - " Valid\n"); - err = -EIO; - goto err_eeprom; - } - - /* copy the MAC address out of the NVM */ - if (e1000_read_mac_addr(hw)) - dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); -#ifdef ETHTOOL_GPERMADDR - memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); - - if (!is_valid_ether_addr(netdev->perm_addr)) { -#else - if (!is_valid_ether_addr(netdev->dev_addr)) { -#endif - dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); - err = -EIO; - goto err_eeprom; - } - - memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); - adapter->mac_table[0].queue = adapter->vfs_allocated_count; - adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); - igb_rar_set(adapter, 0); - - /* get firmware version for ethtool -i */ - igb_set_fw_version(adapter); - - /* Check if Media Autosense is enabled */ - if (hw->mac.type == e1000_82580) - igb_init_mas(adapter); -#ifdef HAVE_TIMER_SETUP - timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0); - timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0); -#else - 
setup_timer(&adapter->watchdog_timer, &igb_watchdog, - (unsigned long) adapter); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, - (unsigned long) adapter); - setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, - (unsigned long) adapter); -#endif - - INIT_WORK(&adapter->reset_task, igb_reset_task); - INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); - - /* Initialize link properties that are user-changeable */ - adapter->fc_autoneg = true; - hw->mac.autoneg = true; - hw->phy.autoneg_advertised = 0x2f; - - hw->fc.requested_mode = e1000_fc_default; - hw->fc.current_mode = e1000_fc_default; - - e1000_validate_mdi_setting(hw); - - /* By default, support wake on port A */ - if (hw->bus.func == 0) - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - - /* Check the NVM for wake support for non-port A ports */ - if (hw->mac.type >= e1000_82580) - hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + - NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, - &eeprom_data); - else if (hw->bus.func == 1) - e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); - - if (eeprom_data & IGB_EEPROM_APME) - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - - /* now that we have the eeprom settings, apply the special cases where - * the eeprom may be wrong or the board simply won't support wake on - * lan on a particular port */ - switch (pdev->device) { - case E1000_DEV_ID_82575GB_QUAD_COPPER: - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82576_FIBER: - case E1000_DEV_ID_82576_SERDES: - /* Wake events only supported on port A for dual fiber - * regardless of eeprom setting */ - if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - case E1000_DEV_ID_82576_QUAD_COPPER: - case E1000_DEV_ID_82576_QUAD_COPPER_ET2: - /* if quad port adapter, disable WoL on all but port A */ - if (global_quad_port_a != 0) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - else - adapter->flags |= IGB_FLAG_QUAD_PORT_A; - /* Reset for multiple quad port adapters */ - if (++global_quad_port_a == 4) - global_quad_port_a = 0; - break; - default: - /* If the device can't wake, don't set software support */ - if (!device_can_wakeup(&adapter->pdev->dev)) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - } - - /* initialize the wol settings based on the eeprom settings */ - if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) - adapter->wol |= E1000_WUFC_MAG; - - /* Some vendors want WoL disabled by default, but still supported */ - if ((hw->mac.type == e1000_i350) && - (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - adapter->wol = 0; - } - - device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), - adapter->flags & IGB_FLAG_WOL_SUPPORTED); - - /* reset the hardware with the new settings */ - igb_reset(adapter); - adapter->devrc = 0; - -#ifdef HAVE_I2C_SUPPORT - /* Init the I2C interface */ - err = igb_init_i2c(adapter); - if (err) { - dev_err(&pdev->dev, "failed to init i2c interface\n"); - goto err_eeprom; - } -#endif /* HAVE_I2C_SUPPORT */ - - /* let the f/w know that the h/w is now under the control of the - * driver. 
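Editor's note: the quad-port wake-on-LAN rule above keeps WoL only on the first port of each group of four probed ports, using a static counter that wraps so a second quad adapter starts a new group. A minimal sketch of that counter trick:

	#include <stdio.h>
	#include <stdbool.h>

	static bool quad_port_keeps_wol(void)
	{
		static int global_quad_port_a;            /* probe-order counter */
		bool keep = (global_quad_port_a == 0);    /* only "port A" keeps WoL */

		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;           /* next quad adapter resets */
		return keep;
	}

	int main(void)
	{
		int port;

		for (port = 0; port < 8; port++)
			printf("probed port %d: WoL %s\n", port,
			       quad_port_keeps_wol() ? "kept" : "disabled");
		return 0;
	}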
*/ - igb_get_hw_control(adapter); - - strncpy(netdev->name, "eth%d", IFNAMSIZ); - err = register_netdev(netdev); - if (err) - goto err_register; - -#ifdef CONFIG_IGB_VMDQ_NETDEV - err = igb_init_vmdq_netdevs(adapter); - if (err) - goto err_register; -#endif - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -#ifdef IGB_DCA - if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; - dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); - igb_setup_dca(adapter); - } - -#endif -#ifdef HAVE_PTP_1588_CLOCK - /* do hw tstamp init after resetting */ - igb_ptp_init(adapter); -#endif /* HAVE_PTP_1588_CLOCK */ - - dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); - /* print bus type/speed/width info */ - dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", - netdev->name, - ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : - (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : - (hw->mac.type == e1000_i354) ? "integrated" : - "unknown"), - ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : - (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : - (hw->mac.type == e1000_i354) ? "integrated" : - "unknown")); - dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); - for (i = 0; i < 6; i++) - printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); - - ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); - if (ret_val) - strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); - dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, - pba_str); - - - /* Initialize the thermal sensor on i350 devices. */ - if (hw->mac.type == e1000_i350) { - if (hw->bus.func == 0) { - u16 ets_word; - - /* - * Read the NVM to determine if this i350 device - * supports an external thermal sensor. - */ - e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); - if (ets_word != 0x0000 && ets_word != 0xFFFF) - adapter->ets = true; - else - adapter->ets = false; - } -#ifdef IGB_HWMON - - igb_sysfs_init(adapter); -#else -#ifdef IGB_PROCFS - - igb_procfs_init(adapter); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ - } else { - adapter->ets = false; - } - - if (hw->phy.media_type == e1000_media_type_copper) { - switch (hw->mac.type) { - case e1000_i350: - case e1000_i210: - case e1000_i211: - /* Enable EEE for internal copper PHY devices */ - err = e1000_set_eee_i350(hw); - if (!err && - (adapter->flags & IGB_FLAG_EEE)) - adapter->eee_advert = - MDIO_EEE_100TX | MDIO_EEE_1000T; - break; - case e1000_i354: - if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & - (E1000_CTRL_EXT_LINK_MODE_SGMII)) { - err = e1000_set_eee_i354(hw); - if ((!err) && - (adapter->flags & IGB_FLAG_EEE)) - adapter->eee_advert = - MDIO_EEE_100TX | MDIO_EEE_1000T; - } - break; - default: - break; - } - } - - /* send driver version info to firmware */ - if (hw->mac.type >= e1000_i350) - igb_init_fw(adapter); - -#ifndef IGB_NO_LRO - if (netdev->features & NETIF_F_LRO) - dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); - else - dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); -#endif - dev_info(pci_dev_to_dev(pdev), - "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", - adapter->msix_entries ? "MSI-X" : - (adapter->flags & IGB_FLAG_HAS_MSI) ? 
"MSI" : "legacy", - adapter->num_rx_queues, adapter->num_tx_queues); - - cards_found++; - - pm_runtime_put_noidle(&pdev->dev); - return 0; - -err_register: - igb_release_hw_control(adapter); -#ifdef HAVE_I2C_SUPPORT - memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); -#endif /* HAVE_I2C_SUPPORT */ -err_eeprom: - if (!e1000_check_reset_block(hw)) - e1000_phy_hw_reset(hw); - - if (hw->flash_address) - iounmap(hw->flash_address); -err_sw_init: - igb_clear_interrupt_scheme(adapter); - igb_reset_sriov_capability(adapter); - iounmap(hw->hw_addr); -err_ioremap: - free_netdev(netdev); -err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); -err_pci_reg: -err_dma: - pci_disable_device(pdev); - return err; -} -#ifdef HAVE_I2C_SUPPORT -/* - * igb_remove_i2c - Cleanup I2C interface - * @adapter: pointer to adapter structure - * - */ -static void igb_remove_i2c(struct igb_adapter *adapter) -{ - - /* free the adapter bus structure */ - i2c_del_adapter(&adapter->i2c_adap); -} -#endif /* HAVE_I2C_SUPPORT */ - -/** - * igb_remove - Device Removal Routine - * @pdev: PCI device information struct - * - * igb_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a - * Hot-Plug event, or because the driver is going to be removed from - * memory. - **/ -static void __devexit igb_remove(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - - pm_runtime_get_noresume(&pdev->dev); -#ifdef HAVE_I2C_SUPPORT - igb_remove_i2c(adapter); -#endif /* HAVE_I2C_SUPPORT */ -#ifdef HAVE_PTP_1588_CLOCK - igb_ptp_stop(adapter); -#endif /* HAVE_PTP_1588_CLOCK */ - - /* flush_scheduled work may reschedule our watchdog task, so - * explicitly disable watchdog tasks from being rescheduled */ - set_bit(__IGB_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - del_timer_sync(&adapter->dma_err_timer); - del_timer_sync(&adapter->phy_info_timer); - - flush_scheduled_work(); - -#ifdef IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) { - dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); - dca_remove_requester(&pdev->dev); - adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); - } -#endif - - /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. */ - igb_release_hw_control(adapter); - - unregister_netdev(netdev); -#ifdef CONFIG_IGB_VMDQ_NETDEV - igb_remove_vmdq_netdevs(adapter); -#endif - - igb_clear_interrupt_scheme(adapter); - igb_reset_sriov_capability(adapter); - - iounmap(hw->hw_addr); - if (hw->flash_address) - iounmap(hw->flash_address); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); - -#ifdef IGB_HWMON - igb_sysfs_exit(adapter); -#else -#ifdef IGB_PROCFS - igb_procfs_exit(adapter); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ - kfree(adapter->mac_table); - kfree(adapter->shadow_vfta); - free_netdev(netdev); - - pci_disable_pcie_error_reporting(pdev); - - pci_disable_device(pdev); -} - -/** - * igb_sw_init - Initialize general software structures (struct igb_adapter) - * @adapter: board private structure to initialize - * - * igb_sw_init initializes the Adapter private data structure. 
- * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). - **/ -static int igb_sw_init(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - struct pci_dev *pdev = adapter->pdev; - - /* PCI config space info */ - - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - - pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); - - pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); - - /* set default ring sizes */ - adapter->tx_ring_count = IGB_DEFAULT_TXD; - adapter->rx_ring_count = IGB_DEFAULT_RXD; - - /* set default work limits */ - adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; - - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; - - /* Initialize the hardware-specific values */ - if (e1000_setup_init_funcs(hw, TRUE)) { - dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n"); - return -EIO; - } - - adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) * - hw->mac.rar_entry_count, - GFP_ATOMIC); - - /* Setup and initialize a copy of the hw vlan table array */ - adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES, - GFP_ATOMIC); -#ifdef NO_KNI - /* These calls may decrease the number of queues */ - if (hw->mac.type < e1000_i210) { - igb_set_sriov_capability(adapter); - } - - if (igb_init_interrupt_scheme(adapter, true)) { - dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - - /* Explicitly disable IRQ since the NIC can be in any state. */ - igb_irq_disable(adapter); - - set_bit(__IGB_DOWN, &adapter->state); -#endif - return 0; -} - -/** - * igb_open - Called when a network interface is made active - * @netdev: network interface device structure - * - * Returns 0 on success, negative value on failure - * - * The open entry point is called when a network interface is made - * active by the system (IFF_UP). At this point all resources needed - * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, - * and the stack is notified that the interface is ready. - **/ -static int __igb_open(struct net_device *netdev, bool resuming) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; -#ifdef CONFIG_PM_RUNTIME - struct pci_dev *pdev = adapter->pdev; -#endif /* CONFIG_PM_RUNTIME */ - int err; - int i; - - /* disallow open during test */ - if (test_bit(__IGB_TESTING, &adapter->state)) { - WARN_ON(resuming); - return -EBUSY; - } - -#ifdef CONFIG_PM_RUNTIME - if (!resuming) - pm_runtime_get_sync(&pdev->dev); -#endif /* CONFIG_PM_RUNTIME */ - - netif_carrier_off(netdev); - - /* allocate transmit descriptors */ - err = igb_setup_all_tx_resources(adapter); - if (err) - goto err_setup_tx; - - /* allocate receive descriptors */ - err = igb_setup_all_rx_resources(adapter); - if (err) - goto err_setup_rx; - - igb_power_up_link(adapter); - - /* before we allocate an interrupt, we must be ready to handle it. - * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt - * as soon as we call pci_request_irq, so we have to setup our - * clean_rx handler before we do so. */ - igb_configure(adapter); - - err = igb_request_irq(adapter); - if (err) - goto err_req_irq; - - /* Notify the stack of the actual queue counts. 
*/ - netif_set_real_num_tx_queues(netdev, - adapter->vmdq_pools ? 1 : - adapter->num_tx_queues); - - err = netif_set_real_num_rx_queues(netdev, - adapter->vmdq_pools ? 1 : - adapter->num_rx_queues); - if (err) - goto err_set_queues; - - /* From here on the code is the same as igb_up() */ - clear_bit(__IGB_DOWN, &adapter->state); - - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); - igb_configure_lli(adapter); - - /* Clear any pending interrupts. */ - E1000_READ_REG(hw, E1000_ICR); - - igb_irq_enable(adapter); - - /* notify VFs that reset has been completed */ - if (adapter->vfs_allocated_count) { - u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); - reg_data |= E1000_CTRL_EXT_PFRSTD; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); - } - - netif_tx_start_all_queues(netdev); - - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - schedule_work(&adapter->dma_err_task); - - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); - - return E1000_SUCCESS; - -err_set_queues: - igb_free_irq(adapter); -err_req_irq: - igb_release_hw_control(adapter); - igb_power_down_link(adapter); - igb_free_all_rx_resources(adapter); -err_setup_rx: - igb_free_all_tx_resources(adapter); -err_setup_tx: - igb_reset(adapter); - -#ifdef CONFIG_PM_RUNTIME - if (!resuming) - pm_runtime_put(&pdev->dev); -#endif /* CONFIG_PM_RUNTIME */ - - return err; -} - -static int igb_open(struct net_device *netdev) -{ - return __igb_open(netdev, false); -} - -/** - * igb_close - Disables a network interface - * @netdev: network interface device structure - * - * Returns 0, this is not allowed to fail - * - * The close entry point is called when an interface is de-activated - * by the OS. The hardware is still under the driver's control, but - * needs to be disabled. A global MAC reset is issued to stop the - * hardware, and all transmit and receive resources are freed. 
- **/ -static int __igb_close(struct net_device *netdev, bool suspending) -{ - struct igb_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_PM_RUNTIME - struct pci_dev *pdev = adapter->pdev; -#endif /* CONFIG_PM_RUNTIME */ - - WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); - -#ifdef CONFIG_PM_RUNTIME - if (!suspending) - pm_runtime_get_sync(&pdev->dev); -#endif /* CONFIG_PM_RUNTIME */ - - igb_down(adapter); - - igb_release_hw_control(adapter); - - igb_free_irq(adapter); - - igb_free_all_tx_resources(adapter); - igb_free_all_rx_resources(adapter); - -#ifdef CONFIG_PM_RUNTIME - if (!suspending) - pm_runtime_put_sync(&pdev->dev); -#endif /* CONFIG_PM_RUNTIME */ - - return 0; -} - -static int igb_close(struct net_device *netdev) -{ - return __igb_close(netdev, false); -} - -/** - * igb_setup_tx_resources - allocate Tx resources (Descriptors) - * @tx_ring: tx descriptor ring (for a specific queue) to setup - * - * Return 0 on success, negative on failure - **/ -int igb_setup_tx_resources(struct igb_ring *tx_ring) -{ - struct device *dev = tx_ring->dev; - int size; - - size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc(size); - if (!tx_ring->tx_buffer_info) - goto err; - - /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); - - tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); - - if (!tx_ring->desc) - goto err; - - tx_ring->next_to_use = 0; - tx_ring->next_to_clean = 0; - - return 0; - -err: - vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); - return -ENOMEM; -} - -/** - * igb_setup_all_tx_resources - wrapper to allocate Tx resources - * (Descriptors) for all queues - * @adapter: board private structure - * - * Return 0 on success, negative on failure - **/ -static int igb_setup_all_tx_resources(struct igb_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - int i, err = 0; - - for (i = 0; i < adapter->num_tx_queues; i++) { - err = igb_setup_tx_resources(adapter->tx_ring[i]); - if (err) { - dev_err(pci_dev_to_dev(pdev), - "Allocation for Tx Queue %u failed\n", i); - for (i--; i >= 0; i--) - igb_free_tx_resources(adapter->tx_ring[i]); - break; - } - } - - return err; -} - -/** - * igb_setup_tctl - configure the transmit control registers - * @adapter: Board private structure - **/ -void igb_setup_tctl(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 tctl; - - /* disable queue 0 which is enabled by default on 82575 and 82576 */ - E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0); - - /* Program the Transmit Control Register */ - tctl = E1000_READ_REG(hw, E1000_TCTL); - tctl &= ~E1000_TCTL_CT; - tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | - (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); - - e1000_config_collision_dist(hw); - - /* Enable transmits */ - tctl |= E1000_TCTL_EN; - - E1000_WRITE_REG(hw, E1000_TCTL, tctl); -} - -static u32 igb_tx_wthresh(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - switch (hw->mac.type) { - case e1000_i354: - return 4; - case e1000_82576: - if (adapter->msix_entries) - return 1; - default: - break; - } - - return 16; -} - -/** - * igb_configure_tx_ring - Configure transmit ring after Reset - * @adapter: board private structure - * @ring: tx ring to configure - * - * Configure a transmit ring after a reset. 
- **/ -void igb_configure_tx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) -{ - struct e1000_hw *hw = &adapter->hw; - u32 txdctl = 0; - u64 tdba = ring->dma; - int reg_idx = ring->reg_idx; - - /* disable the queue */ - E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0); - E1000_WRITE_FLUSH(hw); - mdelay(10); - - E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_tx_desc)); - E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx), - tdba & 0x00000000ffffffffULL); - E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32); - - ring->tail = hw->hw_addr + E1000_TDT(reg_idx); - E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0); - writel(0, ring->tail); - - txdctl |= IGB_TX_PTHRESH; - txdctl |= IGB_TX_HTHRESH << 8; - txdctl |= igb_tx_wthresh(adapter) << 16; - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl); -} - -/** - * igb_configure_tx - Configure transmit Unit after Reset - * @adapter: board private structure - * - * Configure the Tx unit of the MAC after a reset. - **/ -static void igb_configure_tx(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - igb_configure_tx_ring(adapter, adapter->tx_ring[i]); -} - -/** - * igb_setup_rx_resources - allocate Rx resources (Descriptors) - * @rx_ring: rx descriptor ring (for a specific queue) to setup - * - * Returns 0 on success, negative on failure - **/ -int igb_setup_rx_resources(struct igb_ring *rx_ring) -{ - struct device *dev = rx_ring->dev; - int size, desc_len; - - size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; - - desc_len = sizeof(union e1000_adv_rx_desc); - - /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * desc_len; - rx_ring->size = ALIGN(rx_ring->size, 4096); - - rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - - if (!rx_ring->desc) - goto err; - - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; - - return 0; - -err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the receive descriptor" - " ring\n"); - return -ENOMEM; -} - -/** - * igb_setup_all_rx_resources - wrapper to allocate Rx resources - * (Descriptors) for all queues - * @adapter: board private structure - * - * Return 0 on success, negative on failure - **/ -static int igb_setup_all_rx_resources(struct igb_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - int i, err = 0; - - for (i = 0; i < adapter->num_rx_queues; i++) { - err = igb_setup_rx_resources(adapter->rx_ring[i]); - if (err) { - dev_err(pci_dev_to_dev(pdev), - "Allocation for Rx Queue %u failed\n", i); - for (i--; i >= 0; i--) - igb_free_rx_resources(adapter->rx_ring[i]); - break; - } - } - - return err; -} - -/** - * igb_setup_mrqc - configure the multiple receive queue control registers - * @adapter: Board private structure - **/ -static void igb_setup_mrqc(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0, shift2 = 0; - static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, - 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, - 0xA32DCB77, 0x0CF23080, 0x3BB7426A, - 0xFA01ACBE }; - - /* Fill out hash function seeds */ - for (j = 0; j < 10; j++) - E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]); - - num_rx_queues = adapter->rss_queues; - - /* 82575 and 82576 supports 2 RSS queues for VMDq */ - 
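/*
 * Aside (illustrative sketch, not part of this patch): both
 * igb_setup_tx_resources() and igb_setup_rx_resources() above size the DMA
 * descriptor ring as count * sizeof(descriptor) and round the result up to a
 * 4 KiB boundary before calling dma_alloc_coherent().  The standalone C
 * below reproduces only that arithmetic; RING_ALIGN and the example values
 * (a 16-byte advanced descriptor, an arbitrary count of 200) are assumptions
 * for illustration, not taken from the driver.
 */
#include <stdio.h>
#include <stddef.h>

/* Round x up to the next multiple of the power-of-two alignment a. */
#define RING_ALIGN(x, a)   (((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
        size_t desc_size = 16;    /* assumed advanced Tx/Rx descriptor size */
        size_t count     = 200;   /* arbitrary example ring length */
        size_t ring_size = RING_ALIGN(count * desc_size, 4096);

        printf("%zu descriptors, %zu raw bytes -> %zu bytes allocated\n",
               count, count * desc_size, ring_size);
        return 0;
}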
switch (hw->mac.type) { - case e1000_82575: - if (adapter->vmdq_pools) { - shift = 2; - shift2 = 6; - break; - } - shift = 6; - break; - case e1000_82576: - /* 82576 supports 2 RSS queues for SR-IOV */ - if (adapter->vfs_allocated_count || adapter->vmdq_pools) { - shift = 3; - num_rx_queues = 2; - } - break; - default: - break; - } - - /* - * Populate the redirection table 4 entries at a time. To do this - * we are generating the results for n and n+2 and then interleaving - * those with the results with n+1 and n+3. - */ - for (j = 0; j < 32; j++) { - /* first pass generates n and n+2 */ - u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; - u32 reta = (base & 0x07800780) >> (7 - shift); - - /* second pass generates n+1 and n+3 */ - base += 0x00010001 * num_rx_queues; - reta |= (base & 0x07800780) << (1 + shift); - - /* generate 2nd table for 82575 based parts */ - if (shift2) - reta |= (0x01010101 * num_rx_queues) << shift2; - - E1000_WRITE_REG(hw, E1000_RETA(j), reta); - } - - /* - * Disable raw packet checksumming so that RSS hash is placed in - * descriptor on writeback. No need to enable TCP/UDP/IP checksum - * offloads as they are enabled by default - */ - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - rxcsum |= E1000_RXCSUM_PCSD; - - if (adapter->hw.mac.type >= e1000_82576) - /* Enable Receive Checksum Offload for SCTP */ - rxcsum |= E1000_RXCSUM_CRCOFL; - - /* Don't need to set TUOFL or IPOFL, they default to 1 */ - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); - - /* Generate RSS hash based on packet types, TCP/UDP - * port numbers and/or IPv4/v6 src and dst addresses - */ - mrqc = E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP | - E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP | - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; - - if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) - mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; - if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) - mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; - - /* If VMDq is enabled then we set the appropriate mode for that, else - * we default to RSS so that an RSS hash is calculated per packet even - * if we are only using one queue */ - if (adapter->vfs_allocated_count || adapter->vmdq_pools) { - if (hw->mac.type > e1000_82575) { - /* Set the default pool for the PF's first queue */ - u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL); - vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | - E1000_VT_CTL_DISABLE_DEF_POOL); - vtctl |= adapter->vfs_allocated_count << - E1000_VT_CTL_DEFAULT_POOL_SHIFT; - E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl); - } else if (adapter->rss_queues > 1) { - /* set default queue for pool 1 to queue 2 */ - E1000_WRITE_REG(hw, E1000_VT_CTL, - adapter->rss_queues << 7); - } - if (adapter->rss_queues > 1) - mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; - else - mrqc |= E1000_MRQC_ENABLE_VMDQ; - } else { - mrqc |= E1000_MRQC_ENABLE_RSS_4Q; - } - igb_vmm_control(adapter); - - E1000_WRITE_REG(hw, E1000_MRQC, mrqc); -} - -/** - * igb_setup_rctl - configure the receive control registers - * @adapter: Board private structure - **/ -void igb_setup_rctl(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 rctl; - - rctl = E1000_READ_REG(hw, E1000_RCTL); - - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); - - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | - (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - - /* - * enable stripping of CRC. It's unlikely this will break BMC - * redirection as it did with e1000. 
Newer features require - * that the HW strips the CRC. - */ - rctl |= E1000_RCTL_SECRC; - - /* disable store bad packets and clear size bits. */ - rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); - - /* enable LPE to prevent packets larger than max_frame_size */ - rctl |= E1000_RCTL_LPE; - - /* disable queue 0 to prevent tail write w/o re-config */ - E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0); - - /* Attention!!! For SR-IOV PF driver operations you must enable - * queue drop for all VF and PF queues to prevent head of line blocking - * if an un-trusted VF does not provide descriptors to hardware. - */ - if (adapter->vfs_allocated_count) { - /* set all queue drop enable bits */ - E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES); - } - - E1000_WRITE_REG(hw, E1000_RCTL, rctl); -} - -static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, - int vfn) -{ - struct e1000_hw *hw = &adapter->hw; - u32 vmolr; - - /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags */ - if (vfn < adapter->vfs_allocated_count && - adapter->vf_data[vfn].vlans_enabled) - size += VLAN_HLEN; - -#ifdef CONFIG_IGB_VMDQ_NETDEV - if (vfn >= adapter->vfs_allocated_count) { - int queue = vfn - adapter->vfs_allocated_count; - struct igb_vmdq_adapter *vadapter; - - vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]); - if (vadapter->vlgrp) - size += VLAN_HLEN; - } -#endif - vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); - vmolr &= ~E1000_VMOLR_RLPML_MASK; - vmolr |= size | E1000_VMOLR_LPE; - E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); - - return 0; -} - -/** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure - * - * Configure maximum receivable packet size. - **/ -static void igb_rlpml_set(struct igb_adapter *adapter) -{ - u32 max_frame_size = adapter->max_frame_size; - struct e1000_hw *hw = &adapter->hw; - u16 pf_id = adapter->vfs_allocated_count; - - if (adapter->vmdq_pools && hw->mac.type != e1000_82575) { - int i; - for (i = 0; i < adapter->vmdq_pools; i++) - igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i); - /* - * If we're in VMDQ or SR-IOV mode, then set global RLPML - * to our max jumbo frame size, in case we need to enable - * jumbo frames on one of the rings later. - * This will not pass over-length frames into the default - * queue because it's gated by the VMOLR.RLPML. - */ - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } - /* Set VF RLPML for the PF device. 
*/ - if (adapter->vfs_allocated_count) - igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - - E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size); -} - -static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, - int vfn, bool enable) -{ - struct e1000_hw *hw = &adapter->hw; - u32 val; - void __iomem *reg; - - if (hw->mac.type < e1000_82576) - return; - - if (hw->mac.type == e1000_i350) - reg = hw->hw_addr + E1000_DVMOLR(vfn); - else - reg = hw->hw_addr + E1000_VMOLR(vfn); - - val = readl(reg); - if (enable) - val |= E1000_VMOLR_STRVLAN; - else - val &= ~(E1000_VMOLR_STRVLAN); - writel(val, reg); -} -static inline void igb_set_vmolr(struct igb_adapter *adapter, - int vfn, bool aupe) -{ - struct e1000_hw *hw = &adapter->hw; - u32 vmolr; - - /* - * This register exists only on 82576 and newer so if we are older then - * we should exit and do nothing - */ - if (hw->mac.type < e1000_82576) - return; - - vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); - - if (aupe) - vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ - else - vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ - - /* clear all bits that might not be set */ - vmolr &= ~E1000_VMOLR_RSSE; - - if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) - vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ - - vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ - vmolr |= E1000_VMOLR_LPE; /* Accept long packets */ - - E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); -} - -/** - * igb_configure_rx_ring - Configure a receive ring after Reset - * @adapter: board private structure - * @ring: receive ring to be configured - * - * Configure the Rx unit of the MAC after a reset. - **/ -void igb_configure_rx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) -{ - struct e1000_hw *hw = &adapter->hw; - u64 rdba = ring->dma; - int reg_idx = ring->reg_idx; - u32 srrctl = 0, rxdctl = 0; - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - /* - * RLPML prevents us from receiving a frame larger than max_frame so - * it is safe to just set the rx_buffer_len to max_frame without the - * risk of an skb over panic. 
- */ - ring->rx_buffer_len = max_t(u32, adapter->max_frame_size, - MAXIMUM_ETHERNET_VLAN_SIZE); - -#endif - /* disable the queue */ - E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0); - - /* Set DMA base address registers */ - E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx), - rdba & 0x00000000ffffffffULL); - E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32); - E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_rx_desc)); - - /* initialize head and tail */ - ring->tail = hw->hw_addr + E1000_RDT(reg_idx); - E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0); - writel(0, ring->tail); - - /* reset next-to- use/clean to place SW in sync with hardwdare */ - ring->next_to_clean = 0; - ring->next_to_use = 0; -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - ring->next_to_alloc = 0; - -#endif - /* set descriptor configuration */ -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; -#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ - srrctl = ALIGN(ring->rx_buffer_len, 1024) >> - E1000_SRRCTL_BSIZEPKT_SHIFT; -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; -#ifdef HAVE_PTP_1588_CLOCK - if (hw->mac.type >= e1000_82580) - srrctl |= E1000_SRRCTL_TIMESTAMP; -#endif /* HAVE_PTP_1588_CLOCK */ - /* - * We should set the drop enable bit if: - * SR-IOV is enabled - * or - * Flow Control is disabled and number of RX queues > 1 - * - * This allows us to avoid head of line blocking for security - * and performance reasons. - */ - if (adapter->vfs_allocated_count || - (adapter->num_rx_queues > 1 && - (hw->fc.requested_mode == e1000_fc_none || - hw->fc.requested_mode == e1000_fc_rx_pause))) - srrctl |= E1000_SRRCTL_DROP_EN; - - E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl); - - /* set filtering for VMDQ pools */ - igb_set_vmolr(adapter, reg_idx & 0x7, true); - - rxdctl |= IGB_RX_PTHRESH; - rxdctl |= IGB_RX_HTHRESH << 8; - rxdctl |= IGB_RX_WTHRESH << 16; - - /* enable receive descriptor fetching */ - rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; - E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl); -} - -/** - * igb_configure_rx - Configure receive Unit after Reset - * @adapter: board private structure - * - * Configure the Rx unit of the MAC after a reset. 
- **/ -static void igb_configure_rx(struct igb_adapter *adapter) -{ - int i; - - /* set UTA to appropriate mode */ - igb_set_uta(adapter); - - igb_full_sync_mac_table(adapter); - /* Setup the HW Rx Head and Tail Descriptor Pointers and - * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - igb_configure_rx_ring(adapter, adapter->rx_ring[i]); -} - -/** - * igb_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue - * - * Free all transmit software resources - **/ -void igb_free_tx_resources(struct igb_ring *tx_ring) -{ - igb_clean_tx_ring(tx_ring); - - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!tx_ring->desc) - return; - - dma_free_coherent(tx_ring->dev, tx_ring->size, - tx_ring->desc, tx_ring->dma); - - tx_ring->desc = NULL; -} - -/** - * igb_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure - * - * Free all transmit software resources - **/ -static void igb_free_all_tx_resources(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - igb_free_tx_resources(adapter->tx_ring[i]); -} - -void igb_unmap_and_free_tx_resource(struct igb_ring *ring, - struct igb_tx_buffer *tx_buffer) -{ - if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } - tx_buffer->next_to_watch = NULL; - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - /* buffer_info must be completely set up in the transmit path */ -} - -/** - * igb_clean_tx_ring - Free Tx Buffers - * @tx_ring: ring to be cleaned - **/ -static void igb_clean_tx_ring(struct igb_ring *tx_ring) -{ - struct igb_tx_buffer *buffer_info; - unsigned long size; - u16 i; - - if (!tx_ring->tx_buffer_info) - return; - /* Free all the Tx ring sk_buffs */ - - for (i = 0; i < tx_ring->count; i++) { - buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); - } - - netdev_tx_reset_queue(txring_txq(tx_ring)); - - size = sizeof(struct igb_tx_buffer) * tx_ring->count; - memset(tx_ring->tx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); - - tx_ring->next_to_use = 0; - tx_ring->next_to_clean = 0; -} - -/** - * igb_clean_all_tx_rings - Free Tx Buffers for all queues - * @adapter: board private structure - **/ -static void igb_clean_all_tx_rings(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_ring(adapter->tx_ring[i]); -} - -/** - * igb_free_rx_resources - Free Rx Resources - * @rx_ring: ring to clean the resources from - * - * Free all receive software resources - **/ -void igb_free_rx_resources(struct igb_ring *rx_ring) -{ - igb_clean_rx_ring(rx_ring); - - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!rx_ring->desc) - return; - - dma_free_coherent(rx_ring->dev, rx_ring->size, - rx_ring->desc, rx_ring->dma); - - rx_ring->desc = NULL; -} - -/** - * igb_free_all_rx_resources - Free Rx Resources for All Queues - * @adapter: board private structure - * - * Free all receive software resources - 
**/ -static void igb_free_all_rx_resources(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_rx_queues; i++) - igb_free_rx_resources(adapter->rx_ring[i]); -} - -/** - * igb_clean_rx_ring - Free Rx Buffers per Queue - * @rx_ring: ring to free buffers from - **/ -void igb_clean_rx_ring(struct igb_ring *rx_ring) -{ - unsigned long size; - u16 i; - - if (!rx_ring->rx_buffer_info) - return; - -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - if (rx_ring->skb) - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - -#endif - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - if (buffer_info->dma) { - dma_unmap_single(rx_ring->dev, - buffer_info->dma, - rx_ring->rx_buffer_len, - DMA_FROM_DEVICE); - buffer_info->dma = 0; - } - - if (buffer_info->skb) { - dev_kfree_skb(buffer_info->skb); - buffer_info->skb = NULL; - } -#else - if (!buffer_info->page) - continue; - - dma_unmap_page(rx_ring->dev, - buffer_info->dma, - PAGE_SIZE, - DMA_FROM_DEVICE); - __free_page(buffer_info->page); - - buffer_info->page = NULL; -#endif - } - - size = sizeof(struct igb_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; -} - -/** - * igb_clean_all_rx_rings - Free Rx Buffers for all queues - * @adapter: board private structure - **/ -static void igb_clean_all_rx_rings(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_ring(adapter->rx_ring[i]); -} - -/** - * igb_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int igb_set_mac(struct net_device *netdev, void *p) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - igb_del_mac_filter(adapter, hw->mac.addr, - adapter->vfs_allocated_count); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - - /* set the correct pool for the new PF MAC address in entry 0 */ - return igb_add_mac_filter(adapter, hw->mac.addr, - adapter->vfs_allocated_count); -} - -/** - * igb_write_mc_addr_list - write multicast addresses to MTA - * @netdev: network interface device structure - * - * Writes multicast address list to the MTA hash table. 
- * Returns: -ENOMEM on failure - * 0 on no addresses written - * X on writing X addresses to MTA - **/ -int igb_write_mc_addr_list(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; -#ifdef NETDEV_HW_ADDR_T_MULTICAST - struct netdev_hw_addr *ha; -#else - struct dev_mc_list *ha; -#endif - u8 *mta_list; - int i, count; -#ifdef CONFIG_IGB_VMDQ_NETDEV - int vm; -#endif - count = netdev_mc_count(netdev); -#ifdef CONFIG_IGB_VMDQ_NETDEV - for (vm = 1; vm < adapter->vmdq_pools; vm++) { - if (!adapter->vmdq_netdev[vm]) - break; - if (!netif_running(adapter->vmdq_netdev[vm])) - continue; - count += netdev_mc_count(adapter->vmdq_netdev[vm]); - } -#endif - - if (!count) { - e1000_update_mc_addr_list(hw, NULL, 0); - return 0; - } - mta_list = kzalloc(count * 6, GFP_ATOMIC); - if (!mta_list) - return -ENOMEM; - - /* The shared function expects a packed array of only addresses. */ - i = 0; - netdev_for_each_mc_addr(ha, netdev) -#ifdef NETDEV_HW_ADDR_T_MULTICAST - memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); -#else - memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN); -#endif -#ifdef CONFIG_IGB_VMDQ_NETDEV - for (vm = 1; vm < adapter->vmdq_pools; vm++) { - if (!adapter->vmdq_netdev[vm]) - break; - if (!netif_running(adapter->vmdq_netdev[vm]) || - !netdev_mc_count(adapter->vmdq_netdev[vm])) - continue; - netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm]) -#ifdef NETDEV_HW_ADDR_T_MULTICAST - memcpy(mta_list + (i++ * ETH_ALEN), - ha->addr, ETH_ALEN); -#else - memcpy(mta_list + (i++ * ETH_ALEN), - ha->dmi_addr, ETH_ALEN); -#endif - } -#endif - e1000_update_mc_addr_list(hw, mta_list, i); - kfree(mta_list); - - return count; -} - -void igb_rar_set(struct igb_adapter *adapter, u32 index) -{ - u32 rar_low, rar_high; - struct e1000_hw *hw = &adapter->hw; - u8 *addr = adapter->mac_table[index].addr; - /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian - */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); - - /* Indicate to hardware the Address is Valid. 
*/ - if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) - rar_high |= E1000_RAH_AV; - - if (hw->mac.type == e1000_82575) - rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; - else - rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; - - E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); - E1000_WRITE_FLUSH(hw); - E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); - E1000_WRITE_FLUSH(hw); -} - -void igb_full_sync_mac_table(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - for (i = 0; i < hw->mac.rar_entry_count; i++) { - igb_rar_set(adapter, i); - } -} - -void igb_sync_mac_table(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - for (i = 0; i < hw->mac.rar_entry_count; i++) { - if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED) - igb_rar_set(adapter, i); - adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED); - } -} - -int igb_available_rars(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i, count = 0; - - for (i = 0; i < hw->mac.rar_entry_count; i++) { - if (adapter->mac_table[i].state == 0) - count++; - } - return count; -} - -#ifdef HAVE_SET_RX_MODE -/** - * igb_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -static int igb_write_uc_addr_list(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - unsigned int vfn = adapter->vfs_allocated_count; - int count = 0; - - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > igb_available_rars(adapter)) - return -ENOMEM; - if (!netdev_uc_empty(netdev)) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - struct netdev_hw_addr *ha; -#else - struct dev_mc_list *ha; -#endif - netdev_for_each_uc_addr(ha, netdev) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - igb_del_mac_filter(adapter, ha->addr, vfn); - igb_add_mac_filter(adapter, ha->addr, vfn); -#else - igb_del_mac_filter(adapter, ha->da_addr, vfn); - igb_add_mac_filter(adapter, ha->da_addr, vfn); -#endif - count++; - } - } - return count; -} - -#endif /* HAVE_SET_RX_MODE */ -/** - * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure - * - * The set_rx_mode entry point is called whenever the unicast or multicast - * address lists or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast, - * promiscuous mode, and all-multi behavior. 
- **/ -static void igb_set_rx_mode(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - unsigned int vfn = adapter->vfs_allocated_count; - u32 rctl, vmolr = 0; - int count; - - /* Check for Promiscuous and All Multicast modes */ - rctl = E1000_READ_REG(hw, E1000_RCTL); - - /* clear the effected bits */ - rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); - - if (netdev->flags & IFF_PROMISC) { - rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); - /* retain VLAN HW filtering if in VT mode */ - if (adapter->vfs_allocated_count || adapter->vmdq_pools) - rctl |= E1000_RCTL_VFE; - } else { - if (netdev->flags & IFF_ALLMULTI) { - rctl |= E1000_RCTL_MPE; - vmolr |= E1000_VMOLR_MPME; - } else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - count = igb_write_mc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_MPE; - vmolr |= E1000_VMOLR_MPME; - } else if (count) { - vmolr |= E1000_VMOLR_ROMPE; - } - } -#ifdef HAVE_SET_RX_MODE - /* - * Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = igb_write_uc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_UPE; - vmolr |= E1000_VMOLR_ROPE; - } -#endif /* HAVE_SET_RX_MODE */ - rctl |= E1000_RCTL_VFE; - } - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - - /* - * In order to support SR-IOV and eventually VMDq it is necessary to set - * the VMOLR to enable the appropriate modes. Without this workaround - * we will have issues with VLAN tag stripping not being done for frames - * that are only arriving because we are the default pool - */ - if (hw->mac.type < e1000_82576) - return; - - vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) & - ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); - E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); - igb_restore_vf_multicasts(adapter); -} - -static void igb_check_wvbr(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 wvbr = 0; - - switch (hw->mac.type) { - case e1000_82576: - case e1000_i350: - if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR))) - return; - break; - default: - break; - } - - adapter->wvbr |= wvbr; -} - -#define IGB_STAGGERED_QUEUE_OFFSET 8 - -static void igb_spoof_check(struct igb_adapter *adapter) -{ - int j; - - if (!adapter->wvbr) - return; - - switch (adapter->hw.mac.type) { - case e1000_82576: - for (j = 0; j < adapter->vfs_allocated_count; j++) { - if (adapter->wvbr & (1 << j) || - adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { - DPRINTK(DRV, WARNING, - "Spoof event(s) detected on VF %d\n", j); - adapter->wvbr &= - ~((1 << j) | - (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); - } - } - break; - case e1000_i350: - for (j = 0; j < adapter->vfs_allocated_count; j++) { - if (adapter->wvbr & (1 << j)) { - DPRINTK(DRV, WARNING, - "Spoof event(s) detected on VF %d\n", j); - adapter->wvbr &= ~(1 << j); - } - } - break; - default: - break; - } -} - -/* Need to wait a few seconds after link up to get diagnostic information from - * the phy */ -#ifdef HAVE_TIMER_SETUP -static void igb_update_phy_info(struct timer_list *t) -{ - struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer); -#else -static void igb_update_phy_info(unsigned long data) -{ - struct igb_adapter *adapter = (struct igb_adapter *) data; 
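/*
 * Aside (illustrative sketch, not part of this patch): igb_spoof_check()
 * above handles 82576-class parts by treating a spoof event for VF j as
 * latched either at bit j or at bit (j + IGB_STAGGERED_QUEUE_OFFSET) of the
 * accumulated WVBR value, and clears both bits once reported.  The
 * standalone C below mirrors only that bitmask bookkeeping; the names and
 * the "pretend VF 2 spoofed" setup are illustrative assumptions.
 */
#include <stdio.h>
#include <stdint.h>

#define STAGGERED_QUEUE_OFFSET 8

int main(void)
{
        uint32_t wvbr = 0;
        int j, num_vfs = 4;

        wvbr |= 1u << (2 + STAGGERED_QUEUE_OFFSET);   /* pretend VF 2 spoofed */

        for (j = 0; j < num_vfs; j++) {
                uint32_t mask = (1u << j) |
                                (1u << (j + STAGGERED_QUEUE_OFFSET));

                if (wvbr & mask) {
                        printf("spoof event(s) detected on VF %d\n", j);
                        wvbr &= ~mask;   /* acknowledge, as the driver does */
                }
        }
        return 0;
}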
-#endif - e1000_get_phy_info(&adapter->hw); -} - -/** - * igb_has_link - check shared code for link and determine up/down - * @adapter: pointer to driver private info - **/ -bool igb_has_link(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - bool link_active = FALSE; - - /* get_link_status is set on LSC (link status) interrupt or - * rx sequence error interrupt. get_link_status will stay - * false until the e1000_check_for_link establishes link - * for copper adapters ONLY - */ - switch (hw->phy.media_type) { - case e1000_media_type_copper: - if (!hw->mac.get_link_status) - return true; - case e1000_media_type_internal_serdes: - e1000_check_for_link(hw); - link_active = !hw->mac.get_link_status; - break; - case e1000_media_type_unknown: - default: - break; - } - - if (((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) && - (hw->phy.id == I210_I_PHY_ID)) { - if (!netif_carrier_ok(adapter->netdev)) { - adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; - } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { - adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; - adapter->link_check_timeout = jiffies; - } - } - - return link_active; -} - -/** - * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -#ifdef HAVE_TIMER_SETUP -static void igb_watchdog(struct timer_list *t) -{ - struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer); -#else -static void igb_watchdog(unsigned long data) -{ - struct igb_adapter *adapter = (struct igb_adapter *)data; -#endif - /* Do the rest outside of interrupt context */ - schedule_work(&adapter->watchdog_task); -} - -static void igb_watchdog_task(struct work_struct *work) -{ - struct igb_adapter *adapter = container_of(work, - struct igb_adapter, - watchdog_task); - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - u32 link; - int i; - u32 thstat, ctrl_ext; - u32 connsw; - - link = igb_has_link(adapter); - /* Force link down if we have fiber to swap to */ - if (adapter->flags & IGB_FLAG_MAS_ENABLE) { - if (hw->phy.media_type == e1000_media_type_copper) { - connsw = E1000_READ_REG(hw, E1000_CONNSW); - if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) - link = 0; - } - } - - if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { - if (time_after(jiffies, (adapter->link_check_timeout + HZ))) - adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; - else - link = FALSE; - } - - if (link) { - /* Perform a reset if the media type changed. */ - if (hw->dev_spec._82575.media_changed) { - hw->dev_spec._82575.media_changed = false; - adapter->flags |= IGB_FLAG_MEDIA_RESET; - igb_reset(adapter); - } - - /* Cancel scheduled suspend requests. */ - pm_runtime_resume(netdev->dev.parent); - - if (!netif_carrier_ok(netdev)) { - u32 ctrl; - e1000_get_speed_and_duplex(hw, - &adapter->link_speed, - &adapter->link_duplex); - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, " - "Flow Control: %s\n", - netdev->name, - adapter->link_speed, - adapter->link_duplex == FULL_DUPLEX ? - "Full Duplex" : "Half Duplex", - ((ctrl & E1000_CTRL_TFCE) && - (ctrl & E1000_CTRL_RFCE)) ? "RX/TX": - ((ctrl & E1000_CTRL_RFCE) ? "RX" : - ((ctrl & E1000_CTRL_TFCE) ? 
"TX" : "None"))); - /* adjust timeout factor according to speed/duplex */ - adapter->tx_timeout_factor = 1; - switch (adapter->link_speed) { - case SPEED_10: - adapter->tx_timeout_factor = 14; - break; - case SPEED_100: - /* maybe add some timeout factor ? */ - break; - default: - break; - } - - netif_carrier_on(netdev); - netif_tx_wake_all_queues(netdev); - - igb_ping_all_vfs(adapter); -#ifdef IFLA_VF_MAX - igb_check_vf_rate_limit(adapter); -#endif /* IFLA_VF_MAX */ - - /* link state has changed, schedule phy info update */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->phy_info_timer, - round_jiffies(jiffies + 2 * HZ)); - } - } else { - if (netif_carrier_ok(netdev)) { - adapter->link_speed = 0; - adapter->link_duplex = 0; - /* check for thermal sensor event on i350 */ - if (hw->mac.type == e1000_i350) { - thstat = E1000_READ_REG(hw, E1000_THSTAT); - ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); - if ((hw->phy.media_type == - e1000_media_type_copper) && - !(ctrl_ext & - E1000_CTRL_EXT_LINK_MODE_SGMII)) { - if (thstat & E1000_THSTAT_PWR_DOWN) { - printk(KERN_ERR "igb: %s The " - "network adapter was stopped " - "because it overheated.\n", - netdev->name); - } - if (thstat & E1000_THSTAT_LINK_THROTTLE) { - printk(KERN_INFO - "igb: %s The network " - "adapter supported " - "link speed " - "was downshifted " - "because it " - "overheated.\n", - netdev->name); - } - } - } - - /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Down\n", - netdev->name); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - - igb_ping_all_vfs(adapter); - - /* link state has changed, schedule phy info update */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->phy_info_timer, - round_jiffies(jiffies + 2 * HZ)); - /* link is down, time to check for alternate media */ - if (adapter->flags & IGB_FLAG_MAS_ENABLE) { - igb_check_swap_media(adapter); - if (adapter->flags & IGB_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } - } - pm_schedule_suspend(netdev->dev.parent, - MSEC_PER_SEC * 5); - - /* also check for alternate media here */ - } else if (!netif_carrier_ok(netdev) && - (adapter->flags & IGB_FLAG_MAS_ENABLE)) { - hw->mac.ops.power_up_serdes(hw); - igb_check_swap_media(adapter); - if (adapter->flags & IGB_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } - } - } - - igb_update_stats(adapter); - - for (i = 0; i < adapter->num_tx_queues; i++) { - struct igb_ring *tx_ring = adapter->tx_ring[i]; - if (!netif_carrier_ok(netdev)) { - /* We've lost link, so the controller stops DMA, - * but we've got queued Tx work that's never going - * to get done, so reset controller to flush Tx. - * (Do the reset outside of interrupt context). 
*/ - if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { - adapter->tx_timeout_count++; - schedule_work(&adapter->reset_task); - /* return immediately since reset is imminent */ - return; - } - } - - /* Force detection of hung controller every watchdog period */ - set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - } - - /* Cause software interrupt to ensure rx ring is cleaned */ - if (adapter->msix_entries) { - u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - E1000_WRITE_REG(hw, E1000_EICS, eics); - } else { - E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0); - } - - igb_spoof_check(adapter); - - /* Reset the timer */ - if (!test_bit(__IGB_DOWN, &adapter->state)) { - if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + HZ)); - else - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); - } -} - -static void igb_dma_err_task(struct work_struct *work) -{ - struct igb_adapter *adapter = container_of(work, - struct igb_adapter, - dma_err_task); - int vf; - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - u32 hgptc; - u32 ciaa, ciad; - - hgptc = E1000_READ_REG(hw, E1000_HGPTC); - if (hgptc) /* If incrementing then no need for the check below */ - goto dma_timer_reset; - /* - * Check to see if a bad DMA write target from an errant or - * malicious VF has caused a PCIe error. If so then we can - * issue a VFLR to the offending VF(s) and then resume without - * requesting a full slot reset. - */ - - for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { - ciaa = (vf << 16) | 0x80000000; - /* 32 bit read so align, we really want status at offset 6 */ - ciaa |= PCI_COMMAND; - E1000_WRITE_REG(hw, E1000_CIAA, ciaa); - ciad = E1000_READ_REG(hw, E1000_CIAD); - ciaa &= 0x7FFFFFFF; - /* disable debug mode asap after reading data */ - E1000_WRITE_REG(hw, E1000_CIAA, ciaa); - /* Get the upper 16 bits which will be the PCI status reg */ - ciad >>= 16; - if (ciad & (PCI_STATUS_REC_MASTER_ABORT | - PCI_STATUS_REC_TARGET_ABORT | - PCI_STATUS_SIG_SYSTEM_ERROR)) { - netdev_err(netdev, "VF %d suffered error\n", vf); - /* Issue VFLR */ - ciaa = (vf << 16) | 0x80000000; - ciaa |= 0xA8; - E1000_WRITE_REG(hw, E1000_CIAA, ciaa); - ciad = 0x00008000; /* VFLR */ - E1000_WRITE_REG(hw, E1000_CIAD, ciad); - ciaa &= 0x7FFFFFFF; - E1000_WRITE_REG(hw, E1000_CIAA, ciaa); - } - } -dma_timer_reset: - /* Reset the timer */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->dma_err_timer, - round_jiffies(jiffies + HZ / 10)); -} - -/** - * igb_dma_err_timer - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -#ifdef HAVE_TIMER_SETUP -static void igb_dma_err_timer(struct timer_list *t) -{ - struct igb_adapter *adapter = from_timer(adapter, t, dma_err_timer); -#else -static void igb_dma_err_timer(unsigned long data) -{ - struct igb_adapter *adapter = (struct igb_adapter *)data; -#endif - /* Do the rest outside of interrupt context */ - schedule_work(&adapter->dma_err_task); -} - -enum latency_range { - lowest_latency = 0, - low_latency = 1, - bulk_latency = 2, - latency_invalid = 255 -}; - -/** - * igb_update_ring_itr - update the dynamic ITR value based on packet size - * - * Stores a new ITR value based on strictly on packet size. This - * algorithm is less sophisticated than that used in igb_update_itr, - * due to the difficulty of synchronizing statistics across multiple - * receive rings. 
The divisors and thresholds used by this function - * were determined based on theoretical maximum wire speed and testing - * data, in order to minimize response time while increasing bulk - * throughput. - * This functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: This function is called only when operating in a multiqueue - * receive environment. - * @q_vector: pointer to q_vector - **/ -static void igb_update_ring_itr(struct igb_q_vector *q_vector) -{ - int new_val = q_vector->itr_val; - int avg_wire_size = 0; - struct igb_adapter *adapter = q_vector->adapter; - unsigned int packets; - - /* For non-gigabit speeds, just fix the interrupt rate at 4000 - * ints/sec - ITR timer value of 120 ticks. - */ - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - new_val = IGB_4K_ITR; - goto set_itr_val; - default: - break; - } - - packets = q_vector->rx.total_packets; - if (packets) - avg_wire_size = q_vector->rx.total_bytes / packets; - - packets = q_vector->tx.total_packets; - if (packets) - avg_wire_size = max_t(u32, avg_wire_size, - q_vector->tx.total_bytes / packets); - - /* if avg_wire_size isn't set no work was done */ - if (!avg_wire_size) - goto clear_counts; - - /* Add 24 bytes to size to account for CRC, preamble, and gap */ - avg_wire_size += 24; - - /* Don't starve jumbo frames */ - avg_wire_size = min(avg_wire_size, 3000); - - /* Give a little boost to mid-size frames */ - if ((avg_wire_size > 300) && (avg_wire_size < 1200)) - new_val = avg_wire_size / 3; - else - new_val = avg_wire_size / 2; - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (new_val < IGB_20K_ITR && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - new_val = IGB_20K_ITR; - -set_itr_val: - if (new_val != q_vector->itr_val) { - q_vector->itr_val = new_val; - q_vector->set_itr = 1; - } -clear_counts: - q_vector->rx.total_bytes = 0; - q_vector->rx.total_packets = 0; - q_vector->tx.total_bytes = 0; - q_vector->tx.total_packets = 0; -} - -/** - * igb_update_itr - update the dynamic ITR value based on statistics - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - * this functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: These calculations are only valid when operating in a single- - * queue environment. 
- * @q_vector: pointer to q_vector - * @ring_container: ring info to update the itr for - **/ -static void igb_update_itr(struct igb_q_vector *q_vector, - struct igb_ring_container *ring_container) -{ - unsigned int packets = ring_container->total_packets; - unsigned int bytes = ring_container->total_bytes; - u8 itrval = ring_container->itr; - - /* no packets, exit with status unchanged */ - if (packets == 0) - return; - - switch (itrval) { - case lowest_latency: - /* handle TSO and jumbo frames */ - if (bytes/packets > 8000) - itrval = bulk_latency; - else if ((packets < 5) && (bytes > 512)) - itrval = low_latency; - break; - case low_latency: /* 50 usec aka 20000 ints/s */ - if (bytes > 10000) { - /* this if handles the TSO accounting */ - if (bytes/packets > 8000) { - itrval = bulk_latency; - } else if ((packets < 10) || ((bytes/packets) > 1200)) { - itrval = bulk_latency; - } else if (packets > 35) { - itrval = lowest_latency; - } - } else if (bytes/packets > 2000) { - itrval = bulk_latency; - } else if (packets <= 2 && bytes < 512) { - itrval = lowest_latency; - } - break; - case bulk_latency: /* 250 usec aka 4000 ints/s */ - if (bytes > 25000) { - if (packets > 35) - itrval = low_latency; - } else if (bytes < 1500) { - itrval = low_latency; - } - break; - } - - /* clear work counters since we have the values we need */ - ring_container->total_bytes = 0; - ring_container->total_packets = 0; - - /* write updated itr to ring container */ - ring_container->itr = itrval; -} - -static void igb_set_itr(struct igb_q_vector *q_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - u32 new_itr = q_vector->itr_val; - u8 current_itr = 0; - - /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - current_itr = 0; - new_itr = IGB_4K_ITR; - goto set_itr_now; - default: - break; - } - - igb_update_itr(q_vector, &q_vector->tx); - igb_update_itr(q_vector, &q_vector->rx); - - current_itr = max(q_vector->rx.itr, q_vector->tx.itr); - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (current_itr == lowest_latency && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - current_itr = low_latency; - - switch (current_itr) { - /* counts and packets in update_itr are dependent on these numbers */ - case lowest_latency: - new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ - break; - case low_latency: - new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ - break; - case bulk_latency: - new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ - break; - default: - break; - } - -set_itr_now: - if (new_itr != q_vector->itr_val) { - /* this attempts to bias the interrupt rate towards Bulk - * by adding intermediate steps when interrupt rate is - * increasing */ - new_itr = new_itr > q_vector->itr_val ? - max((new_itr * q_vector->itr_val) / - (new_itr + (q_vector->itr_val >> 2)), - new_itr) : - new_itr; - /* Don't write the value here; it resets the adapter's - * internal timer, and causes us to delay far longer than - * we should between interrupts. Instead, we write the ITR - * value at the beginning of the next interrupt so the timing - * ends up being correct. 
- */ - q_vector->itr_val = new_itr; - q_vector->set_itr = 1; - } -} - -void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, - u32 type_tucmd, u32 mss_l4len_idx) -{ - struct e1000_adv_tx_context_desc *context_desc; - u16 i = tx_ring->next_to_use; - - context_desc = IGB_TX_CTXTDESC(tx_ring, i); - - i++; - tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; - - /* set bits to identify this as an advanced context descriptor */ - type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; - - /* For 82575, context index must be unique per ring. */ - if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) - mss_l4len_idx |= tx_ring->reg_idx << 4; - - context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); - context_desc->seqnum_seed = 0; - context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); - context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); -} - -static int igb_tso(struct igb_ring *tx_ring, - struct igb_tx_buffer *first, - u8 *hdr_len) -{ -#ifdef NETIF_F_TSO - struct sk_buff *skb = first->skb; - u32 vlan_macip_lens, type_tucmd; - u32 mss_l4len_idx, l4len; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; - - if (!skb_is_gso(skb)) -#endif /* NETIF_F_TSO */ - return 0; -#ifdef NETIF_F_TSO - - if (skb_header_cloned(skb)) { - int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } - - /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; - - if (first->protocol == __constant_htons(ETH_P_IP)) { - struct iphdr *iph = ip_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, - 0); - type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; - first->tx_flags |= IGB_TX_FLAGS_TSO | - IGB_TX_FLAGS_CSUM | - IGB_TX_FLAGS_IPV4; -#ifdef NETIF_F_TSO6 - } else if (skb_is_gso_v6(skb)) { - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); - first->tx_flags |= IGB_TX_FLAGS_TSO | - IGB_TX_FLAGS_CSUM; -#endif - } - - /* compute header lengths */ - l4len = tcp_hdrlen(skb); - *hdr_len = skb_transport_offset(skb) + l4len; - - /* update gso size and bytecount with header size */ - first->gso_segs = skb_shinfo(skb)->gso_segs; - first->bytecount += (first->gso_segs - 1) * *hdr_len; - - /* MSS L4LEN IDX */ - mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; - mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; - - /* VLAN MACLEN IPLEN */ - vlan_macip_lens = skb_network_header_len(skb); - vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); - - return 1; -#endif /* NETIF_F_TSO */ -} - -static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) -{ - struct sk_buff *skb = first->skb; - u32 vlan_macip_lens = 0; - u32 mss_l4len_idx = 0; - u32 type_tucmd = 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) - return; - } else { - u8 nexthdr = 0; - switch (first->protocol) { - case __constant_htons(ETH_P_IP): - vlan_macip_lens |= skb_network_header_len(skb); - type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; - nexthdr = ip_hdr(skb)->protocol; - break; -#ifdef NETIF_F_IPV6_CSUM - case __constant_htons(ETH_P_IPV6): - vlan_macip_lens |= skb_network_header_len(skb); - nexthdr = ipv6_hdr(skb)->nexthdr; - break; -#endif - default: - if (unlikely(net_ratelimit())) { - 
dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } - break; - } - - switch (nexthdr) { - case IPPROTO_TCP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - mss_l4len_idx = tcp_hdrlen(skb) << - E1000_ADVTXD_L4LEN_SHIFT; - break; -#ifdef HAVE_SCTP - case IPPROTO_SCTP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - mss_l4len_idx = sizeof(struct sctphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; -#endif - case IPPROTO_UDP: - mss_l4len_idx = sizeof(struct udphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - nexthdr); - } - break; - } - - /* update TX checksum flag */ - first->tx_flags |= IGB_TX_FLAGS_CSUM; - } - - vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); -} - -#define IGB_SET_FLAG(_input, _flag, _result) \ - ((_flag <= _result) ? \ - ((u32)(_input & _flag) * (_result / _flag)) : \ - ((u32)(_input & _flag) / (_flag / _result))) - -static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) -{ - /* set type for advanced descriptor with frame checksum insertion */ - u32 cmd_type = E1000_ADVTXD_DTYP_DATA | - E1000_ADVTXD_DCMD_DEXT | - E1000_ADVTXD_DCMD_IFCS; - - /* set HW vlan bit if vlan is present */ - cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, - (E1000_ADVTXD_DCMD_VLE)); - - /* set segmentation bits for TSO */ - cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO, - (E1000_ADVTXD_DCMD_TSE)); - - /* set timestamp bit if present */ - cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP, - (E1000_ADVTXD_MAC_TSTAMP)); - - return cmd_type; -} - -static void igb_tx_olinfo_status(struct igb_ring *tx_ring, - union e1000_adv_tx_desc *tx_desc, - u32 tx_flags, unsigned int paylen) -{ - u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; - - /* 82575 requires a unique index per ring */ - if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) - olinfo_status |= tx_ring->reg_idx << 4; - - /* insert L4 checksum */ - olinfo_status |= IGB_SET_FLAG(tx_flags, - IGB_TX_FLAGS_CSUM, - (E1000_TXD_POPTS_TXSM << 8)); - - /* insert IPv4 checksum */ - olinfo_status |= IGB_SET_FLAG(tx_flags, - IGB_TX_FLAGS_IPV4, - (E1000_TXD_POPTS_IXSM << 8)); - - tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); -} - -static void igb_tx_map(struct igb_ring *tx_ring, - struct igb_tx_buffer *first, - const u8 hdr_len) -{ - struct sk_buff *skb = first->skb; - struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; - dma_addr_t dma; - unsigned int data_len, size; - u32 tx_flags = first->tx_flags; - u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); - u16 i = tx_ring->next_to_use; - - tx_desc = IGB_TX_DESC(tx_ring, i); - - igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); - - size = skb_headlen(skb); - data_len = skb->data_len; - - dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - - tx_buffer = first; - - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; - - /* record length, and DMA address */ - dma_unmap_len_set(tx_buffer, len, size); - dma_unmap_addr_set(tx_buffer, dma, dma); - - tx_desc->read.buffer_addr = cpu_to_le64(dma); - - while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { - tx_desc->read.cmd_type_len = - cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD); - - i++; - 
tx_desc++; - if (i == tx_ring->count) { - tx_desc = IGB_TX_DESC(tx_ring, 0); - i = 0; - } - tx_desc->read.olinfo_status = 0; - - dma += IGB_MAX_DATA_PER_TXD; - size -= IGB_MAX_DATA_PER_TXD; - - tx_desc->read.buffer_addr = cpu_to_le64(dma); - } - - if (likely(!data_len)) - break; - - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); - - i++; - tx_desc++; - if (i == tx_ring->count) { - tx_desc = IGB_TX_DESC(tx_ring, 0); - i = 0; - } - tx_desc->read.olinfo_status = 0; - - size = skb_frag_size(frag); - data_len -= size; - - dma = skb_frag_dma_map(tx_ring->dev, frag, 0, - size, DMA_TO_DEVICE); - - tx_buffer = &tx_ring->tx_buffer_info[i]; - } - - /* write last descriptor with RS and EOP bits */ - cmd_type |= size | IGB_TXD_DCMD; - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); - /* set the timestamp */ - first->time_stamp = jiffies; - - /* - * Force memory writes to complete before letting h/w know there - * are new descriptors to fetch. (Only applicable for weak-ordered - * memory model archs, such as IA-64). - * - * We also need this memory barrier to make certain all of the - * status bits have been updated before next_to_watch is written. - */ - wmb(); - - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; - - i++; - if (i == tx_ring->count) - i = 0; - - tx_ring->next_to_use = i; - - writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it syncronizes IO on IA64/Altix systems */ - mmiowb(); - - return; - -dma_error: - dev_err(tx_ring->dev, "TX DMA map failed\n"); - - /* clear dma mappings for failed tx_buffer_info map */ - for (;;) { - tx_buffer = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); - if (tx_buffer == first) - break; - if (i == 0) - i = tx_ring->count; - i--; - } - - tx_ring->next_to_use = i; -} - -static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) -{ - struct net_device *netdev = netdev_ring(tx_ring); - - if (netif_is_multiqueue(netdev)) - netif_stop_subqueue(netdev, ring_queue_index(tx_ring)); - else - netif_stop_queue(netdev); - - /* Herbert's original patch had: - * smp_mb__after_netif_stop_queue(); - * but since that doesn't exist yet, just open code it. */ - smp_mb(); - - /* We need to check again in a case another CPU has just - * made room available. */ - if (igb_desc_unused(tx_ring) < size) - return -EBUSY; - - /* A reprieve! 
*/ - if (netif_is_multiqueue(netdev)) - netif_wake_subqueue(netdev, ring_queue_index(tx_ring)); - else - netif_wake_queue(netdev); - - tx_ring->tx_stats.restart_queue++; - - return 0; -} - -static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) -{ - if (igb_desc_unused(tx_ring) >= size) - return 0; - return __igb_maybe_stop_tx(tx_ring, size); -} - -netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, - struct igb_ring *tx_ring) -{ - struct igb_tx_buffer *first; - int tso; - u32 tx_flags = 0; -#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD - unsigned short f; -#endif - u16 count = TXD_USE_COUNT(skb_headlen(skb)); - __be16 protocol = vlan_get_protocol(skb); - u8 hdr_len = 0; - - /* - * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, - * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, - * + 2 desc gap to keep tail from touching head, - * + 1 desc for context descriptor, - * otherwise try next time - */ -#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); -#else - count += skb_shinfo(skb)->nr_frags; -#endif - if (igb_maybe_stop_tx(tx_ring, count + 3)) { - /* this is a hard error */ - return NETDEV_TX_BUSY; - } - - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; - first->skb = skb; - first->bytecount = skb->len; - first->gso_segs = 1; - - skb_tx_timestamp(skb); - -#ifdef HAVE_PTP_1588_CLOCK - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); - if (!adapter->ptp_tx_skb) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IGB_TX_FLAGS_TSTAMP; - - adapter->ptp_tx_skb = skb_get(skb); - adapter->ptp_tx_start = jiffies; - if (adapter->hw.mac.type == e1000_82576) - schedule_work(&adapter->ptp_tx_work); - } - } -#endif /* HAVE_PTP_1588_CLOCK */ - - if (vlan_tx_tag_present(skb)) { - tx_flags |= IGB_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); - } - - /* record initial flags and protocol */ - first->tx_flags = tx_flags; - first->protocol = protocol; - - tso = igb_tso(tx_ring, first, &hdr_len); - if (tso < 0) - goto out_drop; - else if (!tso) - igb_tx_csum(tx_ring, first); - - igb_tx_map(tx_ring, first, hdr_len); - -#ifndef HAVE_TRANS_START_IN_QUEUE - netdev_ring(tx_ring)->trans_start = jiffies; - -#endif - /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(tx_ring, DESC_NEEDED); - - return NETDEV_TX_OK; - -out_drop: - igb_unmap_and_free_tx_resource(tx_ring, first); - - return NETDEV_TX_OK; -} - -#ifdef HAVE_TX_MQ -static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, - struct sk_buff *skb) -{ - unsigned int r_idx = skb->queue_mapping; - - if (r_idx >= adapter->num_tx_queues) - r_idx = r_idx % adapter->num_tx_queues; - - return adapter->tx_ring[r_idx]; -} -#else -#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0] -#endif - -static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, - struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - if (skb->len <= 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - /* - * The minimum packet size with TCTL.PSP set is 17 so pad the skb - * in order to meet this minimum size requirement. 
- */ - if (skb->len < 17) { - if (skb_padto(skb, 17)) - return NETDEV_TX_OK; - skb->len = 17; - } - - return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); -} - -/** - * igb_tx_timeout - Respond to a Tx Hang - * @netdev: network interface device structure - **/ -static void igb_tx_timeout(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - - /* Do the reset outside of interrupt context */ - adapter->tx_timeout_count++; - - if (hw->mac.type >= e1000_82580) - hw->dev_spec._82575.global_device_reset = true; - - schedule_work(&adapter->reset_task); - E1000_WRITE_REG(hw, E1000_EICS, - (adapter->eims_enable_mask & ~adapter->eims_other)); -} - -static void igb_reset_task(struct work_struct *work) -{ - struct igb_adapter *adapter; - adapter = container_of(work, struct igb_adapter, reset_task); - - igb_reinit_locked(adapter); -} - -/** - * igb_get_stats - Get System Network Statistics - * @netdev: network interface device structure - * - * Returns the address of the device statistics structure. - * The statistics are updated here and also from the timer callback. - **/ -static struct net_device_stats *igb_get_stats(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - - if (!test_bit(__IGB_RESETTING, &adapter->state)) - igb_update_stats(adapter); - -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - /* only return the current stats */ - return &netdev->stats; -#else - /* only return the current stats */ - return &adapter->net_stats; -#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ -} - -/** - * igb_change_mtu - Change the Maximum Transfer Unit - * @netdev: network interface device structure - * @new_mtu: new value for maximum frame size - * - * Returns 0 on success, negative on failure - **/ -static int igb_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - - if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { - dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n"); - return -EINVAL; - } - -#define MAX_STD_JUMBO_FRAME_SIZE 9238 - if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { - dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n"); - return -EINVAL; - } - - /* adjust max frame to be at least the size of a standard frame */ - if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) - max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; - - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - /* igb_down has a dependency on max_frame_size */ - adapter->max_frame_size = max_frame; - - if (netif_running(netdev)) - igb_down(adapter); - - dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); - netdev->mtu = new_mtu; - hw->dev_spec._82575.mtu = new_mtu; - - if (netif_running(netdev)) - igb_up(adapter); - else - igb_reset(adapter); - - clear_bit(__IGB_RESETTING, &adapter->state); - - return 0; -} - -/** - * igb_update_stats - Update the board statistics counters - * @adapter: board private structure - **/ - -void igb_update_stats(struct igb_adapter *adapter) -{ -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats *net_stats = &adapter->netdev->stats; -#else - struct net_device_stats *net_stats = &adapter->net_stats; -#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ - struct e1000_hw *hw = &adapter->hw; -#ifdef HAVE_PCI_ERS - struct pci_dev *pdev = 
adapter->pdev; -#endif - u32 reg, mpc; - u16 phy_tmp; - int i; - u64 bytes, packets; -#ifndef IGB_NO_LRO - u32 flushed = 0, coal = 0; - struct igb_q_vector *q_vector; -#endif - -#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF - - /* - * Prevent stats update while adapter is being reset, or if the pci - * connection is down. - */ - if (adapter->link_speed == 0) - return; -#ifdef HAVE_PCI_ERS - if (pci_channel_offline(pdev)) - return; - -#endif -#ifndef IGB_NO_LRO - for (i = 0; i < adapter->num_q_vectors; i++) { - q_vector = adapter->q_vector[i]; - if (!q_vector) - continue; - flushed += q_vector->lrolist.stats.flushed; - coal += q_vector->lrolist.stats.coal; - } - adapter->lro_stats.flushed = flushed; - adapter->lro_stats.coal = coal; - -#endif - bytes = 0; - packets = 0; - for (i = 0; i < adapter->num_rx_queues; i++) { - u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF; - struct igb_ring *ring = adapter->rx_ring[i]; - ring->rx_stats.drops += rqdpc_tmp; - net_stats->rx_fifo_errors += rqdpc_tmp; -#ifdef CONFIG_IGB_VMDQ_NETDEV - if (!ring->vmdq_netdev) { - bytes += ring->rx_stats.bytes; - packets += ring->rx_stats.packets; - } -#else - bytes += ring->rx_stats.bytes; - packets += ring->rx_stats.packets; -#endif - } - - net_stats->rx_bytes = bytes; - net_stats->rx_packets = packets; - - bytes = 0; - packets = 0; - for (i = 0; i < adapter->num_tx_queues; i++) { - struct igb_ring *ring = adapter->tx_ring[i]; -#ifdef CONFIG_IGB_VMDQ_NETDEV - if (!ring->vmdq_netdev) { - bytes += ring->tx_stats.bytes; - packets += ring->tx_stats.packets; - } -#else - bytes += ring->tx_stats.bytes; - packets += ring->tx_stats.packets; -#endif - } - net_stats->tx_bytes = bytes; - net_stats->tx_packets = packets; - - /* read stats registers */ - adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); - adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC); - adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL); - E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */ - adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC); - adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC); - adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC); - - adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64); - adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127); - adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255); - adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511); - adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023); - adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522); - adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS); - adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC); - - mpc = E1000_READ_REG(hw, E1000_MPC); - adapter->stats.mpc += mpc; - net_stats->rx_fifo_errors += mpc; - adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC); - adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL); - adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC); - adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL); - adapter->stats.dc += E1000_READ_REG(hw, E1000_DC); - adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC); - adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC); - adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC); - adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC); - adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); - adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC); - adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC); - adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL); - E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL 
*/ - adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC); - adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC); - adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC); - adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC); - adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH); - adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH); - adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR); - - adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64); - adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127); - adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255); - adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511); - adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); - adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); - - adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC); - adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC); - - adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT); - adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC); - - adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); - /* read internal phy sepecific stats */ - reg = E1000_READ_REG(hw, E1000_CTRL_EXT); - if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { - adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC); - - /* this stat has invalid values on i210/i211 */ - if ((hw->mac.type != e1000_i210) && - (hw->mac.type != e1000_i211)) - adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS); - } - adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC); - adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); - - adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC); - adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); - adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); - adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); - adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); - adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); - adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); - adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); - adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); - - /* Fill out the OS statistics structure */ - net_stats->multicast = adapter->stats.mprc; - net_stats->collisions = adapter->stats.colc; - - /* Rx Errors */ - - /* RLEC on some newer hardware can be incorrect so build - * our own version based on RUC and ROC */ - net_stats->rx_errors = adapter->stats.rxerrc + - adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.ruc + adapter->stats.roc + - adapter->stats.cexterr; - net_stats->rx_length_errors = adapter->stats.ruc + - adapter->stats.roc; - net_stats->rx_crc_errors = adapter->stats.crcerrs; - net_stats->rx_frame_errors = adapter->stats.algnerrc; - net_stats->rx_missed_errors = adapter->stats.mpc; - - /* Tx Errors */ - net_stats->tx_errors = adapter->stats.ecol + - adapter->stats.latecol; - net_stats->tx_aborted_errors = adapter->stats.ecol; - net_stats->tx_window_errors = adapter->stats.latecol; - net_stats->tx_carrier_errors = adapter->stats.tncrs; - - /* Tx Dropped needs to be maintained elsewhere */ - - /* Phy Stats */ - if (hw->phy.media_type == e1000_media_type_copper) { - if ((adapter->link_speed == SPEED_1000) && - (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { - phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; - adapter->phy_stats.idle_errors += phy_tmp; - } - } - - /* Management Stats */ - adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC); - adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC); - if (hw->mac.type > 
e1000_82580) { - adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC); - adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC); - adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC); - adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC); - } -} - -static irqreturn_t igb_msix_other(int irq, void *data) -{ - struct igb_adapter *adapter = data; - struct e1000_hw *hw = &adapter->hw; - u32 icr = E1000_READ_REG(hw, E1000_ICR); - /* reading ICR causes bit 31 of EICR to be cleared */ - - if (icr & E1000_ICR_DRSTA) - schedule_work(&adapter->reset_task); - - if (icr & E1000_ICR_DOUTSYNC) { - /* HW is reporting DMA is out of sync */ - adapter->stats.doosync++; - /* The DMA Out of Sync is also indication of a spoof event - * in IOV mode. Check the Wrong VM Behavior register to - * see if it is really a spoof event. */ - igb_check_wvbr(adapter); - } - - /* Check for a mailbox event */ - if (icr & E1000_ICR_VMMB) - igb_msg_task(adapter); - - if (icr & E1000_ICR_LSC) { - hw->mac.get_link_status = 1; - /* guard against interrupt when we're going down */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - } - -#ifdef HAVE_PTP_1588_CLOCK - if (icr & E1000_ICR_TS) { - u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } -#endif /* HAVE_PTP_1588_CLOCK */ - - /* Check for MDD event */ - if (icr & E1000_ICR_MDDET) - igb_process_mdd_event(adapter); - - E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other); - - return IRQ_HANDLED; -} - -static void igb_write_itr(struct igb_q_vector *q_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - u32 itr_val = q_vector->itr_val & 0x7FFC; - - if (!q_vector->set_itr) - return; - - if (!itr_val) - itr_val = 0x4; - - if (adapter->hw.mac.type == e1000_82575) - itr_val |= itr_val << 16; - else - itr_val |= E1000_EITR_CNT_IGNR; - - writel(itr_val, q_vector->itr_register); - q_vector->set_itr = 0; -} - -static irqreturn_t igb_msix_ring(int irq, void *data) -{ - struct igb_q_vector *q_vector = data; - - /* Write the ITR value calculated from the previous interrupt. */ - igb_write_itr(q_vector); - - napi_schedule(&q_vector->napi); - - return IRQ_HANDLED; -} - -#ifdef IGB_DCA -static void igb_update_tx_dca(struct igb_adapter *adapter, - struct igb_ring *tx_ring, - int cpu) -{ - struct e1000_hw *hw = &adapter->hw; - u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); - - if (hw->mac.type != e1000_82575) - txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576; - - /* - * We can enable relaxed ordering for reads, but not writes when - * DCA is enabled. This is due to a known issue in some chipsets - * which will cause the DCA tag to be cleared. - */ - txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | - E1000_DCA_TXCTRL_DATA_RRO_EN | - E1000_DCA_TXCTRL_DESC_DCA_EN; - - E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); -} - -static void igb_update_rx_dca(struct igb_adapter *adapter, - struct igb_ring *rx_ring, - int cpu) -{ - struct e1000_hw *hw = &adapter->hw; - u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); - - if (hw->mac.type != e1000_82575) - rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576; - - /* - * We can enable relaxed ordering for reads, but not writes when - * DCA is enabled. This is due to a known issue in some chipsets - * which will cause the DCA tag to be cleared. 
- */ - rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | - E1000_DCA_RXCTRL_DESC_DCA_EN; - - E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); -} - -static void igb_update_dca(struct igb_q_vector *q_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - int cpu = get_cpu(); - - if (q_vector->cpu == cpu) - goto out_no_update; - - if (q_vector->tx.ring) - igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); - - if (q_vector->rx.ring) - igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); - - q_vector->cpu = cpu; -out_no_update: - put_cpu(); -} - -static void igb_setup_dca(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - - if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) - return; - - /* Always use CB2 mode, difference is masked in the CB driver. */ - E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); - - for (i = 0; i < adapter->num_q_vectors; i++) { - adapter->q_vector[i]->cpu = -1; - igb_update_dca(adapter->q_vector[i]); - } -} - -static int __igb_notify_dca(struct device *dev, void *data) -{ - struct net_device *netdev = dev_get_drvdata(dev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - unsigned long event = *(unsigned long *)data; - - switch (event) { - case DCA_PROVIDER_ADD: - /* if already enabled, don't do it again */ - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - break; - if (dca_add_requester(dev) == E1000_SUCCESS) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; - dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); - igb_setup_dca(adapter); - break; - } - /* Fall Through since DCA is disabled. */ - case DCA_PROVIDER_REMOVE: - if (adapter->flags & IGB_FLAG_DCA_ENABLED) { - /* without this a class_device is left - * hanging around in the sysfs model */ - dca_remove_requester(dev); - dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); - adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); - } - break; - } - - return E1000_SUCCESS; -} - -static int igb_notify_dca(struct notifier_block *nb, unsigned long event, - void *p) -{ - int ret_val; - - ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, - __igb_notify_dca); - - return ret_val ? NOTIFY_BAD : NOTIFY_DONE; -} -#endif /* IGB_DCA */ - -static int igb_vf_configure(struct igb_adapter *adapter, int vf) -{ - unsigned char mac_addr[ETH_ALEN]; - - random_ether_addr(mac_addr); - igb_set_vf_mac(adapter, vf, mac_addr); - -#ifdef IFLA_VF_MAX -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE - /* By default spoof check is enabled for all VFs */ - adapter->vf_data[vf].spoofchk_enabled = true; -#endif -#endif - - return true; -} - -static void igb_ping_all_vfs(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ping; - int i; - - for (i = 0 ; i < adapter->vfs_allocated_count; i++) { - ping = E1000_PF_CONTROL_MSG; - if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) - ping |= E1000_VT_MSGTYPE_CTS; - e1000_write_mbx(hw, &ping, 1, i); - } -} - -/** - * igb_mta_set_ - Set multicast filter table address - * @adapter: pointer to the adapter structure - * @hash_value: determines the MTA register and bit to set - * - * The multicast table address is a register array of 32-bit registers. - * The hash_value is used to determine what register the bit is in, the - * current value is read, the new bit is OR'd in and the new value is - * written back into the register. 
- **/ -void igb_mta_set(struct igb_adapter *adapter, u32 hash_value) -{ - struct e1000_hw *hw = &adapter->hw; - u32 hash_bit, hash_reg, mta; - - /* - * The MTA is a register array of 32-bit registers. It is - * treated like an array of (32*mta_reg_count) bits. We want to - * set bit BitArray[hash_value]. So we figure out what register - * the bit is in, read it, OR in the new bit, then write - * back the new value. The (hw->mac.mta_reg_count - 1) serves as a - * mask to bits 31:5 of the hash value which gives us the - * register we're modifying. The hash bit within that register - * is determined by the lower 5 bits of the hash value. - */ - hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); - hash_bit = hash_value & 0x1F; - - mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); - - mta |= (1 << hash_bit); - - E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); - E1000_WRITE_FLUSH(hw); -} - -static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) -{ - - struct e1000_hw *hw = &adapter->hw; - u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf)); - struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - - vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | - IGB_VF_FLAG_MULTI_PROMISC); - vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); - -#ifdef IGB_ENABLE_VF_PROMISC - if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) { - vmolr |= E1000_VMOLR_ROPE; - vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC; - *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST; - } -#endif - if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { - vmolr |= E1000_VMOLR_MPME; - vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; - *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; - } else { - /* - * if we have hashes and we are clearing a multicast promisc - * flag we need to write the hashes to the MTA as this step - * was previously skipped - */ - if (vf_data->num_vf_mc_hashes > 30) { - vmolr |= E1000_VMOLR_MPME; - } else if (vf_data->num_vf_mc_hashes) { - int j; - vmolr |= E1000_VMOLR_ROMPE; - for (j = 0; j < vf_data->num_vf_mc_hashes; j++) - igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); - } - } - - E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr); - - /* there are flags left unprocessed, likely not supported */ - if (*msgbuf & E1000_VT_MSGINFO_MASK) - return -EINVAL; - - return 0; - -} - -static int igb_set_vf_multicasts(struct igb_adapter *adapter, - u32 *msgbuf, u32 vf) -{ - int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - u16 *hash_list = (u16 *)&msgbuf[1]; - struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - int i; - - /* salt away the number of multicast addresses assigned - * to this VF for later use to restore when the PF multi cast - * list changes - */ - vf_data->num_vf_mc_hashes = n; - - /* only up to 30 hash values supported */ - if (n > 30) - n = 30; - - /* store the hashes for later use */ - for (i = 0; i < n; i++) - vf_data->vf_mc_hashes[i] = hash_list[i]; - - /* Flush and reset the mta with the new values */ - igb_set_rx_mode(adapter->netdev); - - return 0; -} - -static void igb_restore_vf_multicasts(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct vf_data_storage *vf_data; - int i, j; - - for (i = 0; i < adapter->vfs_allocated_count; i++) { - u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i)); - vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); - - vf_data = &adapter->vf_data[i]; - - if ((vf_data->num_vf_mc_hashes > 30) || - (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { - vmolr |= E1000_VMOLR_MPME; - } else if (vf_data->num_vf_mc_hashes) { - 
vmolr |= E1000_VMOLR_ROMPE; - for (j = 0; j < vf_data->num_vf_mc_hashes; j++) - igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); - } - E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr); - } -} - -static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - u32 pool_mask, reg, vid; - u16 vlan_default; - int i; - - pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = E1000_READ_REG(hw, E1000_VLVF(i)); - - /* remove the vf from the pool */ - reg &= ~pool_mask; - - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK) && - (reg & E1000_VLVF_VLANID_ENABLE)) { - reg = 0; - vid = reg & E1000_VLVF_VLANID_MASK; - igb_vfta_set(adapter, vid, FALSE); - } - - E1000_WRITE_REG(hw, E1000_VLVF(i), reg); - } - - adapter->vf_data[vf].vlans_enabled = 0; - - vlan_default = adapter->vf_data[vf].default_vf_vlan_id; - if (vlan_default) - igb_vlvf_set(adapter, vlan_default, true, vf); -} - -s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - u32 reg, i; - - /* The vlvf table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return -1; - - /* we only need to do this if VMDq is enabled */ - if (!adapter->vmdq_pools) - return -1; - - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = E1000_READ_REG(hw, E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } - - if (add) { - if (i == E1000_VLVF_ARRAY_SIZE) { - /* Did not find a matching VLAN ID entry that was - * enabled. Search for a free filter entry, i.e. - * one without the enable bit set - */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = E1000_READ_REG(hw, E1000_VLVF(i)); - if (!(reg & E1000_VLVF_VLANID_ENABLE)) - break; - } - } - if (i < E1000_VLVF_ARRAY_SIZE) { - /* Found an enabled/available entry */ - reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* if !enabled we need to set this up in vfta */ - if (!(reg & E1000_VLVF_VLANID_ENABLE)) { - /* add VID to filter table */ - igb_vfta_set(adapter, vid, TRUE); - reg |= E1000_VLVF_VLANID_ENABLE; - } - reg &= ~E1000_VLVF_VLANID_MASK; - reg |= vid; - E1000_WRITE_REG(hw, E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return E1000_SUCCESS; - - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size += 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); - } - - adapter->vf_data[vf].vlans_enabled++; - } - } else { - if (i < E1000_VLVF_ARRAY_SIZE) { - /* remove vf from the pool */ - reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK)) { - reg = 0; - igb_vfta_set(adapter, vid, FALSE); - } - E1000_WRITE_REG(hw, E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return E1000_SUCCESS; - - adapter->vf_data[vf].vlans_enabled--; - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size -= 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); - } - } - } - return E1000_SUCCESS; -} - -#ifdef 
IFLA_VF_MAX -static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - - if (vid) - E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); - else - E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0); -} - -static int igb_ndo_set_vf_vlan(struct net_device *netdev, -#ifdef HAVE_VF_VLAN_PROTO - int vf, u16 vlan, u8 qos, __be16 vlan_proto) -#else - int vf, u16 vlan, u8 qos) -#endif -{ - int err = 0; - struct igb_adapter *adapter = netdev_priv(netdev); - - /* VLAN IDs accepted range 0-4094 */ - if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7)) - return -EINVAL; - -#ifdef HAVE_VF_VLAN_PROTO - if (vlan_proto != htons(ETH_P_8021Q)) - return -EPROTONOSUPPORT; -#endif - - if (vlan || qos) { - err = igb_vlvf_set(adapter, vlan, !!vlan, vf); - if (err) - goto out; - igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); - igb_set_vmolr(adapter, vf, !vlan); - adapter->vf_data[vf].pf_vlan = vlan; - adapter->vf_data[vf].pf_qos = qos; - igb_set_vf_vlan_strip(adapter, vf, true); - dev_info(&adapter->pdev->dev, - "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set," - " but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before" - " attempting to use the VF device.\n"); - } - } else { - if (adapter->vf_data[vf].pf_vlan) - dev_info(&adapter->pdev->dev, - "Clearing VLAN on VF %d\n", vf); - igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); - igb_set_vmvir(adapter, vlan, vf); - igb_set_vmolr(adapter, vf, true); - igb_set_vf_vlan_strip(adapter, vf, false); - adapter->vf_data[vf].pf_vlan = 0; - adapter->vf_data[vf].pf_qos = 0; - } -out: - return err; -} - -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE -static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, - bool setting) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 dtxswc, reg_offset; - - if (!adapter->vfs_allocated_count) - return -EOPNOTSUPP; - - if (vf >= adapter->vfs_allocated_count) - return -EINVAL; - - reg_offset = (hw->mac.type == e1000_82576) ? 
E1000_DTXSWC : E1000_TXSWC; - dtxswc = E1000_READ_REG(hw, reg_offset); - if (setting) - dtxswc |= ((1 << vf) | - (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); - else - dtxswc &= ~((1 << vf) | - (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); - E1000_WRITE_REG(hw, reg_offset, dtxswc); - - adapter->vf_data[vf].spoofchk_enabled = setting; - return E1000_SUCCESS; -} -#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ -#endif /* IFLA_VF_MAX */ - -static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - u32 reg; - - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = E1000_READ_REG(hw, E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } - - if (i >= E1000_VLVF_ARRAY_SIZE) - i = -1; - - return i; -} - -static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - int err = 0; - - if (vid) - igb_set_vf_vlan_strip(adapter, vf, true); - else - igb_set_vf_vlan_strip(adapter, vf, false); - - /* If in promiscuous mode we need to make sure the PF also has - * the VLAN filter set. - */ - if (add && (adapter->netdev->flags & IFF_PROMISC)) - err = igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - if (err) - goto out; - - err = igb_vlvf_set(adapter, vid, add, vf); - - if (err) - goto out; - - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && (adapter->netdev->flags & IFF_PROMISC)) { - u32 vlvf, bits; - - int regndx = igb_find_vlvf_entry(adapter, vid); - if (regndx < 0) - goto out; - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx)); - bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + - adapter->vfs_allocated_count); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. 
- */ - if ((vlvf & VLAN_VID_MASK) == vid && -#ifndef HAVE_VLAN_RX_REGISTER - !test_bit(vid, adapter->active_vlans) && -#endif - !bits) - igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - } - -out: - return err; -} - -static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - - /* clear flags except flag that the PF has set the MAC */ - adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; - adapter->vf_data[vf].last_nack = jiffies; - - /* reset offloads to defaults */ - igb_set_vmolr(adapter, vf, true); - - /* reset vlans for device */ - igb_clear_vf_vfta(adapter, vf); -#ifdef IFLA_VF_MAX - if (adapter->vf_data[vf].pf_vlan) - igb_ndo_set_vf_vlan(adapter->netdev, vf, - adapter->vf_data[vf].pf_vlan, -#ifdef HAVE_VF_VLAN_PROTO - adapter->vf_data[vf].pf_qos, - htons(ETH_P_8021Q)); -#else - adapter->vf_data[vf].pf_qos); -#endif - else - igb_clear_vf_vfta(adapter, vf); -#endif - - /* reset multicast table array for vf */ - adapter->vf_data[vf].num_vf_mc_hashes = 0; - - /* Flush and reset the mta with the new values */ - igb_set_rx_mode(adapter->netdev); - - /* - * Reset the VFs TDWBAL and TDWBAH registers which are not - * cleared by a VFLR - */ - E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0); - E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0); - if (hw->mac.type == e1000_82576) { - E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0); - E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0); - } -} - -static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) -{ - unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - - /* generate a new mac address as we were hotplug removed/added */ - if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) - random_ether_addr(vf_mac); - - /* process remaining reset events */ - igb_vf_reset(adapter, vf); -} - -static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - u32 reg, msgbuf[3]; - u8 *addr = (u8 *)(&msgbuf[1]); - - /* process all the same items cleared in a function level reset */ - igb_vf_reset(adapter, vf); - - /* set vf mac address */ - igb_del_mac_filter(adapter, vf_mac, vf); - igb_add_mac_filter(adapter, vf_mac, vf); - - /* enable transmit and receive for vf */ - reg = E1000_READ_REG(hw, E1000_VFTE); - E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf)); - reg = E1000_READ_REG(hw, E1000_VFRE); - E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf)); - - adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; - - /* reply to reset with ack and vf mac address */ - msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, 6); - e1000_write_mbx(hw, msgbuf, 3, vf); -} - -static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) -{ - /* - * The VF MAC Address is stored in a packed array of bytes - * starting at the second 32 bit word of the msg array - */ - unsigned char *addr = (unsigned char *)&msg[1]; - int err = -1; - - if (is_valid_ether_addr(addr)) - err = igb_set_vf_mac(adapter, vf, addr); - - return err; -} - -static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) -{ - struct e1000_hw *hw = &adapter->hw; - struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - u32 msg = E1000_VT_MSGTYPE_NACK; - - /* if device isn't clear to send it shouldn't be reading either */ - if (!(vf_data->flags & IGB_VF_FLAG_CTS) && - time_after(jiffies, vf_data->last_nack + (2 * HZ))) { - e1000_write_mbx(hw, &msg, 1, vf); 
- vf_data->last_nack = jiffies; - } -} - -static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) -{ - struct pci_dev *pdev = adapter->pdev; - u32 msgbuf[E1000_VFMAILBOX_SIZE]; - struct e1000_hw *hw = &adapter->hw; - struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - s32 retval; - - retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); - - if (retval) { - dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n"); - return; - } - - /* this is a message we already processed, do nothing */ - if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) - return; - - /* - * until the vf completes a reset it should not be - * allowed to start any configuration. - */ - - if (msgbuf[0] == E1000_VF_RESET) { - igb_vf_reset_msg(adapter, vf); - return; - } - - if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { - msgbuf[0] = E1000_VT_MSGTYPE_NACK; - if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) { - e1000_write_mbx(hw, msgbuf, 1, vf); - vf_data->last_nack = jiffies; - } - return; - } - - switch ((msgbuf[0] & 0xFFFF)) { - case E1000_VF_SET_MAC_ADDR: - retval = -EINVAL; -#ifndef IGB_DISABLE_VF_MAC_SET - if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) - retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); - else - DPRINTK(DRV, INFO, - "VF %d attempted to override administratively " - "set MAC address\nReload the VF driver to " - "resume operations\n", vf); -#endif - break; - case E1000_VF_SET_PROMISC: - retval = igb_set_vf_promisc(adapter, msgbuf, vf); - break; - case E1000_VF_SET_MULTICAST: - retval = igb_set_vf_multicasts(adapter, msgbuf, vf); - break; - case E1000_VF_SET_LPE: - retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); - break; - case E1000_VF_SET_VLAN: - retval = -1; -#ifdef IFLA_VF_MAX - if (vf_data->pf_vlan) - DPRINTK(DRV, INFO, - "VF %d attempted to override administratively " - "set VLAN tag\nReload the VF driver to " - "resume operations\n", vf); - else -#endif - retval = igb_set_vf_vlan(adapter, msgbuf, vf); - break; - default: - dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]); - retval = -E1000_ERR_MBX; - break; - } - - /* notify the VF of the results of what it sent us */ - if (retval) - msgbuf[0] |= E1000_VT_MSGTYPE_NACK; - else - msgbuf[0] |= E1000_VT_MSGTYPE_ACK; - - msgbuf[0] |= E1000_VT_MSGTYPE_CTS; - - e1000_write_mbx(hw, msgbuf, 1, vf); -} - -static void igb_msg_task(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 vf; - - for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { - /* process any reset requests */ - if (!e1000_check_for_rst(hw, vf)) - igb_vf_reset_event(adapter, vf); - - /* process any messages pending */ - if (!e1000_check_for_msg(hw, vf)) - igb_rcv_msg_from_vf(adapter, vf); - - /* process any acks */ - if (!e1000_check_for_ack(hw, vf)) - igb_rcv_ack_from_vf(adapter, vf); - } -} - -/** - * igb_set_uta - Set unicast filter table address - * @adapter: board private structure - * - * The unicast table address is a register array of 32-bit registers. 
- * The table is meant to be used in a way similar to how the MTA is used - * however due to certain limitations in the hardware it is necessary to - * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous - * enable bit to allow vlan tag stripping when promiscuous mode is enabled - **/ -static void igb_set_uta(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - - /* The UTA table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return; - - /* we only need to do this if VMDq is enabled */ - if (!adapter->vmdq_pools) - return; - - for (i = 0; i < hw->mac.uta_reg_count; i++) - E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0); -} - -/** - * igb_intr_msi - Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure - **/ -static irqreturn_t igb_intr_msi(int irq, void *data) -{ - struct igb_adapter *adapter = data; - struct igb_q_vector *q_vector = adapter->q_vector[0]; - struct e1000_hw *hw = &adapter->hw; - /* read ICR disables interrupts using IAM */ - u32 icr = E1000_READ_REG(hw, E1000_ICR); - - igb_write_itr(q_vector); - - if (icr & E1000_ICR_DRSTA) - schedule_work(&adapter->reset_task); - - if (icr & E1000_ICR_DOUTSYNC) { - /* HW is reporting DMA is out of sync */ - adapter->stats.doosync++; - } - - if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - hw->mac.get_link_status = 1; - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - } - -#ifdef HAVE_PTP_1588_CLOCK - if (icr & E1000_ICR_TS) { - u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } -#endif /* HAVE_PTP_1588_CLOCK */ - - napi_schedule(&q_vector->napi); - - return IRQ_HANDLED; -} - -/** - * igb_intr - Legacy Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure - **/ -static irqreturn_t igb_intr(int irq, void *data) -{ - struct igb_adapter *adapter = data; - struct igb_q_vector *q_vector = adapter->q_vector[0]; - struct e1000_hw *hw = &adapter->hw; - /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No - * need for the IMC write */ - u32 icr = E1000_READ_REG(hw, E1000_ICR); - - /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is - * not set, then the adapter didn't send an interrupt */ - if (!(icr & E1000_ICR_INT_ASSERTED)) - return IRQ_NONE; - - igb_write_itr(q_vector); - - if (icr & E1000_ICR_DRSTA) - schedule_work(&adapter->reset_task); - - if (icr & E1000_ICR_DOUTSYNC) { - /* HW is reporting DMA is out of sync */ - adapter->stats.doosync++; - } - - if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - hw->mac.get_link_status = 1; - /* guard against interrupt when we're going down */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - } - -#ifdef HAVE_PTP_1588_CLOCK - if (icr & E1000_ICR_TS) { - u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } -#endif /* HAVE_PTP_1588_CLOCK */ - - napi_schedule(&q_vector->napi); - - return IRQ_HANDLED; -} - -void igb_ring_irq_enable(struct igb_q_vector *q_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - - if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || - (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { - if ((adapter->num_q_vectors == 1) && !adapter->vf_data) - igb_set_itr(q_vector); - else - igb_update_ring_itr(q_vector); - } - - if (!test_bit(__IGB_DOWN, &adapter->state)) { - if (adapter->msix_entries) - E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value); - else - igb_irq_enable(adapter); - } -} - -/** - * igb_poll - NAPI Rx polling callback - * @napi: napi polling structure - * @budget: count of how many packets we should handle - **/ -static int igb_poll(struct napi_struct *napi, int budget) -{ - struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi); - bool clean_complete = true; - -#ifdef IGB_DCA - if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_dca(q_vector); -#endif - if (q_vector->tx.ring) - clean_complete = igb_clean_tx_irq(q_vector); - - if (q_vector->rx.ring) - clean_complete &= igb_clean_rx_irq(q_vector, budget); - -#ifndef HAVE_NETDEV_NAPI_LIST - /* if netdev is disabled we need to stop polling */ - if (!netif_running(q_vector->adapter->netdev)) - clean_complete = true; - -#endif - /* If all work not completed, return budget and keep polling */ - if (!clean_complete) - return budget; - - /* If not enough Rx work done, exit the polling mode */ - napi_complete(napi); - igb_ring_irq_enable(q_vector); - - return 0; -} - -/** - * igb_clean_tx_irq - Reclaim resources after transmit completes - * @q_vector: pointer to q_vector containing needed info - * returns TRUE if ring is completely cleaned - **/ -static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct igb_ring *tx_ring = q_vector->tx.ring; - struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; - unsigned int budget = q_vector->tx.work_limit; - unsigned int i = tx_ring->next_to_clean; - - if (test_bit(__IGB_DOWN, &adapter->state)) - return true; - - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); - i -= tx_ring->count; - - do { - union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; - - /* if next_to_watch is not set then there is no work pending 
*/ - if (!eop_desc) - break; - - /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); - - /* if DD is not set pending work has not been completed */ - if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) - break; - - /* clear next_to_watch to prevent false hangs */ - tx_buffer->next_to_watch = NULL; - - /* update the statistics for this packet */ - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - - /* free the skb */ - dev_kfree_skb_any(tx_buffer->skb); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - - /* clear tx_buffer data */ - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - - /* clear last DMA location and unmap remaining buffers */ - while (tx_desc != eop_desc) { - tx_buffer++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IGB_TX_DESC(tx_ring, 0); - } - - /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - } - } - - /* move us one more past the eop_desc for start of next pkt */ - tx_buffer++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IGB_TX_DESC(tx_ring, 0); - } - - /* issue prefetch for next Tx descriptor */ - prefetch(tx_desc); - - /* update budget accounting */ - budget--; - } while (likely(budget)); - - netdev_tx_completed_queue(txring_txq(tx_ring), - total_packets, total_bytes); - - i += tx_ring->count; - tx_ring->next_to_clean = i; - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; - q_vector->tx.total_bytes += total_bytes; - q_vector->tx.total_packets += total_packets; - -#ifdef DEBUG - if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) && - !(adapter->disable_hw_reset && adapter->tx_hang_detected)) { -#else - if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { -#endif - struct e1000_hw *hw = &adapter->hw; - - /* Detect a transmit hang in hardware, this serializes the - * check with the clearing of time_stamp and movement of i */ - clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - if (tx_buffer->next_to_watch && - time_after(jiffies, tx_buffer->time_stamp + - (adapter->tx_timeout_factor * HZ)) - && !(E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_TXOFF)) { - - /* detected Tx unit hang */ -#ifdef DEBUG - adapter->tx_hang_detected = TRUE; - if (adapter->disable_hw_reset) { - DPRINTK(DRV, WARNING, - "Deactivating netdev watchdog timer\n"); - if (del_timer(&netdev_ring(tx_ring)->watchdog_timer)) - dev_put(netdev_ring(tx_ring)); -#ifndef HAVE_NET_DEVICE_OPS - netdev_ring(tx_ring)->tx_timeout = NULL; -#endif - } -#endif /* DEBUG */ - dev_err(tx_ring->dev, - "Detected Tx Unit Hang\n" - " Tx Queue <%d>\n" - " TDH <%x>\n" - " TDT <%x>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n" - "buffer_info[next_to_clean]\n" - " time_stamp <%lx>\n" - " next_to_watch <%p>\n" - " jiffies <%lx>\n" - " desc.status <%x>\n", - tx_ring->queue_index, - E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)), - readl(tx_ring->tail), - tx_ring->next_to_use, - tx_ring->next_to_clean, - tx_buffer->time_stamp, - tx_buffer->next_to_watch, - jiffies, - tx_buffer->next_to_watch->wb.status); - if (netif_is_multiqueue(netdev_ring(tx_ring))) - 
netif_stop_subqueue(netdev_ring(tx_ring), - ring_queue_index(tx_ring)); - else - netif_stop_queue(netdev_ring(tx_ring)); - - /* we are about to reset, no point in enabling stuff */ - return true; - } - } - -#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) - if (unlikely(total_packets && - netif_carrier_ok(netdev_ring(tx_ring)) && - igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { - /* Make sure that anybody stopping the queue after this - * sees the new next_to_clean. - */ - smp_mb(); - if (netif_is_multiqueue(netdev_ring(tx_ring))) { - if (__netif_subqueue_stopped(netdev_ring(tx_ring), - ring_queue_index(tx_ring)) && - !(test_bit(__IGB_DOWN, &adapter->state))) { - netif_wake_subqueue(netdev_ring(tx_ring), - ring_queue_index(tx_ring)); - tx_ring->tx_stats.restart_queue++; - } - } else { - if (netif_queue_stopped(netdev_ring(tx_ring)) && - !(test_bit(__IGB_DOWN, &adapter->state))) { - netif_wake_queue(netdev_ring(tx_ring)); - tx_ring->tx_stats.restart_queue++; - } - } - } - - return !!budget; -} - -#ifdef HAVE_VLAN_RX_REGISTER -/** - * igb_receive_skb - helper function to handle rx indications - * @q_vector: structure containing interrupt and ring information - * @skb: packet to send up - **/ -static void igb_receive_skb(struct igb_q_vector *q_vector, - struct sk_buff *skb) -{ - struct vlan_group **vlgrp = netdev_priv(skb->dev); - - if (IGB_CB(skb)->vid) { - if (*vlgrp) { - vlan_gro_receive(&q_vector->napi, *vlgrp, - IGB_CB(skb)->vid, skb); - } else { - dev_kfree_skb_any(skb); - } - } else { - napi_gro_receive(&q_vector->napi, skb); - } -} - -#endif /* HAVE_VLAN_RX_REGISTER */ -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT -/** - * igb_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void igb_reuse_rx_page(struct igb_ring *rx_ring, - struct igb_rx_buffer *old_buff) -{ - struct igb_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer)); - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, - old_buff->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); -} - -static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, - struct page *page, - unsigned int truesize) -{ - /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_node_id())) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; - - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IGB_RX_BUFSZ; - -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; - - if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) - return false; -#endif - - /* bump ref count on page before it is given to the stack */ - get_page(page); - - return true; -} - -/** - * igb_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware - * @skb: sk_buff to place the data into - * - * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. - **/ -static bool igb_add_rx_frag(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct page *page = rx_buffer->page; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); -#if (PAGE_SIZE < 8192) - unsigned int truesize = IGB_RX_BUFSZ; -#else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); -#endif - - if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - -#ifdef HAVE_PTP_1588_CLOCK - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; - } -#endif /* HAVE_PTP_1588_CLOCK */ - - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* we can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(page) == numa_node_id())) - return true; - - /* this page cannot be reused so discard it */ - put_page(page); - return false; - } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - return igb_can_reuse_rx_page(rx_buffer, page, truesize); -} - -static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct igb_rx_buffer *rx_buffer; - struct page *page; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - - page = rx_buffer->page; - prefetchw(page); - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif - - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IGB_RX_HDR_LEN); - if 
(unlikely(!skb)) { - rx_ring->rx_stats.alloc_failed++; - return NULL; - } - - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - } - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); - - /* pull page into skb */ - if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { - /* hand second half of page back to the ring */ - igb_reuse_rx_page(rx_ring, rx_buffer); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); - } - - /* clear contents of rx_buffer */ - rx_buffer->page = NULL; - - return skb; -} - -#endif -static inline void igb_rx_checksum(struct igb_ring *ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - /* Ignore Checksum bit is set */ - if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) - return; - - /* Rx checksum disabled via ethtool */ - if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM)) - return; - - /* TCP/UDP checksum error bit is set */ - if (igb_test_staterr(rx_desc, - E1000_RXDEXT_STATERR_TCPE | - E1000_RXDEXT_STATERR_IPE)) { - /* - * work around errata with sctp packets where the TCPE aka - * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) - * packets, (aka let the stack check the crc32c) - */ - if (!((skb->len == 60) && - test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) - ring->rx_stats.csum_err++; - - /* let the stack verify checksum errors */ - return; - } - /* It must be a TCP or UDP packet with a valid checksum */ - if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | - E1000_RXD_STAT_UDPCS)) - skb->ip_summed = CHECKSUM_UNNECESSARY; -} - -#ifdef NETIF_F_RXHASH -static inline void igb_rx_hash(struct igb_ring *ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - if (netdev_ring(ring)->features & NETIF_F_RXHASH) - skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); -} - -#endif -#ifndef IGB_NO_LRO -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT -/** - * igb_merge_active_tail - merge active tail into lro skb - * @tail: pointer to active tail in frag_list - * - * This function merges the length and data of an active tail into the - * skb containing the frag_list. It resets the tail's pointer to the head, - * but it leaves the heads pointer to tail intact. - **/ -static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail) -{ - struct sk_buff *head = IGB_CB(tail)->head; - - if (!head) - return tail; - - head->len += tail->len; - head->data_len += tail->len; - head->truesize += tail->len; - - IGB_CB(tail)->head = NULL; - - return head; -} - -/** - * igb_add_active_tail - adds an active tail into the skb frag_list - * @head: pointer to the start of the skb - * @tail: pointer to active tail to add to frag_list - * - * This function adds an active tail to the end of the frag list. This tail - * will still be receiving data so we cannot yet ad it's stats to the main - * skb. That is done via igb_merge_active_tail. 
- **/ -static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail) -{ - struct sk_buff *old_tail = IGB_CB(head)->tail; - - if (old_tail) { - igb_merge_active_tail(old_tail); - old_tail->next = tail; - } else { - skb_shinfo(head)->frag_list = tail; - } - - IGB_CB(tail)->head = head; - IGB_CB(head)->tail = tail; - - IGB_CB(head)->append_cnt++; -} - -/** - * igb_close_active_frag_list - cleanup pointers on a frag_list skb - * @head: pointer to head of an active frag list - * - * This function will clear the frag_tail_tracker pointer on an active - * frag_list and returns true if the pointer was actually set - **/ -static inline bool igb_close_active_frag_list(struct sk_buff *head) -{ - struct sk_buff *tail = IGB_CB(head)->tail; - - if (!tail) - return false; - - igb_merge_active_tail(tail); - - IGB_CB(head)->tail = NULL; - - return true; -} - -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ -/** - * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled - * @adapter: board private structure - * @rx_desc: pointer to the rx descriptor - * @skb: pointer to the skb to be merged - * - **/ -static inline bool igb_can_lro(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct iphdr *iph = (struct iphdr *)skb->data; - __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - - /* verify hardware indicates this is IPv4/TCP */ - if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) || - !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)))) - return false; - - /* .. and LRO is enabled */ - if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO)) - return false; - - /* .. and we are not in promiscuous mode */ - if (netdev_ring(rx_ring)->flags & IFF_PROMISC) - return false; - - /* .. and the header is large enough for us to read IP/TCP fields */ - if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr))) - return false; - - /* .. and there are no VLANs on packet */ - if (skb->protocol != __constant_htons(ETH_P_IP)) - return false; - - /* .. and we are version 4 with no options */ - if (*(u8 *)iph != 0x45) - return false; - - /* .. and the packet is not fragmented */ - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) - return false; - - /* .. and that next header is TCP */ - if (iph->protocol != IPPROTO_TCP) - return false; - - return true; -} - -static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb) -{ - return (struct igb_lrohdr *)skb->data; -} - -/** - * igb_lro_flush - Indicate packets to upper layer. - * - * Update IP and TCP header part of head skb if more than one - * skb's chained and indicate packets to upper layer. 
- **/ -static void igb_lro_flush(struct igb_q_vector *q_vector, - struct sk_buff *skb) -{ - struct igb_lro_list *lrolist = &q_vector->lrolist; - - __skb_unlink(skb, &lrolist->active); - - if (IGB_CB(skb)->append_cnt) { - struct igb_lrohdr *lroh = igb_lro_hdr(skb); - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - /* close any active lro contexts */ - igb_close_active_frag_list(skb); - -#endif - /* incorporate ip header and re-calculate checksum */ - lroh->iph.tot_len = ntohs(skb->len); - lroh->iph.check = 0; - - /* header length is 5 since we know no options exist */ - lroh->iph.check = ip_fast_csum((u8 *)lroh, 5); - - /* clear TCP checksum to indicate we are an LRO frame */ - lroh->th.check = 0; - - /* incorporate latest timestamp into the tcp header */ - if (IGB_CB(skb)->tsecr) { - lroh->ts[2] = IGB_CB(skb)->tsecr; - lroh->ts[1] = htonl(IGB_CB(skb)->tsval); - } -#ifdef NETIF_F_GSO - - skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; -#endif - } - -#ifdef HAVE_VLAN_RX_REGISTER - igb_receive_skb(q_vector, skb); -#else - napi_gro_receive(&q_vector->napi, skb); -#endif - lrolist->stats.flushed++; -} - -static void igb_lro_flush_all(struct igb_q_vector *q_vector) -{ - struct igb_lro_list *lrolist = &q_vector->lrolist; - struct sk_buff *skb, *tmp; - - skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp) - igb_lro_flush(q_vector, skb); -} - -/* - * igb_lro_header_ok - Main LRO function. - **/ -static void igb_lro_header_ok(struct sk_buff *skb) -{ - struct igb_lrohdr *lroh = igb_lro_hdr(skb); - u16 opt_bytes, data_len; - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - IGB_CB(skb)->tail = NULL; -#endif - IGB_CB(skb)->tsecr = 0; - IGB_CB(skb)->append_cnt = 0; - IGB_CB(skb)->mss = 0; - - /* ensure that the checksum is valid */ - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - return; - - /* If we see CE codepoint in IP header, packet is not mergeable */ - if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph))) - return; - - /* ensure no bits set besides ack or psh */ - if (lroh->th.fin || lroh->th.syn || lroh->th.rst || - lroh->th.urg || lroh->th.ece || lroh->th.cwr || - !lroh->th.ack) - return; - - /* store the total packet length */ - data_len = ntohs(lroh->iph.tot_len); - - /* remove any padding from the end of the skb */ - __pskb_trim(skb, data_len); - - /* remove header length from data length */ - data_len -= sizeof(struct igb_lrohdr); - - /* - * check for timestamps. 
Since the only option we handle are timestamps, - * we only have to handle the simple case of aligned timestamps - */ - opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr); - if (opt_bytes != 0) { - if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) || - !pskb_may_pull(skb, sizeof(struct igb_lrohdr) + - TCPOLEN_TSTAMP_ALIGNED) || - (lroh->ts[0] != htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP)) || - (lroh->ts[2] == 0)) { - return; - } - - IGB_CB(skb)->tsval = ntohl(lroh->ts[1]); - IGB_CB(skb)->tsecr = lroh->ts[2]; - - data_len -= TCPOLEN_TSTAMP_ALIGNED; - } - - /* record data_len as mss for the packet */ - IGB_CB(skb)->mss = data_len; - IGB_CB(skb)->next_seq = ntohl(lroh->th.seq); -} - -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT -static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb) -{ - struct skb_shared_info *sh_info; - struct skb_shared_info *new_skb_info; - unsigned int data_len; - - sh_info = skb_shinfo(lro_skb); - new_skb_info = skb_shinfo(new_skb); - - /* copy frags into the last skb */ - memcpy(sh_info->frags + sh_info->nr_frags, - new_skb_info->frags, - new_skb_info->nr_frags * sizeof(skb_frag_t)); - - /* copy size data over */ - sh_info->nr_frags += new_skb_info->nr_frags; - data_len = IGB_CB(new_skb)->mss; - lro_skb->len += data_len; - lro_skb->data_len += data_len; - lro_skb->truesize += data_len; - - /* wipe record of data from new_skb */ - new_skb_info->nr_frags = 0; - new_skb->len = new_skb->data_len = 0; - dev_kfree_skb_any(new_skb); -} - -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ -/** - * igb_lro_receive - if able, queue skb into lro chain - * @q_vector: structure containing interrupt and ring information - * @new_skb: pointer to current skb being checked - * - * Checks whether the skb given is eligible for LRO and if that's - * fine chains it to the existing lro_skb based on flowid. If an LRO for - * the flow doesn't exist create one. 
- **/ -static void igb_lro_receive(struct igb_q_vector *q_vector, - struct sk_buff *new_skb) -{ - struct sk_buff *lro_skb; - struct igb_lro_list *lrolist = &q_vector->lrolist; - struct igb_lrohdr *lroh = igb_lro_hdr(new_skb); - __be32 saddr = lroh->iph.saddr; - __be32 daddr = lroh->iph.daddr; - __be32 tcp_ports = *(__be32 *)&lroh->th; - u16 data_len; -#ifdef HAVE_VLAN_RX_REGISTER - u16 vid = IGB_CB(new_skb)->vid; -#else - u16 vid = new_skb->vlan_tci; -#endif - - igb_lro_header_ok(new_skb); - - /* - * we have a packet that might be eligible for LRO, - * so see if it matches anything we might expect - */ - skb_queue_walk(&lrolist->active, lro_skb) { - if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports || - igb_lro_hdr(lro_skb)->iph.saddr != saddr || - igb_lro_hdr(lro_skb)->iph.daddr != daddr) - continue; - -#ifdef HAVE_VLAN_RX_REGISTER - if (IGB_CB(lro_skb)->vid != vid) -#else - if (lro_skb->vlan_tci != vid) -#endif - continue; - - /* out of order packet */ - if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) { - igb_lro_flush(q_vector, lro_skb); - IGB_CB(new_skb)->mss = 0; - break; - } - - /* TCP timestamp options have changed */ - if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) { - igb_lro_flush(q_vector, lro_skb); - break; - } - - /* make sure timestamp values are increasing */ - if (IGB_CB(lro_skb)->tsecr && - IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) { - igb_lro_flush(q_vector, lro_skb); - IGB_CB(new_skb)->mss = 0; - break; - } - - data_len = IGB_CB(new_skb)->mss; - - /* Check for all of the above below - * malformed header - * no tcp data - * resultant packet would be too large - * new skb is larger than our current mss - * data would remain in header - * we would consume more frags then the sk_buff contains - * ack sequence numbers changed - * window size has changed - */ - if (data_len == 0 || - data_len > IGB_CB(lro_skb)->mss || - data_len > IGB_CB(lro_skb)->free || -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - data_len != new_skb->data_len || - skb_shinfo(new_skb)->nr_frags >= - (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) || -#endif - igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq || - igb_lro_hdr(lro_skb)->th.window != lroh->th.window) { - igb_lro_flush(q_vector, lro_skb); - break; - } - - /* Remove IP and TCP header*/ - skb_pull(new_skb, new_skb->len - data_len); - - /* update timestamp and timestamp echo response */ - IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval; - IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr; - - /* update sequence and free space */ - IGB_CB(lro_skb)->next_seq += data_len; - IGB_CB(lro_skb)->free -= data_len; - - /* update append_cnt */ - IGB_CB(lro_skb)->append_cnt++; - -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - /* if header is empty pull pages into current skb */ - igb_merge_frags(lro_skb, new_skb); -#else - /* chain this new skb in frag_list */ - igb_add_active_tail(lro_skb, new_skb); -#endif - - if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh || - skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) { - igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh; - igb_lro_flush(q_vector, lro_skb); - } - - lrolist->stats.coal++; - return; - } - - if (IGB_CB(new_skb)->mss && !lroh->th.psh) { - /* if we are at capacity flush the tail */ - if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) { - lro_skb = skb_peek_tail(&lrolist->active); - if (lro_skb) - igb_lro_flush(q_vector, lro_skb); - } - - /* update sequence and free space */ - IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss; - IGB_CB(new_skb)->free = 65521 - new_skb->len; - - 
/* .. and insert at the front of the active list */ - __skb_queue_head(&lrolist->active, new_skb); - - lrolist->stats.coal++; - return; - } - - /* packet not handled by any of the above, pass it to the stack */ -#ifdef HAVE_VLAN_RX_REGISTER - igb_receive_skb(q_vector, new_skb); -#else - napi_gro_receive(&q_vector->napi, new_skb); -#endif -} - -#endif /* IGB_NO_LRO */ -/** - * igb_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, timestamp, protocol, and - * other fields within the skb. - **/ -static void igb_process_skb_fields(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct net_device *dev = rx_ring->netdev; - __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - -#ifdef NETIF_F_RXHASH - igb_rx_hash(rx_ring, rx_desc, skb); - -#endif - igb_rx_checksum(rx_ring, rx_desc, skb); - - /* update packet type stats */ - if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)) - rx_ring->rx_stats.ipv4_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX)) - rx_ring->rx_stats.ipv4e_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6)) - rx_ring->rx_stats.ipv6_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX)) - rx_ring->rx_stats.ipv6e_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) - rx_ring->rx_stats.tcp_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP)) - rx_ring->rx_stats.udp_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP)) - rx_ring->rx_stats.sctp_packets++; - else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS)) - rx_ring->rx_stats.nfs_packets++; - -#ifdef HAVE_PTP_1588_CLOCK - igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); -#endif /* HAVE_PTP_1588_CLOCK */ - -#ifdef NETIF_F_HW_VLAN_CTAG_RX - if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && -#else - if ((dev->features & NETIF_F_HW_VLAN_RX) && -#endif - igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { - u16 vid = 0; - if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && - test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) - vid = be16_to_cpu(rx_desc->wb.upper.vlan); - else - vid = le16_to_cpu(rx_desc->wb.upper.vlan); -#ifdef HAVE_VLAN_RX_REGISTER - IGB_CB(skb)->vid = vid; - } else { - IGB_CB(skb)->vid = 0; -#else - -#ifdef HAVE_VLAN_PROTOCOL - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); -#else - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); -#endif - - -#endif - } - - skb_record_rx_queue(skb, rx_ring->queue_index); - - skb->protocol = eth_type_trans(skb, dev); -} - -/** - * igb_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool igb_is_non_eop(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? 
ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(IGB_RX_DESC(rx_ring, ntc)); - - if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) - return false; - - return true; -} - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT -/* igb_clean_rx_irq -- * legacy */ -static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) -{ - struct igb_ring *rx_ring = q_vector->rx.ring; - unsigned int total_bytes = 0, total_packets = 0; - u16 cleaned_count = igb_desc_unused(rx_ring); - - do { - struct igb_rx_buffer *rx_buffer; - union e1000_adv_rx_desc *rx_desc; - struct sk_buff *skb; - u16 ntc; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IGB_RX_BUFFER_WRITE) { - igb_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - ntc = rx_ring->next_to_clean; - rx_desc = IGB_RX_DESC(rx_ring, ntc); - rx_buffer = &rx_ring->rx_buffer_info[ntc]; - - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) - break; - - /* - * This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set - */ - rmb(); - - skb = rx_buffer->skb; - - prefetch(skb->data); - - /* pull the header of the skb in */ - __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length)); - - /* clear skb reference in buffer info structure */ - rx_buffer->skb = NULL; - - cleaned_count++; - - BUG_ON(igb_is_non_eop(rx_ring, rx_desc)); - - dma_unmap_single(rx_ring->dev, rx_buffer->dma, - rx_ring->rx_buffer_len, - DMA_FROM_DEVICE); - rx_buffer->dma = 0; - - if (igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { - dev_kfree_skb_any(skb); - continue; - } - - total_bytes += skb->len; - - /* populate checksum, timestamp, VLAN, and protocol */ - igb_process_skb_fields(rx_ring, rx_desc, skb); - -#ifndef IGB_NO_LRO - if (igb_can_lro(rx_ring, rx_desc, skb)) - igb_lro_receive(q_vector, skb); - else -#endif -#ifdef HAVE_VLAN_RX_REGISTER - igb_receive_skb(q_vector, skb); -#else - napi_gro_receive(&q_vector->napi, skb); -#endif - -#ifndef NETIF_F_GRO - netdev_ring(rx_ring)->last_rx = jiffies; - -#endif - /* update budget accounting */ - total_packets++; - } while (likely(total_packets < budget)); - - rx_ring->rx_stats.packets += total_packets; - rx_ring->rx_stats.bytes += total_bytes; - q_vector->rx.total_packets += total_packets; - q_vector->rx.total_bytes += total_bytes; - - if (cleaned_count) - igb_alloc_rx_buffers(rx_ring, cleaned_count); - -#ifndef IGB_NO_LRO - igb_lro_flush_all(q_vector); - -#endif /* IGB_NO_LRO */ - return total_packets < budget; -} -#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ -/** - * igb_get_headlen - determine size of header for LRO/GRO - * @data: pointer to the start of the headers - * @max_len: total length of section to find headers in - * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, and GRO offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. 
- **/ -static unsigned int igb_get_headlen(unsigned char *data, - unsigned int max_len) -{ - union { - unsigned char *network; - /* l2 headers */ - struct ethhdr *eth; - struct vlan_hdr *vlan; - /* l3 headers */ - struct iphdr *ipv4; - struct ipv6hdr *ipv6; - } hdr; - __be16 protocol; - u8 nexthdr = 0; /* default to not TCP */ - u8 hlen; - - /* this should never happen, but better safe than sorry */ - if (max_len < ETH_HLEN) - return max_len; - - /* initialize network frame pointer */ - hdr.network = data; - - /* set first protocol and move network header forward */ - protocol = hdr.eth->h_proto; - hdr.network += ETH_HLEN; - - /* handle any vlan tag if present */ - if (protocol == __constant_htons(ETH_P_8021Q)) { - if ((hdr.network - data) > (max_len - VLAN_HLEN)) - return max_len; - - protocol = hdr.vlan->h_vlan_encapsulated_proto; - hdr.network += VLAN_HLEN; - } - - /* handle L3 protocols */ - if (protocol == __constant_htons(ETH_P_IP)) { - if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) - return max_len; - - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (hdr.network[0] & 0x0F) << 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct iphdr)) - return hdr.network - data; - - /* record next protocol if header is present */ - if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) - nexthdr = hdr.ipv4->protocol; -#ifdef NETIF_F_TSO6 - } else if (protocol == __constant_htons(ETH_P_IPV6)) { - if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) - return max_len; - - /* record next protocol */ - nexthdr = hdr.ipv6->nexthdr; - hlen = sizeof(struct ipv6hdr); -#endif /* NETIF_F_TSO6 */ - } else { - return hdr.network - data; - } - - /* relocate pointer to start of L4 header */ - hdr.network += hlen; - - /* finally sort out TCP */ - if (nexthdr == IPPROTO_TCP) { - if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) - return max_len; - - /* access doff as a u8 to avoid unaligned access on ia64 */ - hlen = (hdr.network[12] & 0xF0) >> 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct tcphdr)) - return hdr.network - data; - - hdr.network += hlen; - } else if (nexthdr == IPPROTO_UDP) { - if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) - return max_len; - - hdr.network += sizeof(struct udphdr); - } - - /* - * If everything has gone correctly hdr.network should be the - * data section of the packet and will be the end of the header. - * If not then it probably represents the end of the last recognized - * header. - */ - if ((hdr.network - data) < max_len) - return hdr.network - data; - else - return max_len; -} - -/** - * igb_pull_tail - igb specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being adjusted - * - * This function is an igb specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. 
- */ -static void igb_pull_tail(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* - * it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - -#ifdef HAVE_PTP_1588_CLOCK - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - /* retrieve timestamp from buffer */ - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - - /* update pointers to remove timestamp header */ - skb_frag_size_sub(frag, IGB_TS_HDR_LEN); - frag->page_offset += IGB_TS_HDR_LEN; - skb->data_len -= IGB_TS_HDR_LEN; - skb->len -= IGB_TS_HDR_LEN; - - /* move va to start of packet data */ - va += IGB_TS_HDR_LEN; - } -#endif /* HAVE_PTP_1588_CLOCK */ - - /* - * we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** - * igb_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed - * - * Address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. 
- **/ -static bool igb_cleanup_headers(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - - if (unlikely((igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { - struct net_device *netdev = rx_ring->netdev; - if (!(netdev->features & NETIF_F_RXALL)) { - dev_kfree_skb_any(skb); - return true; - } - } - - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - igb_pull_tail(rx_ring, rx_desc, skb); - - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } - - return false; -} - -/* igb_clean_rx_irq -- * packet split */ -static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) -{ - struct igb_ring *rx_ring = q_vector->rx.ring; - struct sk_buff *skb = rx_ring->skb; - unsigned int total_bytes = 0, total_packets = 0; - u16 cleaned_count = igb_desc_unused(rx_ring); - - do { - union e1000_adv_rx_desc *rx_desc; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IGB_RX_BUFFER_WRITE) { - igb_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) - break; - - /* - * This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set - */ - rmb(); - - /* retrieve a buffer from the ring */ - skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); - - /* exit if we failed to retrieve a buffer */ - if (!skb) - break; - - cleaned_count++; - - /* fetch next buffer in frame if non-eop */ - if (igb_is_non_eop(rx_ring, rx_desc)) - continue; - - /* verify the packet layout is correct */ - if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { - skb = NULL; - continue; - } - - /* probably a little skewed due to removing CRC */ - total_bytes += skb->len; - - /* populate checksum, timestamp, VLAN, and protocol */ - igb_process_skb_fields(rx_ring, rx_desc, skb); - -#ifndef IGB_NO_LRO - if (igb_can_lro(rx_ring, rx_desc, skb)) - igb_lro_receive(q_vector, skb); - else -#endif -#ifdef HAVE_VLAN_RX_REGISTER - igb_receive_skb(q_vector, skb); -#else - napi_gro_receive(&q_vector->napi, skb); -#endif -#ifndef NETIF_F_GRO - - netdev_ring(rx_ring)->last_rx = jiffies; -#endif - - /* reset skb pointer */ - skb = NULL; - - /* update budget accounting */ - total_packets++; - } while (likely(total_packets < budget)); - - /* place incomplete frames back on ring for completion */ - rx_ring->skb = skb; - - rx_ring->rx_stats.packets += total_packets; - rx_ring->rx_stats.bytes += total_bytes; - q_vector->rx.total_packets += total_packets; - q_vector->rx.total_bytes += total_bytes; - - if (cleaned_count) - igb_alloc_rx_buffers(rx_ring, cleaned_count); - -#ifndef IGB_NO_LRO - igb_lro_flush_all(q_vector); - -#endif /* IGB_NO_LRO */ - return total_packets < budget; -} -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ - -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT -static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, - struct igb_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; - - if (dma) - return true; - - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring), - rx_ring->rx_buffer_len); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - /* initialize skb for ring */ - skb_record_rx_queue(skb, 
ring_queue_index(rx_ring)); - } - - dma = dma_map_single(rx_ring->dev, skb->data, - rx_ring->rx_buffer_len, DMA_FROM_DEVICE); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - dev_kfree_skb_any(skb); - bi->skb = NULL; - - rx_ring->rx_stats.alloc_failed++; - return false; - } - - bi->dma = dma; - return true; -} - -#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ -static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, - struct igb_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - - /* - * if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = 0; - - return true; -} - -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ -/** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure - **/ -void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) -{ - union e1000_adv_rx_desc *rx_desc; - struct igb_rx_buffer *bi; - u16 i = rx_ring->next_to_use; - - /* nothing to do */ - if (!cleaned_count) - return; - - rx_desc = IGB_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; - - do { -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - if (!igb_alloc_mapped_skb(rx_ring, bi)) -#else - if (!igb_alloc_mapped_page(rx_ring, bi)) -#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ - break; - - /* - * Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. - */ -#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); -#else - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); -#endif - - rx_desc++; - bi++; - i++; - if (unlikely(!i)) { - rx_desc = IGB_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; - } - - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->read.hdr_addr = 0; - - cleaned_count--; - } while (cleaned_count); - - i += rx_ring->count; - - if (rx_ring->next_to_use != i) { - /* record the next descriptor to use */ - rx_ring->next_to_use = i; - -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; - -#endif - /* - * Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). 
- */ - wmb(); - writel(i, rx_ring->tail); - } -} - -#ifdef SIOCGMIIPHY -/** - * igb_mii_ioctl - - * @netdev: - * @ifreq: - * @cmd: - **/ -static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct mii_ioctl_data *data = if_mii(ifr); - - if (adapter->hw.phy.media_type != e1000_media_type_copper) - return -EOPNOTSUPP; - - switch (cmd) { - case SIOCGMIIPHY: - data->phy_id = adapter->hw.phy.addr; - break; - case SIOCGMIIREG: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, - &data->val_out)) - return -EIO; - break; - case SIOCSMIIREG: - default: - return -EOPNOTSUPP; - } - return E1000_SUCCESS; -} - -#endif -/** - * igb_ioctl - - * @netdev: - * @ifreq: - * @cmd: - **/ -static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { -#ifdef SIOCGMIIPHY - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSMIIREG: - return igb_mii_ioctl(netdev, ifr, cmd); -#endif -#ifdef HAVE_PTP_1588_CLOCK - case SIOCSHWTSTAMP: - return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); -#endif /* HAVE_PTP_1588_CLOCK */ -#ifdef ETHTOOL_OPS_COMPAT - case SIOCETHTOOL: - return ethtool_ioctl(ifr); -#endif - default: - return -EOPNOTSUPP; - } -} - -s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) -{ - struct igb_adapter *adapter = hw->back; - u16 cap_offset; - - cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); - if (!cap_offset) - return -E1000_ERR_CONFIG; - - pci_read_config_word(adapter->pdev, cap_offset + reg, value); - - return E1000_SUCCESS; -} - -s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) -{ - struct igb_adapter *adapter = hw->back; - u16 cap_offset; - - cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); - if (!cap_offset) - return -E1000_ERR_CONFIG; - - pci_write_config_word(adapter->pdev, cap_offset + reg, *value); - - return E1000_SUCCESS; -} - -#ifdef HAVE_VLAN_RX_REGISTER -static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp) -#else -void igb_vlan_mode(struct net_device *netdev, u32 features) -#endif -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 ctrl, rctl; - int i; -#ifdef HAVE_VLAN_RX_REGISTER - bool enable = !!vlgrp; - - igb_irq_disable(adapter); - - adapter->vlgrp = vlgrp; - - if (!test_bit(__IGB_DOWN, &adapter->state)) - igb_irq_enable(adapter); -#else -#ifdef NETIF_F_HW_VLAN_CTAG_RX - bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); -#else - bool enable = !!(features & NETIF_F_HW_VLAN_RX); -#endif -#endif - - if (enable) { - /* enable VLAN tag insert/strip */ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl |= E1000_CTRL_VME; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - /* Disable CFI check */ - rctl = E1000_READ_REG(hw, E1000_RCTL); - rctl &= ~E1000_RCTL_CFIEN; - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - } else { - /* disable VLAN tag insert/strip */ - ctrl = E1000_READ_REG(hw, E1000_CTRL); - ctrl &= ~E1000_CTRL_VME; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - } - -#ifndef CONFIG_IGB_VMDQ_NETDEV - for (i = 0; i < adapter->vmdq_pools; i++) { - igb_set_vf_vlan_strip(adapter, - adapter->vfs_allocated_count + i, - enable); - } - -#else - igb_set_vf_vlan_strip(adapter, - adapter->vfs_allocated_count, - enable); - - for (i = 1; i < adapter->vmdq_pools; i++) { -#ifdef HAVE_VLAN_RX_REGISTER - struct igb_vmdq_adapter *vadapter; - vadapter = netdev_priv(adapter->vmdq_netdev[i-1]); - 
enable = !!vadapter->vlgrp; -#else - struct net_device *vnetdev; - vnetdev = adapter->vmdq_netdev[i-1]; -#ifdef NETIF_F_HW_VLAN_CTAG_RX - enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX); -#else - enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX); -#endif -#endif - igb_set_vf_vlan_strip(adapter, - adapter->vfs_allocated_count + i, - enable); - } - -#endif - igb_rlpml_set(adapter); -} - -#ifdef HAVE_VLAN_PROTOCOL -static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) -#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID -#ifdef NETIF_F_HW_VLAN_CTAG_RX -static int igb_vlan_rx_add_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) -#else -static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) -#endif -#else -static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) -#endif -{ - struct igb_adapter *adapter = netdev_priv(netdev); - int pf_id = adapter->vfs_allocated_count; - - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, TRUE, pf_id); - - /* add the filter since PF can receive vlans w/o entry in vlvf */ - igb_vfta_set(adapter, vid, TRUE); -#ifndef HAVE_NETDEV_VLAN_FEATURES - - /* Copy feature flags from netdev to the vlan netdev for this vid. - * This allows things like TSO to bubble down to our vlan device. - * There is no need to update netdev for vlan 0 (DCB), since it - * wouldn't has v_netdev. - */ - if (adapter->vlgrp) { - struct vlan_group *vlgrp = adapter->vlgrp; - struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid); - if (v_netdev) { - v_netdev->features |= netdev->features; - vlan_group_set_device(vlgrp, vid, v_netdev); - } - } -#endif -#ifndef HAVE_VLAN_RX_REGISTER - - set_bit(vid, adapter->active_vlans); -#endif -#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID - return 0; -#endif -} - -#ifdef HAVE_VLAN_PROTOCOL -static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) -#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID -#ifdef NETIF_F_HW_VLAN_CTAG_RX -static int igb_vlan_rx_kill_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) -#else -static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) -#endif -#else -static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) -#endif -{ - struct igb_adapter *adapter = netdev_priv(netdev); - int pf_id = adapter->vfs_allocated_count; - s32 err; - -#ifdef HAVE_VLAN_RX_REGISTER - igb_irq_disable(adapter); - - vlan_group_set_device(adapter->vlgrp, vid, NULL); - - if (!test_bit(__IGB_DOWN, &adapter->state)) - igb_irq_enable(adapter); - -#endif /* HAVE_VLAN_RX_REGISTER */ - /* remove vlan from VLVF table array */ - err = igb_vlvf_set(adapter, vid, FALSE, pf_id); - - /* if vid was not present in VLVF just remove it from table */ - if (err) - igb_vfta_set(adapter, vid, FALSE); -#ifndef HAVE_VLAN_RX_REGISTER - - clear_bit(vid, adapter->active_vlans); -#endif -#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID - return 0; -#endif -} - -static void igb_restore_vlan(struct igb_adapter *adapter) -{ -#ifdef HAVE_VLAN_RX_REGISTER - igb_vlan_mode(adapter->netdev, adapter->vlgrp); - - if (adapter->vlgrp) { - u16 vid; - for (vid = 0; vid < VLAN_N_VID; vid++) { - if (!vlan_group_get_device(adapter->vlgrp, vid)) - continue; -#ifdef NETIF_F_HW_VLAN_CTAG_RX - igb_vlan_rx_add_vid(adapter->netdev, - htons(ETH_P_8021Q), vid); -#else - igb_vlan_rx_add_vid(adapter->netdev, vid); -#endif - } - } -#else - u16 vid; - - igb_vlan_mode(adapter->netdev, adapter->netdev->features); - - for_each_set_bit(vid, adapter->active_vlans, 
VLAN_N_VID) -#ifdef NETIF_F_HW_VLAN_CTAG_RX - igb_vlan_rx_add_vid(adapter->netdev, - htons(ETH_P_8021Q), vid); -#else - igb_vlan_rx_add_vid(adapter->netdev, vid); -#endif -#endif -} - -int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx) -{ - struct pci_dev *pdev = adapter->pdev; - struct e1000_mac_info *mac = &adapter->hw.mac; - - mac->autoneg = 0; - - /* SerDes device's does not support 10Mbps Full/duplex - * and 100Mbps Half duplex - */ - if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { - switch (spddplx) { - case SPEED_10 + DUPLEX_HALF: - case SPEED_10 + DUPLEX_FULL: - case SPEED_100 + DUPLEX_HALF: - dev_err(pci_dev_to_dev(pdev), - "Unsupported Speed/Duplex configuration\n"); - return -EINVAL; - default: - break; - } - } - - switch (spddplx) { - case SPEED_10 + DUPLEX_HALF: - mac->forced_speed_duplex = ADVERTISE_10_HALF; - break; - case SPEED_10 + DUPLEX_FULL: - mac->forced_speed_duplex = ADVERTISE_10_FULL; - break; - case SPEED_100 + DUPLEX_HALF: - mac->forced_speed_duplex = ADVERTISE_100_HALF; - break; - case SPEED_100 + DUPLEX_FULL: - mac->forced_speed_duplex = ADVERTISE_100_FULL; - break; - case SPEED_1000 + DUPLEX_FULL: - mac->autoneg = 1; - adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; - break; - case SPEED_1000 + DUPLEX_HALF: /* not supported */ - default: - dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n"); - return -EINVAL; - } - - /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ - adapter->hw.phy.mdix = AUTO_ALL_MODES; - - return 0; -} - -static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, - bool runtime) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 ctrl, rctl, status; - u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif - - netif_device_detach(netdev); - - status = E1000_READ_REG(hw, E1000_STATUS); - if (status & E1000_STATUS_LU) - wufc &= ~E1000_WUFC_LNKC; - - if (netif_running(netdev)) - __igb_close(netdev, true); - - igb_clear_interrupt_scheme(adapter); - -#ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; -#endif - - if (wufc) { - igb_setup_rctl(adapter); - igb_set_rx_mode(netdev); - - /* turn on all-multi mode if wake on multicast is enabled */ - if (wufc & E1000_WUFC_MC) { - rctl = E1000_READ_REG(hw, E1000_RCTL); - rctl |= E1000_RCTL_MPE; - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - } - - ctrl = E1000_READ_REG(hw, E1000_CTRL); - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 - ctrl |= E1000_CTRL_ADVD3WUC; - E1000_WRITE_REG(hw, E1000_CTRL, ctrl); - - /* Allow time for pending master requests to run */ - e1000_disable_pcie_master(hw); - - E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(hw, E1000_WUFC, wufc); - } else { - E1000_WRITE_REG(hw, E1000_WUC, 0); - E1000_WRITE_REG(hw, E1000_WUFC, 0); - } - - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) - igb_power_down_link(adapter); - else - igb_power_up_link(adapter); - - /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. 
*/ - igb_release_hw_control(adapter); - - pci_disable_device(pdev); - - return 0; -} - -#ifdef CONFIG_PM -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS -static int igb_suspend(struct device *dev) -#else -static int igb_suspend(struct pci_dev *pdev, pm_message_t state) -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ -{ -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS - struct pci_dev *pdev = to_pci_dev(dev); -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; -} - -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS -static int igb_resume(struct device *dev) -#else -static int igb_resume(struct pci_dev *pdev) -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ -{ -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS - struct pci_dev *pdev = to_pci_dev(dev); -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(pci_dev_to_dev(pdev), - "igb: Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_enable_wake(pdev, PCI_D3cold, 0); - - if (igb_init_interrupt_scheme(adapter, true)) { - dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - - igb_reset(adapter); - - /* let the f/w know that the h/w is now under the control of the - * driver. */ - igb_get_hw_control(adapter); - - E1000_WRITE_REG(hw, E1000_WUS, ~0); - - if (netdev->flags & IFF_UP) { - rtnl_lock(); - err = __igb_open(netdev, true); - rtnl_unlock(); - if (err) - return err; - } - - netif_device_attach(netdev); - - return 0; -} - -#ifdef CONFIG_PM_RUNTIME -#ifdef HAVE_SYSTEM_SLEEP_PM_OPS -static int igb_runtime_idle(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - - if (!igb_has_link(adapter)) - pm_schedule_suspend(dev, MSEC_PER_SEC * 5); - - return -EBUSY; -} - -static int igb_runtime_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; -} - -static int igb_runtime_resume(struct device *dev) -{ - return igb_resume(dev); -} -#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ -#endif /* CONFIG_PM_RUNTIME */ -#endif /* CONFIG_PM */ - -#ifdef USE_REBOOT_NOTIFIER -/* only want to do this for 2.4 kernels? 
*/ -static int igb_notify_reboot(struct notifier_block *nb, unsigned long event, - void *p) -{ - struct pci_dev *pdev = NULL; - bool wake; - - switch (event) { - case SYS_DOWN: - case SYS_HALT: - case SYS_POWER_OFF: - while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { - if (pci_dev_driver(pdev) == &igb_driver) { - __igb_shutdown(pdev, &wake, 0); - if (event == SYS_POWER_OFF) { - pci_wake_from_d3(pdev, wake); - pci_set_power_state(pdev, PCI_D3hot); - } - } - } - } - return NOTIFY_DONE; -} -#else -static void igb_shutdown(struct pci_dev *pdev) -{ - bool wake = false; - - __igb_shutdown(pdev, &wake, 0); - - if (system_state == SYSTEM_POWER_OFF) { - pci_wake_from_d3(pdev, wake); - pci_set_power_state(pdev, PCI_D3hot); - } -} -#endif /* USE_REBOOT_NOTIFIER */ - -#ifdef CONFIG_NET_POLL_CONTROLLER -/* - * Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ -static void igb_netpoll(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct igb_q_vector *q_vector; - int i; - - for (i = 0; i < adapter->num_q_vectors; i++) { - q_vector = adapter->q_vector[i]; - if (adapter->msix_entries) - E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value); - else - igb_irq_disable(adapter); - napi_schedule(&q_vector->napi); - } -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - -#ifdef HAVE_PCI_ERS -#define E1000_DEV_ID_82576_VF 0x10CA -/** - * igb_io_error_detected - called when PCI error is detected - * @pdev: Pointer to PCI device - * @state: The current pci connection state - * - * This function is called after a PCI bus error affecting - * this device has been detected. 
- */ -static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - -#ifdef CONFIG_PCI_IOV__UNUSED - struct pci_dev *bdev, *vfdev; - u32 dw0, dw1, dw2, dw3; - int vf, pos; - u16 req_id, pf_func; - - if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA)) - goto skip_bad_vf_detection; - - bdev = pdev->bus->self; - while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT)) - bdev = bdev->bus->self; - - if (!bdev) - goto skip_bad_vf_detection; - - pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR); - if (!pos) - goto skip_bad_vf_detection; - - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3); - - req_id = dw1 >> 16; - /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */ - if (!(req_id & 0x0080)) - goto skip_bad_vf_detection; - - pf_func = req_id & 0x01; - if ((pf_func & 1) == (pdev->devfn & 1)) { - - vf = (req_id & 0x7F) >> 1; - dev_err(pci_dev_to_dev(pdev), - "VF %d has caused a PCIe error\n", vf); - dev_err(pci_dev_to_dev(pdev), - "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " - "%8.8x\tdw3: %8.8x\n", - dw0, dw1, dw2, dw3); - - /* Find the pci device of the offending VF */ - vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, - E1000_DEV_ID_82576_VF, NULL); - while (vfdev) { - if (vfdev->devfn == (req_id & 0xFF)) - break; - vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, - E1000_DEV_ID_82576_VF, vfdev); - } - /* - * There's a slim chance the VF could have been hot plugged, - * so if it is no longer present we don't need to issue the - * VFLR. Just clean up the AER in that case. - */ - if (vfdev) { - dev_err(pci_dev_to_dev(pdev), - "Issuing VFLR to VF %d\n", vf); - pci_write_config_dword(vfdev, 0xA8, 0x00008000); - } - - pci_cleanup_aer_uncorrect_error_status(pdev); - } - - /* - * Even though the error may have occurred on the other port - * we still need to increment the vf error reference count for - * both ports because the I/O resume function will be called - * for both of them. - */ - adapter->vferr_refcount++; - - return PCI_ERS_RESULT_RECOVERED; - -skip_bad_vf_detection: -#endif /* CONFIG_PCI_IOV */ - - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - if (netif_running(netdev)) - igb_down(adapter); - pci_disable_device(pdev); - - /* Request a slot slot reset. */ - return PCI_ERS_RESULT_NEED_RESET; -} - -/** - * igb_io_slot_reset - called after the pci bus has been reset. - * @pdev: Pointer to PCI device - * - * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. 
- */ -static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - pci_ers_result_t result; - - if (pci_enable_device_mem(pdev)) { - dev_err(pci_dev_to_dev(pdev), - "Cannot re-enable PCI device after reset.\n"); - result = PCI_ERS_RESULT_DISCONNECT; - } else { - pci_set_master(pdev); - pci_restore_state(pdev); - pci_save_state(pdev); - - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_enable_wake(pdev, PCI_D3cold, 0); - - schedule_work(&adapter->reset_task); - E1000_WRITE_REG(hw, E1000_WUS, ~0); - result = PCI_ERS_RESULT_RECOVERED; - } - - pci_cleanup_aer_uncorrect_error_status(pdev); - - return result; -} - -/** - * igb_io_resume - called when traffic can start flowing again. - * @pdev: Pointer to PCI device - * - * This callback is called when the error recovery driver tells us that - * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. - */ -static void igb_io_resume(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct igb_adapter *adapter = netdev_priv(netdev); - - if (adapter->vferr_refcount) { - dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n"); - adapter->vferr_refcount--; - return; - } - - if (netif_running(netdev)) { - if (igb_up(adapter)) { - dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n"); - return; - } - } - - netif_device_attach(netdev); - - /* let the f/w know that the h/w is now under the control of the - * driver. */ - igb_get_hw_control(adapter); -} - -#endif /* HAVE_PCI_ERS */ - -int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - - if (is_zero_ether_addr(addr)) - return 0; - - for (i = 0; i < hw->mac.rar_entry_count; i++) { - if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) - continue; - adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED | - IGB_MAC_STATE_IN_USE); - memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); - adapter->mac_table[i].queue = queue; - igb_sync_mac_table(adapter); - return 0; - } - return -ENOMEM; -} -int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue) -{ - /* search table for addr, if found, set to 0 and sync */ - int i; - struct e1000_hw *hw = &adapter->hw; - - if (is_zero_ether_addr(addr)) - return 0; - for (i = 0; i < hw->mac.rar_entry_count; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].queue = 0; - igb_sync_mac_table(adapter); - return 0; - } - } - return -ENOMEM; -} -static int igb_set_vf_mac(struct igb_adapter *adapter, - int vf, unsigned char *mac_addr) -{ - igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf); - memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); - - igb_add_mac_filter(adapter, mac_addr, vf); - - return 0; -} - -#ifdef IFLA_VF_MAX -static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) - return -EINVAL; - adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; - dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); - dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" - " 
change effective.\n"); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," - " but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, "Bring the PF device up before" - " attempting to use the VF device.\n"); - } - return igb_set_vf_mac(adapter, vf, mac); -} - -static int igb_link_mbps(int internal_link_speed) -{ - switch (internal_link_speed) { - case SPEED_100: - return 100; - case SPEED_1000: - return 1000; - case SPEED_2500: - return 2500; - default: - return 0; - } -} - -static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, - int link_speed) -{ - int rf_dec, rf_int; - u32 bcnrc_val; - - if (tx_rate != 0) { - /* Calculate the rate factor values to set */ - rf_int = link_speed / tx_rate; - rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<vf_rate_link_speed == 0) || - (adapter->hw.mac.type != e1000_82576)) - return; - - actual_link_speed = igb_link_mbps(adapter->link_speed); - if (actual_link_speed != adapter->vf_rate_link_speed) { - reset_rate = true; - adapter->vf_rate_link_speed = 0; - dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit rate is disabled\n"); - } - - for (i = 0; i < adapter->vfs_allocated_count; i++) { - if (reset_rate) - adapter->vf_data[i].tx_rate = 0; - - igb_set_vf_rate_limit(&adapter->hw, i, - adapter->vf_data[i].tx_rate, actual_link_speed); - } -} - -#ifdef HAVE_VF_MIN_MAX_TXRATE -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, - int tx_rate) -#else /* HAVE_VF_MIN_MAX_TXRATE */ -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) -#endif /* HAVE_VF_MIN_MAX_TXRATE */ -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - int actual_link_speed; - - if (hw->mac.type != e1000_82576) - return -EOPNOTSUPP; - -#ifdef HAVE_VF_MIN_MAX_TXRATE - if (min_tx_rate) - return -EINVAL; -#endif /* HAVE_VF_MIN_MAX_TXRATE */ - - actual_link_speed = igb_link_mbps(adapter->link_speed); - if ((vf >= adapter->vfs_allocated_count) || - (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) || - (tx_rate < 0) || (tx_rate > actual_link_speed)) - return -EINVAL; - - adapter->vf_rate_link_speed = actual_link_speed; - adapter->vf_data[vf].tx_rate = (u16)tx_rate; - igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); - - return 0; -} - -static int igb_ndo_get_vf_config(struct net_device *netdev, - int vf, struct ifla_vf_info *ivi) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - if (vf >= adapter->vfs_allocated_count) - return -EINVAL; - ivi->vf = vf; - memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); -#ifdef HAVE_VF_MIN_MAX_TXRATE - ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; - ivi->min_tx_rate = 0; -#else /* HAVE_VF_MIN_MAX_TXRATE */ - ivi->tx_rate = adapter->vf_data[vf].tx_rate; -#endif /* HAVE_VF_MIN_MAX_TXRATE */ - ivi->vlan = adapter->vf_data[vf].pf_vlan; - ivi->qos = adapter->vf_data[vf].pf_qos; -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE - ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; -#endif - return 0; -} -#endif -static void igb_vmm_control(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int count; - u32 reg; - - switch (hw->mac.type) { - case e1000_82575: - default: - /* replication is not supported for 82575 */ - return; - case e1000_82576: - /* notify HW that the MAC is adding vlan tags */ - reg = E1000_READ_REG(hw, E1000_DTXCTL); - reg |= (E1000_DTXCTL_VLAN_ADDED | - 
E1000_DTXCTL_SPOOF_INT); - E1000_WRITE_REG(hw, E1000_DTXCTL, reg); - case e1000_82580: - /* enable replication vlan tag stripping */ - reg = E1000_READ_REG(hw, E1000_RPLOLR); - reg |= E1000_RPLOLR_STRVLAN; - E1000_WRITE_REG(hw, E1000_RPLOLR, reg); - case e1000_i350: - case e1000_i354: - /* none of the above registers are supported by i350 */ - break; - } - - /* Enable Malicious Driver Detection */ - if ((adapter->vfs_allocated_count) && - (adapter->mdd)) { - if (hw->mac.type == e1000_i350) - igb_enable_mdd(adapter); - } - - /* enable replication and loopback support */ - count = adapter->vfs_allocated_count || adapter->vmdq_pools; - if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count) - e1000_vmdq_set_loopback_pf(hw, 1); - e1000_vmdq_set_anti_spoofing_pf(hw, - adapter->vfs_allocated_count || adapter->vmdq_pools, - adapter->vfs_allocated_count); - e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count || - adapter->vmdq_pools); -} - -static void igb_init_fw(struct igb_adapter *adapter) -{ - struct e1000_fw_drv_info fw_cmd; - struct e1000_hw *hw = &adapter->hw; - int i; - u16 mask; - - if (hw->mac.type == e1000_i210) - mask = E1000_SWFW_EEP_SM; - else - mask = E1000_SWFW_PHY0_SM; - /* i211 parts do not support this feature */ - if (hw->mac.type == e1000_i211) - hw->mac.arc_subsystem_valid = false; - - if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) { - for (i = 0; i <= FW_MAX_RETRIES; i++) { - E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI); - fw_cmd.hdr.cmd = FW_CMD_DRV_INFO; - fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN; - fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED; - fw_cmd.port_num = hw->bus.func; - fw_cmd.drv_version = FW_FAMILY_DRV_VER; - fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd, - (FW_HDR_LEN + - fw_cmd.hdr.buf_len)); - e1000_host_interface_command(hw, (u8*)&fw_cmd, - sizeof(fw_cmd)); - if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS) - break; - } - } else - dev_warn(pci_dev_to_dev(adapter->pdev), - "Unable to get semaphore, firmware init failed.\n"); - hw->mac.ops.release_swfw_sync(hw, mask); -} - -static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) -{ - struct e1000_hw *hw = &adapter->hw; - u32 dmac_thr; - u16 hwm; - u32 status; - - if (hw->mac.type == e1000_i211) - return; - - if (hw->mac.type > e1000_82580) { - if (adapter->dmac != IGB_DMAC_DISABLE) { - u32 reg; - - /* force threshold to 0. */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); - - /* - * DMA Coalescing high water mark needs to be greater - * than the Rx threshold. Set hwm to PBA - max frame - * size in 16B units, capping it at PBA - 6KB. - */ - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); - reg = E1000_READ_REG(hw, E1000_FCRTC); - reg &= ~E1000_FCRTC_RTH_COAL_MASK; - reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) - & E1000_FCRTC_RTH_COAL_MASK); - E1000_WRITE_REG(hw, E1000_FCRTC, reg); - - /* - * Set the DMA Coalescing Rx threshold to PBA - 2 * max - * frame size, capping it at PBA - 10KB. 
- */ - dmac_thr = pba - adapter->max_frame_size / 512; - if (dmac_thr < pba - 10) - dmac_thr = pba - 10; - reg = E1000_READ_REG(hw, E1000_DMACR); - reg &= ~E1000_DMACR_DMACTHR_MASK; - reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) - & E1000_DMACR_DMACTHR_MASK); - - /* transition to L0x or L1 if available..*/ - reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); - - /* Check if status is 2.5Gb backplane connection - * before configuration of watchdog timer, which is - * in msec values in 12.8usec intervals - * watchdog timer= msec values in 32usec intervals - * for non 2.5Gb connection - */ - if (hw->mac.type == e1000_i354) { - status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= ((adapter->dmac * 5) >> 6); - else - reg |= ((adapter->dmac) >> 5); - } else { - reg |= ((adapter->dmac) >> 5); - } - - /* - * Disable BMC-to-OS Watchdog enable - * on devices that support OS-to-BMC - */ - if (hw->mac.type != e1000_i354) - reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - E1000_WRITE_REG(hw, E1000_DMACR, reg); - - /* no lower threshold to disable coalescing(smart fifb)-UTRESH=0*/ - E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); - - /* This sets the time to wait before requesting - * transition to low power state to number of usecs - * needed to receive 1 512 byte frame at gigabit - * line rate. On i350 device, time to make transition - * to Lx state is delayed by 4 usec with flush disable - * bit set to avoid losing mailbox interrupts - */ - reg = E1000_READ_REG(hw, E1000_DMCTLX); - if (hw->mac.type == e1000_i350) - reg |= IGB_DMCTLX_DCFLUSH_DIS; - - /* in 2.5Gb connection, TTLX unit is 0.4 usec - * which is 0x4*2 = 0xA. But delay is still 4 usec - */ - if (hw->mac.type == e1000_i354) { - status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= 0xA; - else - reg |= 0x4; - } else { - reg |= 0x4; - } - E1000_WRITE_REG(hw, E1000_DMCTLX, reg); - - /* free space in tx packet buffer to wake from DMA coal */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); - - /* make low power state decision controlled by DMA coal */ - reg = E1000_READ_REG(hw, E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; - E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); - } /* endif adapter->dmac is not disabled */ - } else if (hw->mac.type == e1000_82580) { - u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); - E1000_WRITE_REG(hw, E1000_PCIEMISC, - reg & ~E1000_PCIEMISC_LX_DECISION); - E1000_WRITE_REG(hw, E1000_DMACR, 0); - } -} - -#ifdef HAVE_I2C_SUPPORT -/* igb_read_i2c_byte - Reads 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to read - * @dev_addr: device address - * @data: value read - * - * Performs byte read operation over I2C interface at - * a specified device address. 
- */ -s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data) -{ - struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); - struct i2c_client *this_client = adapter->i2c_client; - s32 status; - u16 swfw_mask = 0; - - if (!this_client) - return E1000_ERR_I2C; - - swfw_mask = E1000_SWFW_PHY0_SM; - - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) - != E1000_SUCCESS) - return E1000_ERR_SWFW_SYNC; - - status = i2c_smbus_read_byte_data(this_client, byte_offset); - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - - if (status < 0) - return E1000_ERR_I2C; - else { - *data = status; - return E1000_SUCCESS; - } -} - -/* igb_write_i2c_byte - Writes 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @dev_addr: device address - * @data: value to write - * - * Performs byte write operation over I2C interface at - * a specified device address. - */ -s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data) -{ - struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); - struct i2c_client *this_client = adapter->i2c_client; - s32 status; - u16 swfw_mask = E1000_SWFW_PHY0_SM; - - if (!this_client) - return E1000_ERR_I2C; - - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) - return E1000_ERR_SWFW_SYNC; - status = i2c_smbus_write_byte_data(this_client, byte_offset, data); - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - - if (status) - return E1000_ERR_I2C; - else - return E1000_SUCCESS; -} -#endif /* HAVE_I2C_SUPPORT */ -/* igb_main.c */ - - -/** - * igb_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in igb_pci_tbl - * - * Returns 0 on success, negative on failure - * - * igb_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. 
- **/ -int igb_kni_probe(struct pci_dev *pdev, - struct net_device **lad_dev) -{ - struct net_device *netdev; - struct igb_adapter *adapter; - struct e1000_hw *hw; - u16 eeprom_data = 0; - u8 pba_str[E1000_PBANUM_LENGTH]; - s32 ret_val; - static int global_quad_port_a; /* global quad port a indication */ - int i, err, pci_using_dac = 0; - static int cards_found; - - err = pci_enable_device_mem(pdev); - if (err) - return err; - -#ifdef NO_KNI - pci_using_dac = 0; - err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); - if (!err) { - err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; - } else { - err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); - if (err) { - err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); - if (err) { - IGB_ERR("No usable DMA configuration, " - "aborting\n"); - goto err_dma; - } - } - } - -#ifndef HAVE_ASPM_QUIRKS - /* 82575 requires that the pci-e link partner disable the L0s state */ - switch (pdev->device) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); - default: - break; - } - -#endif /* HAVE_ASPM_QUIRKS */ - err = pci_request_selected_regions(pdev, - pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); - if (err) - goto err_pci_reg; - - pci_enable_pcie_error_reporting(pdev); - - pci_set_master(pdev); - - err = -ENOMEM; -#endif /* NO_KNI */ -#ifdef HAVE_TX_MQ - netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), - IGB_MAX_TX_QUEUES); -#else - netdev = alloc_etherdev(sizeof(struct igb_adapter)); -#endif /* HAVE_TX_MQ */ - if (!netdev) - goto err_alloc_etherdev; - - SET_MODULE_OWNER(netdev); - SET_NETDEV_DEV(netdev, &pdev->dev); - - //pci_set_drvdata(pdev, netdev); - adapter = netdev_priv(netdev); - adapter->netdev = netdev; - adapter->pdev = pdev; - hw = &adapter->hw; - hw->back = adapter; - adapter->port_num = hw->bus.func; - adapter->msg_enable = (1 << debug) - 1; - -#ifdef HAVE_PCI_ERS - err = pci_save_state(pdev); - if (err) - goto err_ioremap; -#endif - err = -EIO; - hw->hw_addr = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!hw->hw_addr) - goto err_ioremap; - -#ifdef HAVE_NET_DEVICE_OPS - netdev->netdev_ops = &igb_netdev_ops; -#else /* HAVE_NET_DEVICE_OPS */ - netdev->open = &igb_open; - netdev->stop = &igb_close; - netdev->get_stats = &igb_get_stats; -#ifdef HAVE_SET_RX_MODE - netdev->set_rx_mode = &igb_set_rx_mode; -#endif - netdev->set_multicast_list = &igb_set_rx_mode; - netdev->set_mac_address = &igb_set_mac; - netdev->change_mtu = &igb_change_mtu; - netdev->do_ioctl = &igb_ioctl; -#ifdef HAVE_TX_TIMEOUT - netdev->tx_timeout = &igb_tx_timeout; -#endif - netdev->vlan_rx_register = igb_vlan_mode; - netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; - netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - netdev->poll_controller = igb_netpoll; -#endif - netdev->hard_start_xmit = &igb_xmit_frame; -#endif /* HAVE_NET_DEVICE_OPS */ - igb_set_ethtool_ops(netdev); -#ifdef HAVE_TX_TIMEOUT - netdev->watchdog_timeo = 5 * HZ; -#endif - - strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); - - adapter->bd_number = cards_found; - - /* setup the private structure */ - err = igb_sw_init(adapter); - if (err) - goto err_sw_init; - - e1000_get_bus_info(hw); - - hw->phy.autoneg_wait_to_complete = FALSE; - hw->mac.adaptive_ifs = FALSE; - - /* Copper options */ - if (hw->phy.media_type == 
e1000_media_type_copper) { - hw->phy.mdix = AUTO_ALL_MODES; - hw->phy.disable_polarity_correction = FALSE; - hw->phy.ms_type = e1000_ms_hw_default; - } - - if (e1000_check_reset_block(hw)) - dev_info(pci_dev_to_dev(pdev), - "PHY reset is blocked due to SOL/IDER session.\n"); - - /* - * features is initialized to 0 in allocation, it might have bits - * set by igb_sw_init so we should use an or instead of an - * assignment. - */ - netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM | -#ifdef NETIF_F_IPV6_CSUM - NETIF_F_IPV6_CSUM | -#endif -#ifdef NETIF_F_TSO - NETIF_F_TSO | -#ifdef NETIF_F_TSO6 - NETIF_F_TSO6 | -#endif -#endif /* NETIF_F_TSO */ -#ifdef NETIF_F_RXHASH - NETIF_F_RXHASH | -#endif - NETIF_F_RXCSUM | -#ifdef NETIF_F_HW_VLAN_CTAG_RX - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_TX; -#else - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX; -#endif - - if (hw->mac.type >= e1000_82576) - netdev->features |= NETIF_F_SCTP_CSUM; - -#ifdef HAVE_NDO_SET_FEATURES - /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; -#ifndef IGB_NO_LRO - - /* give us the option of enabling LRO later */ - netdev->hw_features |= NETIF_F_LRO; -#endif -#else -#ifdef NETIF_F_GRO - - /* this is only needed on kernels prior to 2.6.39 */ - netdev->features |= NETIF_F_GRO; -#endif -#endif - - /* set this bit last since it cannot be part of hw_features */ -#ifdef NETIF_F_HW_VLAN_CTAG_FILTER - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#else - netdev->features |= NETIF_F_HW_VLAN_FILTER; -#endif - -#ifdef HAVE_NETDEV_VLAN_FEATURES - netdev->vlan_features |= NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; - -#endif - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; - -#ifdef NO_KNI - adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); -#ifdef DEBUG - if (adapter->dmac != IGB_DMAC_DISABLE) - printk("%s: DMA Coalescing is enabled..\n", netdev->name); -#endif - - /* before reading the NVM, reset the controller to put the device in a - * known good starting state */ - e1000_reset_hw(hw); -#endif /* NO_KNI */ - - /* make sure the NVM is good */ - if (e1000_validate_nvm_checksum(hw) < 0) { - dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" - " Valid\n"); - err = -EIO; - goto err_eeprom; - } - - /* copy the MAC address out of the NVM */ - if (e1000_read_mac_addr(hw)) - dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); -#ifdef ETHTOOL_GPERMADDR - memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); - - if (!is_valid_ether_addr(netdev->perm_addr)) { -#else - if (!is_valid_ether_addr(netdev->dev_addr)) { -#endif - dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); - err = -EIO; - goto err_eeprom; - } - - memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); - adapter->mac_table[0].queue = adapter->vfs_allocated_count; - adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); - igb_rar_set(adapter, 0); - - /* get firmware version for ethtool -i */ - igb_set_fw_version(adapter); - - /* Check if Media Autosense is enabled */ - if (hw->mac.type == e1000_82580) - igb_init_mas(adapter); - -#ifdef NO_KNI -#ifdef HAVE_TIMER_SETUP - timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0); - timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0); -#else - setup_timer(&adapter->watchdog_timer, 
&igb_watchdog, - (unsigned long) adapter); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, - (unsigned long) adapter); - setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, - (unsigned long) adapter); -#endif - - INIT_WORK(&adapter->reset_task, igb_reset_task); - INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); - if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) - INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); -#endif - - /* Initialize link properties that are user-changeable */ - adapter->fc_autoneg = true; - hw->mac.autoneg = true; - hw->phy.autoneg_advertised = 0x2f; - - hw->fc.requested_mode = e1000_fc_default; - hw->fc.current_mode = e1000_fc_default; - - e1000_validate_mdi_setting(hw); - - /* By default, support wake on port A */ - if (hw->bus.func == 0) - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - - /* Check the NVM for wake support for non-port A ports */ - if (hw->mac.type >= e1000_82580) - hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + - NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, - &eeprom_data); - else if (hw->bus.func == 1) - e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); - - if (eeprom_data & IGB_EEPROM_APME) - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - - /* now that we have the eeprom settings, apply the special cases where - * the eeprom may be wrong or the board simply won't support wake on - * lan on a particular port */ - switch (pdev->device) { - case E1000_DEV_ID_82575GB_QUAD_COPPER: - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82576_FIBER: - case E1000_DEV_ID_82576_SERDES: - /* Wake events only supported on port A for dual fiber - * regardless of eeprom setting */ - if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - case E1000_DEV_ID_82576_QUAD_COPPER: - case E1000_DEV_ID_82576_QUAD_COPPER_ET2: - /* if quad port adapter, disable WoL on all but port A */ - if (global_quad_port_a != 0) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - else - adapter->flags |= IGB_FLAG_QUAD_PORT_A; - /* Reset for multiple quad port adapters */ - if (++global_quad_port_a == 4) - global_quad_port_a = 0; - break; - default: - /* If the device can't wake, don't set software support */ - if (!device_can_wakeup(&adapter->pdev->dev)) - adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; - break; - } - - /* initialize the wol settings based on the eeprom settings */ - if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) - adapter->wol |= E1000_WUFC_MAG; - - /* Some vendors want WoL disabled by default, but still supported */ - if ((hw->mac.type == e1000_i350) && - (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { - adapter->flags |= IGB_FLAG_WOL_SUPPORTED; - adapter->wol = 0; - } - -#ifdef NO_KNI - device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), - adapter->flags & IGB_FLAG_WOL_SUPPORTED); - - /* reset the hardware with the new settings */ - igb_reset(adapter); - adapter->devrc = 0; - -#ifdef HAVE_I2C_SUPPORT - /* Init the I2C interface */ - err = igb_init_i2c(adapter); - if (err) { - dev_err(&pdev->dev, "failed to init i2c interface\n"); - goto err_eeprom; - } -#endif /* HAVE_I2C_SUPPORT */ - - /* let the f/w know that the h/w is now under the control of the - * driver. 
*/ - igb_get_hw_control(adapter); - - strncpy(netdev->name, "eth%d", IFNAMSIZ); - err = register_netdev(netdev); - if (err) - goto err_register; - -#ifdef CONFIG_IGB_VMDQ_NETDEV - err = igb_init_vmdq_netdevs(adapter); - if (err) - goto err_register; -#endif - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -#ifdef IGB_DCA - if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; - dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); - igb_setup_dca(adapter); - } - -#endif -#ifdef HAVE_PTP_1588_CLOCK - /* do hw tstamp init after resetting */ - igb_ptp_init(adapter); -#endif /* HAVE_PTP_1588_CLOCK */ - -#endif /* NO_KNI */ - dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); - /* print bus type/speed/width info */ - dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", - netdev->name, - ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : - (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : - (hw->mac.type == e1000_i354) ? "integrated" : - "unknown"), - ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : - (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : - (hw->mac.type == e1000_i354) ? "integrated" : - "unknown")); - dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); - for (i = 0; i < 6; i++) - printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); - - ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); - if (ret_val) - strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); - dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, - pba_str); - - - /* Initialize the thermal sensor on i350 devices. */ - if (hw->mac.type == e1000_i350) { - if (hw->bus.func == 0) { - u16 ets_word; - - /* - * Read the NVM to determine if this i350 device - * supports an external thermal sensor. - */ - e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); - if (ets_word != 0x0000 && ets_word != 0xFFFF) - adapter->ets = true; - else - adapter->ets = false; - } -#ifdef NO_KNI -#ifdef IGB_HWMON - - igb_sysfs_init(adapter); -#else -#ifdef IGB_PROCFS - - igb_procfs_init(adapter); -#endif /* IGB_PROCFS */ -#endif /* IGB_HWMON */ -#endif /* NO_KNI */ - } else { - adapter->ets = false; - } - - if (hw->phy.media_type == e1000_media_type_copper) { - switch (hw->mac.type) { - case e1000_i350: - case e1000_i210: - case e1000_i211: - /* Enable EEE for internal copper PHY devices */ - err = e1000_set_eee_i350(hw); - if ((!err) && - (adapter->flags & IGB_FLAG_EEE)) - adapter->eee_advert = - MDIO_EEE_100TX | MDIO_EEE_1000T; - break; - case e1000_i354: - if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & - (E1000_CTRL_EXT_LINK_MODE_SGMII)) { - err = e1000_set_eee_i354(hw); - if ((!err) && - (adapter->flags & IGB_FLAG_EEE)) - adapter->eee_advert = - MDIO_EEE_100TX | MDIO_EEE_1000T; - } - break; - default: - break; - } - } - - /* send driver version info to firmware */ - if (hw->mac.type >= e1000_i350) - igb_init_fw(adapter); - -#ifndef IGB_NO_LRO - if (netdev->features & NETIF_F_LRO) - dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); - else - dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); -#endif - dev_info(pci_dev_to_dev(pdev), - "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", - adapter->msix_entries ? "MSI-X" : - (adapter->flags & IGB_FLAG_HAS_MSI) ? 
"MSI" : "legacy", - adapter->num_rx_queues, adapter->num_tx_queues); - - cards_found++; - *lad_dev = netdev; - - pm_runtime_put_noidle(&pdev->dev); - return 0; - -//err_register: -// igb_release_hw_control(adapter); -#ifdef HAVE_I2C_SUPPORT - memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); -#endif /* HAVE_I2C_SUPPORT */ -err_eeprom: -// if (!e1000_check_reset_block(hw)) -// e1000_phy_hw_reset(hw); - - if (hw->flash_address) - iounmap(hw->flash_address); -err_sw_init: -// igb_clear_interrupt_scheme(adapter); -// igb_reset_sriov_capability(adapter); - iounmap(hw->hw_addr); -err_ioremap: - free_netdev(netdev); -err_alloc_etherdev: -// pci_release_selected_regions(pdev, -// pci_select_bars(pdev, IORESOURCE_MEM)); -//err_pci_reg: -//err_dma: - pci_disable_device(pdev); - return err; -} - - -void igb_kni_remove(struct pci_dev *pdev) -{ - pci_disable_device(pdev); -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c deleted file mode 100644 index 98209a10..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c +++ /dev/null @@ -1,832 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -#include - -#include "igb.h" - -/* This is the only thing that needs to be changed to adjust the - * maximum number of ports that the driver can manage. - */ - -#define IGB_MAX_NIC 32 - -#define OPTION_UNSET -1 -#define OPTION_DISABLED 0 -#define OPTION_ENABLED 1 -#define MAX_NUM_LIST_OPTS 15 - -/* All parameters are treated the same, as an integer array of values. - * This macro just reduces the need to repeat the same declaration code - * over and over (plus this helps to avoid typo bugs). - */ - -#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET } -#ifndef module_param_array -/* Module Parameters are always initialized to -1, so that the driver - * can tell the difference between no user specified value or the - * user asking for the default value. - * The true default values are loaded in when igb_check_options is called. - * - * This is a GCC extension to ANSI C. - * See the item "Labeled Elements in Initializers" in the section - * "Extensions to the C Language Family" of the GCC documentation. 
- */ - -#define IGB_PARAM(X, desc) \ - static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ - MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \ - MODULE_PARM_DESC(X, desc); -#else -#define IGB_PARAM(X, desc) \ - static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ - static unsigned int num_##X; \ - module_param_array_named(X, X, int, &num_##X, 0); \ - MODULE_PARM_DESC(X, desc); -#endif - -/* Interrupt Throttle Rate (interrupts/sec) - * - * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) - */ -IGB_PARAM(InterruptThrottleRate, - "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive"); -#define DEFAULT_ITR 3 -#define MAX_ITR 100000 -/* #define MIN_ITR 120 */ -#define MIN_ITR 0 -/* IntMode (Interrupt Mode) - * - * Valid Range: 0 - 2 - * - * Default Value: 2 (MSI-X) - */ -IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2"); -#define MAX_INTMODE IGB_INT_MODE_MSIX -#define MIN_INTMODE IGB_INT_MODE_LEGACY - -IGB_PARAM(Node, "set the starting node to allocate memory on, default -1"); - -/* LLIPort (Low Latency Interrupt TCP Port) - * - * Valid Range: 0 - 65535 - * - * Default Value: 0 (disabled) - */ -IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off"); - -#define DEFAULT_LLIPORT 0 -#define MAX_LLIPORT 0xFFFF -#define MIN_LLIPORT 0 - -/* LLIPush (Low Latency Interrupt on TCP Push flag) - * - * Valid Range: 0, 1 - * - * Default Value: 0 (disabled) - */ -IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off"); - -#define DEFAULT_LLIPUSH 0 -#define MAX_LLIPUSH 1 -#define MIN_LLIPUSH 0 - -/* LLISize (Low Latency Interrupt on Packet Size) - * - * Valid Range: 0 - 1500 - * - * Default Value: 0 (disabled) - */ -IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off"); - -#define DEFAULT_LLISIZE 0 -#define MAX_LLISIZE 1500 -#define MIN_LLISIZE 0 - -/* RSS (Enable RSS multiqueue receive) - * - * Valid Range: 0 - 8 - * - * Default Value: 1 - */ -IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus"); - -#define DEFAULT_RSS 1 -#define MAX_RSS 8 -#define MIN_RSS 0 - -/* VMDQ (Enable VMDq multiqueue receive) - * - * Valid Range: 0 - 8 - * - * Default Value: 0 - */ -IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0"); - -#define DEFAULT_VMDQ 0 -#define MAX_VMDQ MAX_RSS -#define MIN_VMDQ 0 - -/* max_vfs (Enable SR-IOV VF devices) - * - * Valid Range: 0 - 7 - * - * Default Value: 0 - */ -IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0"); - -#define DEFAULT_SRIOV 0 -#define MAX_SRIOV 7 -#define MIN_SRIOV 0 - -/* MDD (Enable Malicious Driver Detection) - * - * Only available when SR-IOV is enabled - max_vfs is greater than 0 - * - * Valid Range: 0, 1 - * - * Default Value: 1 - */ -IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. " - "Only available when max_vfs is greater than 0"); - -#ifdef DEBUG - -/* Disable Hardware Reset on Tx Hang - * - * Valid Range: 0, 1 - * - * Default Value: 0 (disabled, i.e. 
h/w will reset) - */ -IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang"); - -/* Dump Transmit and Receive buffers - * - * Valid Range: 0, 1 - * - * Default Value: 0 - */ -IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request"); - -#endif /* DEBUG */ - -/* QueuePairs (Enable TX/RX queue pairs for interrupt handling) - * - * Valid Range: 0 - 1 - * - * Default Value: 1 - */ -IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on"); - -#define DEFAULT_QUEUE_PAIRS 1 -#define MAX_QUEUE_PAIRS 1 -#define MIN_QUEUE_PAIRS 0 - -/* Enable/disable EEE (a.k.a. IEEE802.3az) - * - * Valid Range: 0, 1 - * - * Default Value: 1 - */ - IGB_PARAM(EEE, "Enable/disable on parts that support the feature"); - -/* Enable/disable DMA Coalescing - * - * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, - * 9000, 10000(msec), 250(usec), 500(usec) - * - * Default Value: 0 - */ - IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing ((0=off, 1000-10000(msec), 250, 500 (usec))"); - -#ifndef IGB_NO_LRO -/* Enable/disable Large Receive Offload - * - * Valid Values: 0(off), 1(on) - * - * Default Value: 0 - */ - IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off"); - -#endif -struct igb_opt_list { - int i; - char *str; -}; -struct igb_option { - enum { enable_option, range_option, list_option } type; - const char *name; - const char *err; - int def; - union { - struct { /* range_option info */ - int min; - int max; - } r; - struct { /* list_option info */ - int nr; - struct igb_opt_list *p; - } l; - } arg; -}; - -static int igb_validate_option(unsigned int *value, - struct igb_option *opt, - struct igb_adapter *adapter) -{ - if (*value == OPTION_UNSET) { - *value = opt->def; - return 0; - } - - switch (opt->type) { - case enable_option: - switch (*value) { - case OPTION_ENABLED: - DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); - return 0; - case OPTION_DISABLED: - DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); - return 0; - } - break; - case range_option: - if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { - DPRINTK(PROBE, INFO, - "%s set to %d\n", opt->name, *value); - return 0; - } - break; - case list_option: { - int i; - struct igb_opt_list *ent; - - for (i = 0; i < opt->arg.l.nr; i++) { - ent = &opt->arg.l.p[i]; - if (*value == ent->i) { - if (ent->str[0] != '\0') - DPRINTK(PROBE, INFO, "%s\n", ent->str); - return 0; - } - } - } - break; - default: - BUG(); - } - - DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n", - opt->name, *value, opt->err); - *value = opt->def; - return -1; -} - -/** - * igb_check_options - Range Checking for Command Line Parameters - * @adapter: board private structure - * - * This routine checks all command line parameters for valid user - * input. If an invalid value is given, or if no user specified - * value exists, a default value is used. The final value is stored - * in a variable in the adapter structure. 
- **/ - -void igb_check_options(struct igb_adapter *adapter) -{ - int bd = adapter->bd_number; - struct e1000_hw *hw = &adapter->hw; - - if (bd >= IGB_MAX_NIC) { - DPRINTK(PROBE, NOTICE, - "Warning: no configuration for board #%d\n", bd); - DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); -#ifndef module_param_array - bd = IGB_MAX_NIC; -#endif - } - - { /* Interrupt Throttling Rate */ - struct igb_option opt = { - .type = range_option, - .name = "Interrupt Throttling Rate (ints/sec)", - .err = "using default of " __MODULE_STRING(DEFAULT_ITR), - .def = DEFAULT_ITR, - .arg = { .r = { .min = MIN_ITR, - .max = MAX_ITR } } - }; - -#ifdef module_param_array - if (num_InterruptThrottleRate > bd) { -#endif - unsigned int itr = InterruptThrottleRate[bd]; - - switch (itr) { - case 0: - DPRINTK(PROBE, INFO, "%s turned off\n", - opt.name); - if (hw->mac.type >= e1000_i350) - adapter->dmac = IGB_DMAC_DISABLE; - adapter->rx_itr_setting = itr; - break; - case 1: - DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", - opt.name); - adapter->rx_itr_setting = itr; - break; - case 3: - DPRINTK(PROBE, INFO, - "%s set to dynamic conservative mode\n", - opt.name); - adapter->rx_itr_setting = itr; - break; - default: - igb_validate_option(&itr, &opt, adapter); - /* Save the setting, because the dynamic bits - * change itr. In case of invalid user value, - * default to conservative mode, else need to - * clear the lower two bits because they are - * used as control */ - if (itr == 3) { - adapter->rx_itr_setting = itr; - } else { - adapter->rx_itr_setting = 1000000000 / - (itr * 256); - adapter->rx_itr_setting &= ~3; - } - break; - } -#ifdef module_param_array - } else { - adapter->rx_itr_setting = opt.def; - } -#endif - adapter->tx_itr_setting = adapter->rx_itr_setting; - } - { /* Interrupt Mode */ - struct igb_option opt = { - .type = range_option, - .name = "Interrupt Mode", - .err = "defaulting to 2 (MSI-X)", - .def = IGB_INT_MODE_MSIX, - .arg = { .r = { .min = MIN_INTMODE, - .max = MAX_INTMODE } } - }; - -#ifdef module_param_array - if (num_IntMode > bd) { -#endif - unsigned int int_mode = IntMode[bd]; - igb_validate_option(&int_mode, &opt, adapter); - adapter->int_mode = int_mode; -#ifdef module_param_array - } else { - adapter->int_mode = opt.def; - } -#endif - } - { /* Low Latency Interrupt TCP Port */ - struct igb_option opt = { - .type = range_option, - .name = "Low Latency Interrupt TCP Port", - .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT), - .def = DEFAULT_LLIPORT, - .arg = { .r = { .min = MIN_LLIPORT, - .max = MAX_LLIPORT } } - }; - -#ifdef module_param_array - if (num_LLIPort > bd) { -#endif - adapter->lli_port = LLIPort[bd]; - if (adapter->lli_port) { - igb_validate_option(&adapter->lli_port, &opt, - adapter); - } else { - DPRINTK(PROBE, INFO, "%s turned off\n", - opt.name); - } -#ifdef module_param_array - } else { - adapter->lli_port = opt.def; - } -#endif - } - { /* Low Latency Interrupt on Packet Size */ - struct igb_option opt = { - .type = range_option, - .name = "Low Latency Interrupt on Packet Size", - .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE), - .def = DEFAULT_LLISIZE, - .arg = { .r = { .min = MIN_LLISIZE, - .max = MAX_LLISIZE } } - }; - -#ifdef module_param_array - if (num_LLISize > bd) { -#endif - adapter->lli_size = LLISize[bd]; - if (adapter->lli_size) { - igb_validate_option(&adapter->lli_size, &opt, - adapter); - } else { - DPRINTK(PROBE, INFO, "%s turned off\n", - opt.name); - } -#ifdef module_param_array - } else { - adapter->lli_size = 
opt.def; - } -#endif - } - { /* Low Latency Interrupt on TCP Push flag */ - struct igb_option opt = { - .type = enable_option, - .name = "Low Latency Interrupt on TCP Push flag", - .err = "defaulting to Disabled", - .def = OPTION_DISABLED - }; - -#ifdef module_param_array - if (num_LLIPush > bd) { -#endif - unsigned int lli_push = LLIPush[bd]; - igb_validate_option(&lli_push, &opt, adapter); - adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0; -#ifdef module_param_array - } else { - adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0; - } -#endif - } - { /* SRIOV - Enable SR-IOV VF devices */ - struct igb_option opt = { - .type = range_option, - .name = "max_vfs - SR-IOV VF devices", - .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV), - .def = DEFAULT_SRIOV, - .arg = { .r = { .min = MIN_SRIOV, - .max = MAX_SRIOV } } - }; - -#ifdef module_param_array - if (num_max_vfs > bd) { -#endif - adapter->vfs_allocated_count = max_vfs[bd]; - igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter); - -#ifdef module_param_array - } else { - adapter->vfs_allocated_count = opt.def; - } -#endif - if (adapter->vfs_allocated_count) { - switch (hw->mac.type) { - case e1000_82575: - case e1000_82580: - case e1000_i210: - case e1000_i211: - case e1000_i354: - adapter->vfs_allocated_count = 0; - DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n"); - default: - break; - } - } - } - { /* VMDQ - Enable VMDq multiqueue receive */ - struct igb_option opt = { - .type = range_option, - .name = "VMDQ - VMDq multiqueue queue count", - .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ), - .def = DEFAULT_VMDQ, - .arg = { .r = { .min = MIN_VMDQ, - .max = (MAX_VMDQ - adapter->vfs_allocated_count) } } - }; - if ((hw->mac.type != e1000_i210) || - (hw->mac.type != e1000_i211)) { -#ifdef module_param_array - if (num_VMDQ > bd) { -#endif - adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]); - if (adapter->vfs_allocated_count && !adapter->vmdq_pools) { - DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n"); - adapter->vmdq_pools = 1; - } - igb_validate_option(&adapter->vmdq_pools, &opt, adapter); - -#ifdef module_param_array - } else { - if (!adapter->vfs_allocated_count) - adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def); - else - adapter->vmdq_pools = 1; - } -#endif -#ifdef CONFIG_IGB_VMDQ_NETDEV - if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) { - DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n"); - adapter->vmdq_pools = 0; - } -#endif - - } else { - DPRINTK(PROBE, INFO, "VMDq option is not supported.\n"); - adapter->vmdq_pools = opt.def; - } - } - { /* RSS - Enable RSS multiqueue receives */ - struct igb_option opt = { - .type = range_option, - .name = "RSS - RSS multiqueue receive count", - .err = "using default of " __MODULE_STRING(DEFAULT_RSS), - .def = DEFAULT_RSS, - .arg = { .r = { .min = MIN_RSS, - .max = MAX_RSS } } - }; - - switch (hw->mac.type) { - case e1000_82575: -#ifndef CONFIG_IGB_VMDQ_NETDEV - if (!!adapter->vmdq_pools) { - if (adapter->vmdq_pools <= 2) { - if (adapter->vmdq_pools == 2) - opt.arg.r.max = 3; - } else { - opt.arg.r.max = 1; - } - } else { - opt.arg.r.max = 4; - } -#else - opt.arg.r.max = !!adapter->vmdq_pools ? 
1 : 4; -#endif /* CONFIG_IGB_VMDQ_NETDEV */ - break; - case e1000_i210: - opt.arg.r.max = 4; - break; - case e1000_i211: - opt.arg.r.max = 2; - break; - case e1000_82576: -#ifndef CONFIG_IGB_VMDQ_NETDEV - if (!!adapter->vmdq_pools) - opt.arg.r.max = 2; - break; -#endif /* CONFIG_IGB_VMDQ_NETDEV */ - case e1000_82580: - case e1000_i350: - case e1000_i354: - default: - if (!!adapter->vmdq_pools) - opt.arg.r.max = 1; - break; - } - - if (adapter->int_mode != IGB_INT_MODE_MSIX) { - DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n", - opt.err); - opt.arg.r.max = 1; - } - -#ifdef module_param_array - if (num_RSS > bd) { -#endif - adapter->rss_queues = RSS[bd]; - switch (adapter->rss_queues) { - case 1: - break; - default: - igb_validate_option(&adapter->rss_queues, &opt, adapter); - if (adapter->rss_queues) - break; - case 0: - adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus()); - break; - } -#ifdef module_param_array - } else { - adapter->rss_queues = opt.def; - } -#endif - } - { /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */ - struct igb_option opt = { - .type = enable_option, - .name = "QueuePairs - Tx/Rx queue pairs for interrupt handling", - .err = "defaulting to Enabled", - .def = OPTION_ENABLED - }; -#ifdef module_param_array - if (num_QueuePairs > bd) { -#endif - unsigned int qp = QueuePairs[bd]; - /* - * We must enable queue pairs if the number of queues - * exceeds the number of available interrupts. We are - * limited to 10, or 3 per unallocated vf. On I210 and - * I211 devices, we are limited to 5 interrupts. - * However, since I211 only supports 2 queues, we do not - * need to check and override the user option. - */ - if (qp == OPTION_DISABLED) { - if (adapter->rss_queues > 4) - qp = OPTION_ENABLED; - - if (adapter->vmdq_pools > 4) - qp = OPTION_ENABLED; - - if (adapter->rss_queues > 1 && - (adapter->vmdq_pools > 3 || - adapter->vfs_allocated_count > 6)) - qp = OPTION_ENABLED; - - if (hw->mac.type == e1000_i210 && - adapter->rss_queues > 2) - qp = OPTION_ENABLED; - - if (qp == OPTION_ENABLED) - DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n", - opt.err); - } - igb_validate_option(&qp, &opt, adapter); - adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0; -#ifdef module_param_array - } else { - adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0; - } -#endif - } - { /* EEE - Enable EEE for capable adapters */ - - if (hw->mac.type >= e1000_i350) { - struct igb_option opt = { - .type = enable_option, - .name = "EEE Support", - .err = "defaulting to Enabled", - .def = OPTION_ENABLED - }; -#ifdef module_param_array - if (num_EEE > bd) { -#endif - unsigned int eee = EEE[bd]; - igb_validate_option(&eee, &opt, adapter); - adapter->flags |= eee ? IGB_FLAG_EEE : 0; - if (eee) - hw->dev_spec._82575.eee_disable = false; - else - hw->dev_spec._82575.eee_disable = true; - -#ifdef module_param_array - } else { - adapter->flags |= opt.def ? 
IGB_FLAG_EEE : 0; - if (adapter->flags & IGB_FLAG_EEE) - hw->dev_spec._82575.eee_disable = false; - else - hw->dev_spec._82575.eee_disable = true; - } -#endif - } - } - { /* DMAC - Enable DMA Coalescing for capable adapters */ - - if (hw->mac.type >= e1000_i350) { - struct igb_opt_list list [] = { - { IGB_DMAC_DISABLE, "DMAC Disable"}, - { IGB_DMAC_MIN, "DMAC 250 usec"}, - { IGB_DMAC_500, "DMAC 500 usec"}, - { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"}, - { IGB_DMAC_2000, "DMAC 2000 usec"}, - { IGB_DMAC_3000, "DMAC 3000 usec"}, - { IGB_DMAC_4000, "DMAC 4000 usec"}, - { IGB_DMAC_5000, "DMAC 5000 usec"}, - { IGB_DMAC_6000, "DMAC 6000 usec"}, - { IGB_DMAC_7000, "DMAC 7000 usec"}, - { IGB_DMAC_8000, "DMAC 8000 usec"}, - { IGB_DMAC_9000, "DMAC 9000 usec"}, - { IGB_DMAC_MAX, "DMAC 10000 usec"} - }; - struct igb_option opt = { - .type = list_option, - .name = "DMA Coalescing", - .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE), - .def = IGB_DMAC_DISABLE, - .arg = { .l = { .nr = 13, - .p = list - } - } - }; -#ifdef module_param_array - if (num_DMAC > bd) { -#endif - unsigned int dmac = DMAC[bd]; - if (adapter->rx_itr_setting == IGB_DMAC_DISABLE) - dmac = IGB_DMAC_DISABLE; - igb_validate_option(&dmac, &opt, adapter); - switch (dmac) { - case IGB_DMAC_DISABLE: - adapter->dmac = dmac; - break; - case IGB_DMAC_MIN: - adapter->dmac = dmac; - break; - case IGB_DMAC_500: - adapter->dmac = dmac; - break; - case IGB_DMAC_EN_DEFAULT: - adapter->dmac = dmac; - break; - case IGB_DMAC_2000: - adapter->dmac = dmac; - break; - case IGB_DMAC_3000: - adapter->dmac = dmac; - break; - case IGB_DMAC_4000: - adapter->dmac = dmac; - break; - case IGB_DMAC_5000: - adapter->dmac = dmac; - break; - case IGB_DMAC_6000: - adapter->dmac = dmac; - break; - case IGB_DMAC_7000: - adapter->dmac = dmac; - break; - case IGB_DMAC_8000: - adapter->dmac = dmac; - break; - case IGB_DMAC_9000: - adapter->dmac = dmac; - break; - case IGB_DMAC_MAX: - adapter->dmac = dmac; - break; - default: - adapter->dmac = opt.def; - DPRINTK(PROBE, INFO, - "Invalid DMAC setting, " - "resetting DMAC to %d\n", opt.def); - } -#ifdef module_param_array - } else - adapter->dmac = opt.def; -#endif - } - } -#ifndef IGB_NO_LRO - { /* LRO - Enable Large Receive Offload */ - struct igb_option opt = { - .type = enable_option, - .name = "LRO - Large Receive Offload", - .err = "defaulting to Disabled", - .def = OPTION_DISABLED - }; - struct net_device *netdev = adapter->netdev; -#ifdef module_param_array - if (num_LRO > bd) { -#endif - unsigned int lro = LRO[bd]; - igb_validate_option(&lro, &opt, adapter); - netdev->features |= lro ? NETIF_F_LRO : 0; -#ifdef module_param_array - } else if (opt.def == OPTION_ENABLED) { - netdev->features |= NETIF_F_LRO; - } -#endif - } -#endif /* IGB_NO_LRO */ - { /* MDD - Enable Malicious Driver Detection. Only available when - SR-IOV is enabled. 
*/ - struct igb_option opt = { - .type = enable_option, - .name = "Malicious Driver Detection", - .err = "defaulting to 1", - .def = OPTION_ENABLED, - .arg = { .r = { .min = OPTION_DISABLED, - .max = OPTION_ENABLED } } - }; - -#ifdef module_param_array - if (num_MDD > bd) { -#endif - adapter->mdd = MDD[bd]; - igb_validate_option((uint *)&adapter->mdd, &opt, - adapter); -#ifdef module_param_array - } else { - adapter->mdd = opt.def; - } -#endif - } -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h deleted file mode 100644 index ec2b86a0..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h +++ /dev/null @@ -1,234 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* ethtool register test data */ -struct igb_reg_test { - u16 reg; - u16 reg_offset; - u16 array_len; - u16 test_type; - u32 mask; - u32 write; -}; - -/* In the hardware, registers are laid out either singly, in arrays - * spaced 0x100 bytes apart, or in contiguous tables. We assume - * most tests take place on arrays or single registers (handled - * as a single-element array) and special-case the tables. - * Table tests are always pattern tests. - * - * We also make provision for some required setup steps by specifying - * registers to be written without any read-back testing. - */ - -#define PATTERN_TEST 1 -#define SET_READ_TEST 2 -#define WRITE_NO_TEST 3 -#define TABLE32_TEST 4 -#define TABLE64_TEST_LO 5 -#define TABLE64_TEST_HI 6 - -/* i210 reg test */ -static struct igb_reg_test reg_test_i210[] = { - { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - /* RDH is read-only for i210, only test RDT. 
*/ - { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 }, - { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, - { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, - { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RA, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_HI, - 0x900FFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128, TABLE32_TEST, - 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -/* i350 reg test */ -static struct igb_reg_test reg_test_i350[] = { - { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - /* VET is readonly on i350 */ - { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - /* RDH is read-only for i350, only test RDT. 
*/ - { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, - { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, - { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, - { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RA, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_HI, - 0xC3FFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 16, TABLE64_TEST_HI, - 0xC3FFFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128, TABLE32_TEST, - 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -/* 82580 reg test */ -static struct igb_reg_test reg_test_82580[] = { - { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - /* RDH is read-only for 82580, only test RDT. 
*/ - { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, - { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, - { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, - { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RA, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_HI, - 0x83FFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 8, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 8, TABLE64_TEST_HI, - 0x83FFFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128, TABLE32_TEST, - 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -/* 82576 reg test */ -static struct igb_reg_test reg_test_82576[] = { - { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_RDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - /* Enable all queues before testing. */ - { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, - { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, - /* RDH is read-only for 82576, only test RDT. 
*/ - { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RDT(4), 0x40, 12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, - { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, 0 }, - { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, - { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, - { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_TDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, - { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RA, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_HI, - 0x83FFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 8, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA2, 0, 8, TABLE64_TEST_HI, - 0x83FFFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128, TABLE32_TEST, - 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -/* 82575 register test */ -static struct igb_reg_test reg_test_82575[] = { - { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, - { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - /* Enable all four RX queues before testing. */ - { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, - /* RDH is read-only for 82575, only test RDT. 
*/ - { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, - { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, - { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, - { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF }, - { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_TXCW, 0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_LO, - 0xFFFFFFFF, 0xFFFFFFFF }, - { E1000_RA, 0, 16, TABLE64_TEST_HI, - 0x800FFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128, TABLE32_TEST, - 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c deleted file mode 100644 index cdd807b9..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c +++ /dev/null @@ -1,421 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -#include - -#include "igb.h" -#include "igb_vmdq.h" -#include - -#ifdef CONFIG_IGB_VMDQ_NETDEV -int igb_vmdq_open(struct net_device *dev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct net_device *main_netdev = adapter->netdev; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - if (test_bit(__IGB_DOWN, &adapter->state)) { - DPRINTK(DRV, WARNING, - "Open %s before opening this device.\n", - main_netdev->name); - return -EAGAIN; - } - netif_carrier_off(dev); - vadapter->tx_ring->vmdq_netdev = dev; - vadapter->rx_ring->vmdq_netdev = dev; - if (is_valid_ether_addr(dev->dev_addr)) { - igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); - igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); - } - netif_carrier_on(dev); - return 0; -} - -int igb_vmdq_close(struct net_device *dev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - netif_carrier_off(dev); - igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); - - vadapter->tx_ring->vmdq_netdev = NULL; - vadapter->rx_ring->vmdq_netdev = NULL; - return 0; -} - -netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - - return igb_xmit_frame_ring(skb, vadapter->tx_ring); -} - -struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct e1000_hw *hw = &adapter->hw; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - 
vadapter->net_stats.rx_packets += - E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue)); - E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0); - vadapter->net_stats.tx_packets += - E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue)); - E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0); - vadapter->net_stats.rx_bytes += - E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue)); - E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0); - vadapter->net_stats.tx_bytes += - E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue)); - E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0); - vadapter->net_stats.multicast += - E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue)); - E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0); - /* only return the current stats */ - return &vadapter->net_stats; -} - -/** - * igb_write_vm_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -static int igb_write_vm_addr_list(struct net_device *netdev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - struct igb_adapter *adapter = vadapter->real_adapter; - int count = 0; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > igb_available_rars(adapter)) - return -ENOMEM; - - if (!netdev_uc_empty(netdev)) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - struct netdev_hw_addr *ha; -#else - struct dev_mc_list *ha; -#endif - netdev_for_each_uc_addr(ha, netdev) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - igb_del_mac_filter(adapter, ha->addr, hw_queue); - igb_add_mac_filter(adapter, ha->addr, hw_queue); -#else - igb_del_mac_filter(adapter, ha->da_addr, hw_queue); - igb_add_mac_filter(adapter, ha->da_addr, hw_queue); -#endif - count++; - } - } - return count; -} - - -#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous mode */ -void igb_vmdq_set_rx_mode(struct net_device *dev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct e1000_hw *hw = &adapter->hw; - u32 vmolr, rctl; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - /* Check for Promiscuous and All Multicast modes */ - vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue)); - - /* clear the affected bits */ - vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME | - E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE); - - if (dev->flags & IFF_PROMISC) { - vmolr |= E1000_VMOLR_UPE; - rctl = E1000_READ_REG(hw, E1000_RCTL); - rctl |= E1000_RCTL_UPE; - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - } else { - rctl = E1000_READ_REG(hw, E1000_RCTL); - rctl &= ~E1000_RCTL_UPE; - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - if (dev->flags & IFF_ALLMULTI) { - vmolr |= E1000_VMOLR_MPME; - } else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - if (igb_write_mc_addr_list(adapter->netdev) != 0) - vmolr |= E1000_VMOLR_ROMPE; - } -#ifdef HAVE_SET_RX_MODE - /* - * Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - if (igb_write_vm_addr_list(dev) < 0) - vmolr |= E1000_VMOLR_UPE; -#endif - } - E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr); - - return; -} - -int 
igb_vmdq_set_mac(struct net_device *dev, void *p) -{ - struct sockaddr *addr = p; - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); -} - -int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - - if (adapter->netdev->mtu < new_mtu) { - DPRINTK(PROBE, INFO, - "Set MTU on %s to >= %d " - "before changing MTU on %s\n", - adapter->netdev->name, new_mtu, dev->name); - return -EINVAL; - } - dev->mtu = new_mtu; - return 0; -} - -void igb_vmdq_tx_timeout(struct net_device *dev) -{ - return; -} - -void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct e1000_hw *hw = &adapter->hw; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - vadapter->vlgrp = grp; - - igb_enable_vlan_tags(adapter); - E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0); - - return; -} -void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; -#ifndef HAVE_NETDEV_VLAN_FEATURES - struct net_device *v_netdev; -#endif - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, TRUE, hw_queue); - -#ifndef HAVE_NETDEV_VLAN_FEATURES - - /* Copy feature flags from netdev to the vlan netdev for this vid. - * This allows things like TSO to bubble down to our vlan device. 
- */ - v_netdev = vlan_group_get_device(vadapter->vlgrp, vid); - v_netdev->features |= adapter->netdev->features; - vlan_group_set_device(vadapter->vlgrp, vid, v_netdev); -#endif - - return; -} -void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(dev); - struct igb_adapter *adapter = vadapter->real_adapter; - int hw_queue = vadapter->rx_ring->queue_index + - adapter->vfs_allocated_count; - - vlan_group_set_device(vadapter->vlgrp, vid, NULL); - /* remove vlan from VLVF table array */ - igb_vlvf_set(adapter, vid, FALSE, hw_queue); - - - return; -} - -static int igb_vmdq_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct e1000_hw *hw = &adapter->hw; - u32 status; - - if (hw->phy.media_type == e1000_media_type_copper) { - - ecmd->supported = (SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Full| - SUPPORTED_Autoneg | - SUPPORTED_TP); - ecmd->advertising = ADVERTISED_TP; - - if (hw->mac.autoneg == 1) { - ecmd->advertising |= ADVERTISED_Autoneg; - /* the e1000 autoneg seems to match ethtool nicely */ - ecmd->advertising |= hw->phy.autoneg_advertised; - } - - ecmd->port = PORT_TP; - ecmd->phy_address = hw->phy.addr; - } else { - ecmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE | - SUPPORTED_Autoneg); - - ecmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE | - ADVERTISED_Autoneg); - - ecmd->port = PORT_FIBRE; - } - - ecmd->transceiver = XCVR_INTERNAL; - - status = E1000_READ_REG(hw, E1000_STATUS); - - if (status & E1000_STATUS_LU) { - - if ((status & E1000_STATUS_SPEED_1000) || - hw->phy.media_type != e1000_media_type_copper) - ecmd->speed = SPEED_1000; - else if (status & E1000_STATUS_SPEED_100) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - - if ((status & E1000_STATUS_FD) || - hw->phy.media_type != e1000_media_type_copper) - ecmd->duplex = DUPLEX_FULL; - else - ecmd->duplex = DUPLEX_HALF; - } else { - ecmd->speed = -1; - ecmd->duplex = -1; - } - - ecmd->autoneg = hw->mac.autoneg ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; - return 0; -} - - -static u32 igb_vmdq_get_msglevel(struct net_device *netdev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - struct igb_adapter *adapter = vadapter->real_adapter; - return adapter->msg_enable; -} - -static void igb_vmdq_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - struct igb_adapter *adapter = vadapter->real_adapter; - struct net_device *main_netdev = adapter->netdev; - - strncpy(drvinfo->driver, igb_driver_name, 32); - strncpy(drvinfo->version, igb_driver_version, 32); - - strncpy(drvinfo->fw_version, "N/A", 4); - snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name, - vadapter->rx_ring->queue_index); - drvinfo->n_stats = 0; - drvinfo->testinfo_len = 0; - drvinfo->regdump_len = 0; -} - -static void igb_vmdq_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - - struct igb_ring *tx_ring = vadapter->tx_ring; - struct igb_ring *rx_ring = vadapter->rx_ring; - - ring->rx_max_pending = IGB_MAX_RXD; - ring->tx_max_pending = IGB_MAX_TXD; - ring->rx_mini_max_pending = 0; - ring->rx_jumbo_max_pending = 0; - ring->rx_pending = rx_ring->count; - ring->tx_pending = tx_ring->count; - ring->rx_mini_pending = 0; - ring->rx_jumbo_pending = 0; -} -static u32 igb_vmdq_get_rx_csum(struct net_device *netdev) -{ - struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); - struct igb_adapter *adapter = vadapter->real_adapter; - - return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags); -} - - -static struct ethtool_ops igb_vmdq_ethtool_ops = { - .get_settings = igb_vmdq_get_settings, - .get_drvinfo = igb_vmdq_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ringparam = igb_vmdq_get_ringparam, - .get_rx_csum = igb_vmdq_get_rx_csum, - .get_tx_csum = ethtool_op_get_tx_csum, - .get_sg = ethtool_op_get_sg, - .set_sg = ethtool_op_set_sg, - .get_msglevel = igb_vmdq_get_msglevel, -#ifdef NETIF_F_TSO - .get_tso = ethtool_op_get_tso, -#endif -#ifdef HAVE_ETHTOOL_GET_PERM_ADDR - .get_perm_addr = ethtool_op_get_perm_addr, -#endif -}; - -void igb_vmdq_set_ethtool_ops(struct net_device *netdev) -{ - SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops); -} - - -#endif /* CONFIG_IGB_VMDQ_NETDEV */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h deleted file mode 100644 index e68c48cf..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IGB_VMDQ_H_ -#define _IGB_VMDQ_H_ - -#ifdef CONFIG_IGB_VMDQ_NETDEV -int igb_vmdq_open(struct net_device *dev); -int igb_vmdq_close(struct net_device *dev); -netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev); -struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev); -void igb_vmdq_set_rx_mode(struct net_device *dev); -int igb_vmdq_set_mac(struct net_device *dev, void *addr); -int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu); -void igb_vmdq_tx_timeout(struct net_device *dev); -void igb_vmdq_vlan_rx_register(struct net_device *dev, - struct vlan_group *grp); -void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); -void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); -void igb_vmdq_set_ethtool_ops(struct net_device *netdev); -#endif /* CONFIG_IGB_VMDQ_NETDEV */ -#endif /* _IGB_VMDQ_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h deleted file mode 100644 index fd3175b5..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ /dev/null @@ -1,3933 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _KCOMPAT_H_ -#define _KCOMPAT_H_ - -#ifndef LINUX_VERSION_CODE -#include -#else -#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* NAPI enable/disable flags here */ -#define NAPI - -#define adapter_struct igb_adapter -#define adapter_q_vector igb_q_vector -#define NAPI - -/* and finally set defines so that the code sees the changes */ -#ifdef NAPI -#else -#endif /* NAPI */ - -/* packet split disable/enable */ -#ifdef DISABLE_PACKET_SPLIT -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT -#define CONFIG_IGB_DISABLE_PACKET_SPLIT -#endif -#endif /* DISABLE_PACKET_SPLIT */ - -/* MSI compatibility code for all kernels and drivers */ -#ifdef DISABLE_PCI_MSI -#undef CONFIG_PCI_MSI -#endif -#ifndef CONFIG_PCI_MSI -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) -struct msix_entry { - u16 vector; /* kernel uses to write allocated vector */ - u16 entry; /* driver uses to specify entry, OS writes */ -}; -#endif -#undef pci_enable_msi -#define pci_enable_msi(a) -ENOTSUPP -#undef pci_disable_msi -#define pci_disable_msi(a) do {} while (0) -#undef pci_enable_msix -#define pci_enable_msix(a, b, c) -ENOTSUPP -#undef pci_disable_msix -#define pci_disable_msix(a) do {} while (0) -#define msi_remove_pci_irq_vectors(a) do {} while (0) -#endif /* CONFIG_PCI_MSI */ -#ifdef DISABLE_PM -#undef CONFIG_PM -#endif - -#ifdef DISABLE_NET_POLL_CONTROLLER -#undef CONFIG_NET_POLL_CONTROLLER -#endif - -#ifndef PMSG_SUSPEND -#define PMSG_SUSPEND 3 -#endif - -/* generic boolean compatibility */ -#undef TRUE -#undef FALSE -#define TRUE true -#define FALSE false -#ifdef GCC_VERSION -#if ( GCC_VERSION < 3000 ) 
-#define _Bool char -#endif -#else -#define _Bool char -#endif - -/* kernels less than 2.4.14 don't have this */ -#ifndef ETH_P_8021Q -#define ETH_P_8021Q 0x8100 -#endif - -#ifndef module_param -#define module_param(v,t,p) MODULE_PARM(v, "i"); -#endif - -#ifndef DMA_64BIT_MASK -#define DMA_64BIT_MASK 0xffffffffffffffffULL -#endif - -#ifndef DMA_32BIT_MASK -#define DMA_32BIT_MASK 0x00000000ffffffffULL -#endif - -#ifndef PCI_CAP_ID_EXP -#define PCI_CAP_ID_EXP 0x10 -#endif - -#ifndef PCIE_LINK_STATE_L0S -#define PCIE_LINK_STATE_L0S 1 -#endif -#ifndef PCIE_LINK_STATE_L1 -#define PCIE_LINK_STATE_L1 2 -#endif - -#ifndef mmiowb -#ifdef CONFIG_IA64 -#define mmiowb() asm volatile ("mf.a" ::: "memory") -#else -#define mmiowb() -#endif -#endif - -#ifndef SET_NETDEV_DEV -#define SET_NETDEV_DEV(net, pdev) -#endif - -#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) -#define free_netdev(x) kfree(x) -#endif - -#ifdef HAVE_POLL_CONTROLLER -#define CONFIG_NET_POLL_CONTROLLER -#endif - -#ifndef SKB_DATAREF_SHIFT -/* if we do not have the infrastructure to detect if skb_header is cloned - just return false in all cases */ -#define skb_header_cloned(x) 0 -#endif - -#ifndef NETIF_F_GSO -#define gso_size tso_size -#define gso_segs tso_segs -#endif - -#ifndef NETIF_F_GRO -#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ - vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) -#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) -#endif - -#ifndef NETIF_F_SCTP_CSUM -#define NETIF_F_SCTP_CSUM 0 -#endif - -#ifndef NETIF_F_LRO -#define NETIF_F_LRO (1 << 15) -#endif - -#ifndef NETIF_F_NTUPLE -#define NETIF_F_NTUPLE (1 << 27) -#endif - -#ifndef IPPROTO_SCTP -#define IPPROTO_SCTP 132 -#endif - -#ifndef CHECKSUM_PARTIAL -#define CHECKSUM_PARTIAL CHECKSUM_HW -#define CHECKSUM_COMPLETE CHECKSUM_HW -#endif - -#ifndef __read_mostly -#define __read_mostly -#endif - -#ifndef MII_RESV1 -#define MII_RESV1 0x17 /* Reserved... 
*/ -#endif - -#ifndef unlikely -#define unlikely(_x) _x -#define likely(_x) _x -#endif - -#ifndef WARN_ON -#define WARN_ON(x) -#endif - -#ifndef PCI_DEVICE -#define PCI_DEVICE(vend,dev) \ - .vendor = (vend), .device = (dev), \ - .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID -#endif - -#ifndef node_online -#define node_online(node) ((node) == 0) -#endif - -#ifndef num_online_cpus -#define num_online_cpus() smp_num_cpus -#endif - -#ifndef cpu_online -#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) -#endif - -#ifndef _LINUX_RANDOM_H -#include -#endif - -#ifndef DECLARE_BITMAP -#ifndef BITS_TO_LONGS -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) -#endif -#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] -#endif - -#ifndef VLAN_HLEN -#define VLAN_HLEN 4 -#endif - -#ifndef VLAN_ETH_HLEN -#define VLAN_ETH_HLEN 18 -#endif - -#ifndef VLAN_ETH_FRAME_LEN -#define VLAN_ETH_FRAME_LEN 1518 -#endif - -#if !defined(IXGBE_DCA) && !defined(IGB_DCA) -#define dca_get_tag(b) 0 -#define dca_add_requester(a) -1 -#define dca_remove_requester(b) do { } while(0) -#define DCA_PROVIDER_ADD 0x0001 -#define DCA_PROVIDER_REMOVE 0x0002 -#endif - -#ifndef DCA_GET_TAG_TWO_ARGS -#define dca3_get_tag(a,b) dca_get_tag(b) -#endif - -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#if defined(__i386__) || defined(__x86_64__) -#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#endif -#endif - -/* taken from 2.6.24 definition in linux/kernel.h */ -#ifndef IS_ALIGNED -#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) -#endif - -#ifdef IS_ENABLED -#undef IS_ENABLED -#undef __ARG_PLACEHOLDER_1 -#undef config_enabled -#undef _config_enabled -#undef __config_enabled -#undef ___config_enabled -#endif - -#define __ARG_PLACEHOLDER_1 0, -#define config_enabled(cfg) _config_enabled(cfg) -#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value) -#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0) -#define ___config_enabled(__ignored, val, ...) 
val - -#define IS_ENABLED(option) \ - (config_enabled(option) || config_enabled(option##_MODULE)) - -#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX) -struct _kc_vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; -#define vlan_ethhdr _kc_vlan_ethhdr -struct _kc_vlan_hdr { - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; -#define vlan_hdr _kc_vlan_hdr -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#define vlan_tx_tag_present(_skb) 0 -#define vlan_tx_tag_get(_skb) 0 -#endif -#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */ - -#ifndef VLAN_PRIO_SHIFT -#define VLAN_PRIO_SHIFT 13 -#endif - - -#ifndef __GFP_COLD -#define __GFP_COLD 0 -#endif - -#ifndef __GFP_COMP -#define __GFP_COMP 0 -#endif - -/*****************************************************************************/ -/* Installations with ethtool version without eeprom, adapter id, or statistics - * support */ - -#ifndef ETH_GSTRING_LEN -#define ETH_GSTRING_LEN 32 -#endif - -#ifndef ETHTOOL_GSTATS -#define ETHTOOL_GSTATS 0x1d -#undef ethtool_drvinfo -#define ethtool_drvinfo k_ethtool_drvinfo -struct k_ethtool_drvinfo { - u32 cmd; - char driver[32]; - char version[32]; - char fw_version[32]; - char bus_info[32]; - char reserved1[32]; - char reserved2[16]; - u32 n_stats; - u32 testinfo_len; - u32 eedump_len; - u32 regdump_len; -}; - -struct ethtool_stats { - u32 cmd; - u32 n_stats; - u64 data[0]; -}; -#endif /* ETHTOOL_GSTATS */ - -#ifndef ETHTOOL_PHYS_ID -#define ETHTOOL_PHYS_ID 0x1c -#endif /* ETHTOOL_PHYS_ID */ - -#ifndef ETHTOOL_GSTRINGS -#define ETHTOOL_GSTRINGS 0x1b -enum ethtool_stringset { - ETH_SS_TEST = 0, - ETH_SS_STATS, -}; -struct ethtool_gstrings { - u32 cmd; /* ETHTOOL_GSTRINGS */ - u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ - u32 len; /* number of strings in the string set */ - u8 data[0]; -}; -#endif /* ETHTOOL_GSTRINGS */ - -#ifndef ETHTOOL_TEST -#define ETHTOOL_TEST 0x1a -enum ethtool_test_flags { - ETH_TEST_FL_OFFLINE = (1 << 0), - ETH_TEST_FL_FAILED = (1 << 1), -}; -struct ethtool_test { - u32 cmd; - u32 flags; - u32 reserved; - u32 len; - u64 data[0]; -}; -#endif /* ETHTOOL_TEST */ - -#ifndef ETHTOOL_GEEPROM -#define ETHTOOL_GEEPROM 0xb -#undef ETHTOOL_GREGS -struct ethtool_eeprom { - u32 cmd; - u32 magic; - u32 offset; - u32 len; - u8 data[0]; -}; - -struct ethtool_value { - u32 cmd; - u32 data; -}; -#endif /* ETHTOOL_GEEPROM */ - -#ifndef ETHTOOL_GLINK -#define ETHTOOL_GLINK 0xa -#endif /* ETHTOOL_GLINK */ - -#ifndef ETHTOOL_GWOL -#define ETHTOOL_GWOL 0x5 -#define ETHTOOL_SWOL 0x6 -#define SOPASS_MAX 6 -struct ethtool_wolinfo { - u32 cmd; - u32 supported; - u32 wolopts; - u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ -}; -#endif /* ETHTOOL_GWOL */ - -#ifndef ETHTOOL_GREGS -#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ -#define ethtool_regs _kc_ethtool_regs -/* for passing big chunks of data */ -struct _kc_ethtool_regs { - u32 cmd; - u32 version; /* driver-specific, indicates different chips/revs */ - u32 len; /* bytes */ - u8 data[0]; -}; -#endif /* ETHTOOL_GREGS */ - -#ifndef ETHTOOL_GMSGLVL -#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ -#endif -#ifndef ETHTOOL_SMSGLVL -#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. 
*/ -#endif -#ifndef ETHTOOL_NWAY_RST -#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ -#endif -#ifndef ETHTOOL_GLINK -#define ETHTOOL_GLINK 0x0000000a /* Get link status */ -#endif -#ifndef ETHTOOL_GEEPROM -#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ -#endif -#ifndef ETHTOOL_SEEPROM -#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ -#endif -#ifndef ETHTOOL_GCOALESCE -#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ -/* for configuring coalescing parameters of chip */ -#define ethtool_coalesce _kc_ethtool_coalesce -struct _kc_ethtool_coalesce { - u32 cmd; /* ETHTOOL_{G,S}COALESCE */ - - /* How many usecs to delay an RX interrupt after - * a packet arrives. If 0, only rx_max_coalesced_frames - * is used. - */ - u32 rx_coalesce_usecs; - - /* How many packets to delay an RX interrupt after - * a packet arrives. If 0, only rx_coalesce_usecs is - * used. It is illegal to set both usecs and max frames - * to zero as this would cause RX interrupts to never be - * generated. - */ - u32 rx_max_coalesced_frames; - - /* Same as above two parameters, except that these values - * apply while an IRQ is being serviced by the host. Not - * all cards support this feature and the values are ignored - * in that case. - */ - u32 rx_coalesce_usecs_irq; - u32 rx_max_coalesced_frames_irq; - - /* How many usecs to delay a TX interrupt after - * a packet is sent. If 0, only tx_max_coalesced_frames - * is used. - */ - u32 tx_coalesce_usecs; - - /* How many packets to delay a TX interrupt after - * a packet is sent. If 0, only tx_coalesce_usecs is - * used. It is illegal to set both usecs and max frames - * to zero as this would cause TX interrupts to never be - * generated. - */ - u32 tx_max_coalesced_frames; - - /* Same as above two parameters, except that these values - * apply while an IRQ is being serviced by the host. Not - * all cards support this feature and the values are ignored - * in that case. - */ - u32 tx_coalesce_usecs_irq; - u32 tx_max_coalesced_frames_irq; - - /* How many usecs to delay in-memory statistics - * block updates. Some drivers do not have an in-memory - * statistic block, and in such cases this value is ignored. - * This value must not be zero. - */ - u32 stats_block_coalesce_usecs; - - /* Adaptive RX/TX coalescing is an algorithm implemented by - * some drivers to improve latency under low packet rates and - * improve throughput under high packet rates. Some drivers - * only implement one of RX or TX adaptive coalescing. Anything - * not implemented by the driver causes these values to be - * silently ignored. - */ - u32 use_adaptive_rx_coalesce; - u32 use_adaptive_tx_coalesce; - - /* When the packet rate (measured in packets per second) - * is below pkt_rate_low, the {rx,tx}_*_low parameters are - * used. - */ - u32 pkt_rate_low; - u32 rx_coalesce_usecs_low; - u32 rx_max_coalesced_frames_low; - u32 tx_coalesce_usecs_low; - u32 tx_max_coalesced_frames_low; - - /* When the packet rate is below pkt_rate_high but above - * pkt_rate_low (both measured in packets per second) the - * normal {rx,tx}_* coalescing parameters are used. - */ - - /* When the packet rate is (measured in packets per second) - * is above pkt_rate_high, the {rx,tx}_*_high parameters are - * used. - */ - u32 pkt_rate_high; - u32 rx_coalesce_usecs_high; - u32 rx_max_coalesced_frames_high; - u32 tx_coalesce_usecs_high; - u32 tx_max_coalesced_frames_high; - - /* How often to do adaptive coalescing packet rate sampling, - * measured in seconds. Must not be zero. 
- */ - u32 rate_sample_interval; -}; -#endif /* ETHTOOL_GCOALESCE */ - -#ifndef ETHTOOL_SCOALESCE -#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ -#endif -#ifndef ETHTOOL_GRINGPARAM -#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ -/* for configuring RX/TX ring parameters */ -#define ethtool_ringparam _kc_ethtool_ringparam -struct _kc_ethtool_ringparam { - u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ - - /* Read only attributes. These indicate the maximum number - * of pending RX/TX ring entries the driver will allow the - * user to set. - */ - u32 rx_max_pending; - u32 rx_mini_max_pending; - u32 rx_jumbo_max_pending; - u32 tx_max_pending; - - /* Values changeable by the user. The valid values are - * in the range 1 to the "*_max_pending" counterpart above. - */ - u32 rx_pending; - u32 rx_mini_pending; - u32 rx_jumbo_pending; - u32 tx_pending; -}; -#endif /* ETHTOOL_GRINGPARAM */ - -#ifndef ETHTOOL_SRINGPARAM -#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ -#endif -#ifndef ETHTOOL_GPAUSEPARAM -#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ -/* for configuring link flow control parameters */ -#define ethtool_pauseparam _kc_ethtool_pauseparam -struct _kc_ethtool_pauseparam { - u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ - - /* If the link is being auto-negotiated (via ethtool_cmd.autoneg - * being true) the user may set 'autoneg' here non-zero to have the - * pause parameters be auto-negotiated too. In such a case, the - * {rx,tx}_pause values below determine what capabilities are - * advertised. - * - * If 'autoneg' is zero or the link is not being auto-negotiated, - * then {rx,tx}_pause force the driver to use/not-use pause - * flow control. - */ - u32 autoneg; - u32 rx_pause; - u32 tx_pause; -}; -#endif /* ETHTOOL_GPAUSEPARAM */ - -#ifndef ETHTOOL_SPAUSEPARAM -#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ -#endif -#ifndef ETHTOOL_GRXCSUM -#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_SRXCSUM -#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_GTXCSUM -#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_STXCSUM -#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_GSG -#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable - * (ethtool_value) */ -#endif -#ifndef ETHTOOL_SSG -#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable - * (ethtool_value). */ -#endif -#ifndef ETHTOOL_TEST -#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. 
*/ -#endif -#ifndef ETHTOOL_GSTRINGS -#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ -#endif -#ifndef ETHTOOL_PHYS_ID -#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ -#endif -#ifndef ETHTOOL_GSTATS -#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ -#endif -#ifndef ETHTOOL_GTSO -#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_STSO -#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ -#endif - -#ifndef ETHTOOL_BUSINFO_LEN -#define ETHTOOL_BUSINFO_LEN 32 -#endif - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) -#endif -#ifndef AX_RELEASE_VERSION -#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) -#endif - -#ifndef AX_RELEASE_CODE -#define AX_RELEASE_CODE 0 -#endif - -#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0)) -#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0) -#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1)) -#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1) -#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2)) -#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3) -#endif - -#ifndef RHEL_RELEASE_CODE -/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ -#define RHEL_RELEASE_CODE 0 -#endif - -/* SuSE version macro is the same as Linux kernel version */ -#ifndef SLE_VERSION -#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) -#endif -#ifdef CONFIG_SUSE_KERNEL -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) -/* SLES12SP3 is at least 4.4.57+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) -#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) -/* SLES12 is at least 3.12.28+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12,0,0) -#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0))) -/* SLES11 SP3 is at least 3.0.61+ based */ -#define SLE_VERSION_CODE SLE_VERSION(11,3,0) -#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) -/* SLES11 SP1 is 2.6.32 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,1,0) -#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) -/* SLES11 GA is 2.6.27 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,0,0) -#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ -#endif /* CONFIG_SUSE_KERNEL */ -#ifndef SLE_VERSION_CODE -#define SLE_VERSION_CODE 0 -#endif /* SLE_VERSION_CODE */ - -/* Ubuntu release and kernel codes must be specified from Makefile */ -#ifndef UBUNTU_RELEASE_VERSION -#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b)) -#endif -#ifndef UBUNTU_KERNEL_VERSION -#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload)) -#endif -#ifndef UBUNTU_RELEASE_CODE -#define UBUNTU_RELEASE_CODE 0 -#endif -#ifndef UBUNTU_KERNEL_CODE -#define UBUNTU_KERNEL_CODE 0 -#endif - -#ifdef __KLOCWORK__ -#ifdef ARRAY_SIZE -#undef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif -#endif /* __KLOCWORK__ */ - -/*****************************************************************************/ -/* 2.4.3 => 2.4.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - -/**************************************/ -/* PCI DRIVER API */ - -#ifndef pci_set_dma_mask -#define pci_set_dma_mask _kc_pci_set_dma_mask -extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); -#endif - -#ifndef pci_request_regions -#define pci_request_regions _kc_pci_request_regions -extern int _kc_pci_request_regions(struct pci_dev *pdev, char 
*res_name); -#endif - -#ifndef pci_release_regions -#define pci_release_regions _kc_pci_release_regions -extern void _kc_pci_release_regions(struct pci_dev *pdev); -#endif - -/**************************************/ -/* NETWORK DRIVER API */ - -#ifndef alloc_etherdev -#define alloc_etherdev _kc_alloc_etherdev -extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); -#endif - -#ifndef is_valid_ether_addr -#define is_valid_ether_addr _kc_is_valid_ether_addr -extern int _kc_is_valid_ether_addr(u8 *addr); -#endif - -/**************************************/ -/* MISCELLANEOUS */ - -#ifndef INIT_TQUEUE -#define INIT_TQUEUE(_tq, _routine, _data) \ - do { \ - INIT_LIST_HEAD(&(_tq)->list); \ - (_tq)->sync = 0; \ - (_tq)->routine = _routine; \ - (_tq)->data = _data; \ - } while (0) -#endif - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) -/* Generic MII registers. */ -#define MII_BMCR 0x00 /* Basic mode control register */ -#define MII_BMSR 0x01 /* Basic mode status register */ -#define MII_PHYSID1 0x02 /* PHYS ID 1 */ -#define MII_PHYSID2 0x03 /* PHYS ID 2 */ -#define MII_ADVERTISE 0x04 /* Advertisement control reg */ -#define MII_LPA 0x05 /* Link partner ability reg */ -#define MII_EXPANSION 0x06 /* Expansion register */ -/* Basic mode control register. */ -#define BMCR_FULLDPLX 0x0100 /* Full duplex */ -#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ -/* Basic mode status register. */ -#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ -#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ -#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ -#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ -#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ -#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ -/* Advertisement control register. */ -#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ -#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ -#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ -#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ -#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ -#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ - ADVERTISE_100HALF | ADVERTISE_100FULL) -/* Expansion register for auto-negotiation. 
*/ -#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ -#endif - -/*****************************************************************************/ -/* 2.4.6 => 2.4.3 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -#ifndef pci_set_power_state -#define pci_set_power_state _kc_pci_set_power_state -extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); -#endif - -#ifndef pci_enable_wake -#define pci_enable_wake _kc_pci_enable_wake -extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); -#endif - -#ifndef pci_disable_device -#define pci_disable_device _kc_pci_disable_device -extern void _kc_pci_disable_device(struct pci_dev *pdev); -#endif - -/* PCI PM entry point syntax changed, so don't support suspend/resume */ -#undef CONFIG_PM - -#endif /* 2.4.6 => 2.4.3 */ - -#ifndef HAVE_PCI_SET_MWI -#define pci_set_mwi(X) pci_write_config_word(X, \ - PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ - PCI_COMMAND_INVALIDATE); -#define pci_clear_mwi(X) pci_write_config_word(X, \ - PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ - ~PCI_COMMAND_INVALIDATE); -#endif - -/*****************************************************************************/ -/* 2.4.10 => 2.4.9 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) - -/**************************************/ -/* MODULE API */ - -#ifndef MODULE_LICENSE - #define MODULE_LICENSE(X) -#endif - -/**************************************/ -/* OTHER */ - -#undef min -#define min(x,y) ({ \ - const typeof(x) _x = (x); \ - const typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x < _y ? _x : _y; }) - -#undef max -#define max(x,y) ({ \ - const typeof(x) _x = (x); \ - const typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x > _y ? _x : _y; }) - -#define min_t(type,x,y) ({ \ - type _x = (x); \ - type _y = (y); \ - _x < _y ? _x : _y; }) - -#define max_t(type,x,y) ({ \ - type _x = (x); \ - type _y = (y); \ - _x > _y ? _x : _y; }) - -#ifndef list_for_each_safe -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) -#endif - -#ifndef ____cacheline_aligned_in_smp -#ifdef CONFIG_SMP -#define ____cacheline_aligned_in_smp ____cacheline_aligned -#else -#define ____cacheline_aligned_in_smp -#endif /* CONFIG_SMP */ -#endif - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); -#define snprintf(buf, size, fmt, args...) 
_kc_snprintf(buf, size, fmt, ##args) -extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) -#else /* 2.4.8 => 2.4.9 */ -extern int snprintf(char * buf, size_t size, const char *fmt, ...); -extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -#endif -#endif /* 2.4.10 -> 2.4.6 */ - - -/*****************************************************************************/ -/* 2.4.12 => 2.4.10 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) -#ifndef HAVE_NETIF_MSG -#define HAVE_NETIF_MSG 1 -enum { - NETIF_MSG_DRV = 0x0001, - NETIF_MSG_PROBE = 0x0002, - NETIF_MSG_LINK = 0x0004, - NETIF_MSG_TIMER = 0x0008, - NETIF_MSG_IFDOWN = 0x0010, - NETIF_MSG_IFUP = 0x0020, - NETIF_MSG_RX_ERR = 0x0040, - NETIF_MSG_TX_ERR = 0x0080, - NETIF_MSG_TX_QUEUED = 0x0100, - NETIF_MSG_INTR = 0x0200, - NETIF_MSG_TX_DONE = 0x0400, - NETIF_MSG_RX_STATUS = 0x0800, - NETIF_MSG_PKTDATA = 0x1000, - NETIF_MSG_HW = 0x2000, - NETIF_MSG_WOL = 0x4000, -}; - -#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) -#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) -#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) -#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) -#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) -#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) -#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) -#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) -#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) -#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) -#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) -#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) -#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) -#endif /* !HAVE_NETIF_MSG */ -#endif /* 2.4.12 => 2.4.10 */ - -/*****************************************************************************/ -/* 2.4.13 => 2.4.12 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#ifndef virt_to_page - #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) -#endif - -#ifndef pci_map_page -#define pci_map_page _kc_pci_map_page -extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); -#endif - -#ifndef pci_unmap_page -#define pci_unmap_page _kc_pci_unmap_page -extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); -#endif - -/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ - -#undef DMA_32BIT_MASK -#define DMA_32BIT_MASK 0xffffffff -#undef DMA_64BIT_MASK -#define DMA_64BIT_MASK 0xffffffff - -/**************************************/ -/* OTHER */ - -#ifndef cpu_relax -#define cpu_relax() rep_nop() -#endif - -struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_vlan_proto; - unsigned short h_vlan_TCI; - unsigned short h_vlan_encapsulated_proto; -}; -#endif /* 2.4.13 => 2.4.12 */ - -/*****************************************************************************/ -/* 2.4.17 => 2.4.12 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) - -#ifndef __devexit_p - #define __devexit_p(x) &(x) -#endif - -#else - /* For Kernel 3.8 these are not defined - so undefine all */ - #undef __devexit_p - #undef __devexit - 
#undef __devinit - #undef __devinitdata - #define __devexit_p(x) &(x) - #define __devexit - #define __devinit - #define __devinitdata - -#endif /* 2.4.17 => 2.4.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) -#define NETIF_MSG_HW 0x2000 -#define NETIF_MSG_WOL 0x4000 - -#ifndef netif_msg_hw -#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) -#endif -#ifndef netif_msg_wol -#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) -#endif -#endif /* 2.4.18 */ - -/*****************************************************************************/ - -/*****************************************************************************/ -/* 2.4.20 => 2.4.19 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) - -/* we won't support NAPI on less than 2.4.20 */ -#ifdef NAPI -#undef NAPI -#endif - -#endif /* 2.4.20 => 2.4.19 */ - -/*****************************************************************************/ -/* 2.4.22 => 2.4.17 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) -#define pci_name(x) ((x)->slot_name) - -#ifndef SUPPORTED_10000baseT_Full -#define SUPPORTED_10000baseT_Full (1 << 12) -#endif -#ifndef ADVERTISED_10000baseT_Full -#define ADVERTISED_10000baseT_Full (1 << 12) -#endif -#endif - -/*****************************************************************************/ -/* 2.4.22 => 2.4.17 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) -#ifndef IGB_NO_LRO -#define IGB_NO_LRO -#endif -#endif - -/*****************************************************************************/ -/*****************************************************************************/ -/* 2.4.23 => 2.4.22 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) -/*****************************************************************************/ -#ifdef NAPI -#ifndef netif_poll_disable -#define netif_poll_disable(x) _kc_netif_poll_disable(x) -static inline void _kc_netif_poll_disable(struct net_device *netdev) -{ - while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { - /* No hurry */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } -} -#endif -#ifndef netif_poll_enable -#define netif_poll_enable(x) _kc_netif_poll_enable(x) -static inline void _kc_netif_poll_enable(struct net_device *netdev) -{ - clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); -} -#endif -#endif /* NAPI */ -#ifndef netif_tx_disable -#define netif_tx_disable(x) _kc_netif_tx_disable(x) -static inline void _kc_netif_tx_disable(struct net_device *dev) -{ - spin_lock_bh(&dev->xmit_lock); - netif_stop_queue(dev); - spin_unlock_bh(&dev->xmit_lock); -} -#endif -#else /* 2.4.23 => 2.4.22 */ -#define HAVE_SCTP -#endif /* 2.4.23 => 2.4.22 */ - -/*****************************************************************************/ -/* 2.6.4 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ - ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) -#define ETHTOOL_OPS_COMPAT -#endif /* 2.6.4 => 2.6.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) -#define __user -#endif /* < 2.4.27 */ - -/*****************************************************************************/ -/* 2.5.71 => 2.4.x */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) -#define sk_protocol protocol -#define pci_get_device pci_find_device -#endif /* 2.5.70 => 2.4.x */ - 
-/*****************************************************************************/ -/* < 2.4.27 or 2.6.0 <= 2.6.5 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ - ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) - -#ifndef netif_msg_init -#define netif_msg_init _kc_netif_msg_init -static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) -{ - /* use default */ - if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) - return default_msg_enable_bits; - if (debug_value == 0) /* no output */ - return 0; - /* set low N bits */ - return (1 << debug_value) -1; -} -#endif - -#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ -/*****************************************************************************/ -#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ - (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ - ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) -#define netdev_priv(x) x->priv -#endif - -/*****************************************************************************/ -/* <= 2.5.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) -#include -#undef pci_register_driver -#define pci_register_driver pci_module_init - -/* - * Most of the dma compat code is copied/modified from the 2.4.37 - * /include/linux/libata-compat.h header file - */ -/* These definitions mirror those in pci.h, so they can be used - * interchangeably with their PCI_ counterparts */ -enum dma_data_direction { - DMA_BIDIRECTIONAL = 0, - DMA_TO_DEVICE = 1, - DMA_FROM_DEVICE = 2, - DMA_NONE = 3, -}; - -struct device { - struct pci_dev pdev; -}; - -static inline struct pci_dev *to_pci_dev (struct device *dev) -{ - return (struct pci_dev *) dev; -} -static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) -{ - return (struct device *) pdev; -} - -#define pdev_printk(lvl, pdev, fmt, args...) \ - printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) -#define dev_err(dev, fmt, args...) \ - pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) -#define dev_info(dev, fmt, args...) \ - pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) -#define dev_warn(dev, fmt, args...) \ - pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) -#define dev_notice(dev, fmt, args...) \ - pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args) -#define dev_dbg(dev, fmt, args...) \ - pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args) - -/* NOTE: dangerous! 
we ignore the 'gfp' argument */ -#define dma_alloc_coherent(dev,sz,dma,gfp) \ - pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) -#define dma_free_coherent(dev,sz,addr,dma_addr) \ - pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) - -#define dma_map_page(dev,a,b,c,d) \ - pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) -#define dma_unmap_page(dev,a,b,c) \ - pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) - -#define dma_map_single(dev,a,b,c) \ - pci_map_single(to_pci_dev(dev),(a),(b),(c)) -#define dma_unmap_single(dev,a,b,c) \ - pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) - -#define dma_map_sg(dev, sg, nents, dir) \ - pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir) -#define dma_unmap_sg(dev, sg, nents, dir) \ - pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir) - -#define dma_sync_single(dev,a,b,c) \ - pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) - -/* for range just sync everything, that's all the pci API can do */ -#define dma_sync_single_range(dev,addr,off,sz,dir) \ - pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) - -#define dma_set_mask(dev,mask) \ - pci_set_dma_mask(to_pci_dev(dev),(mask)) - -/* hlist_* code - double linked lists */ -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); - n->next = NULL; - n->pprev = NULL; -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} -#define HLIST_HEAD_INIT { .first = NULL } -#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } -#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) -static inline void INIT_HLIST_NODE(struct hlist_node *h) -{ - h->next = NULL; - h->pprev = NULL; -} - -#ifndef might_sleep -#define might_sleep() -#endif -#else -static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) -{ - return &pdev->dev; -} -#endif /* <= 2.5.0 */ - -/*****************************************************************************/ -/* 2.5.28 => 2.4.23 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) - -#include -#define work_struct tq_struct -#undef INIT_WORK -#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) -#undef container_of -#define container_of list_entry -#define schedule_work schedule_task -#define flush_scheduled_work flush_scheduled_tasks -#define cancel_work_sync(x) flush_scheduled_work() - -#endif /* 2.5.28 => 2.4.17 */ - -/*****************************************************************************/ -/* 2.6.0 => 2.5.28 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -#ifndef read_barrier_depends -#define read_barrier_depends() rmb() -#endif - -#undef get_cpu -#define get_cpu() smp_processor_id() -#undef put_cpu -#define put_cpu() do { } while(0) -#define MODULE_INFO(version, _version) -#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT -#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 -#endif -#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT -#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 -#endif - -#define dma_set_coherent_mask(dev,mask) 1 - -#undef dev_put -#define dev_put(dev) __dev_put(dev) - -#ifndef 
skb_fill_page_desc -#define skb_fill_page_desc _kc_skb_fill_page_desc -extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); -#endif - -#undef ALIGN -#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) - -#ifndef page_count -#define page_count(p) atomic_read(&(p)->count) -#endif - -#ifdef MAX_NUMNODES -#undef MAX_NUMNODES -#endif -#define MAX_NUMNODES 1 - -/* find_first_bit and find_next bit are not defined for most - * 2.4 kernels (except for the redhat 2.4.21 kernels - */ -#include -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#undef find_next_bit -#define find_next_bit _kc_find_next_bit -extern unsigned long _kc_find_next_bit(const unsigned long *addr, - unsigned long size, - unsigned long offset); -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) - - -#ifndef netdev_name -static inline const char *_kc_netdev_name(const struct net_device *dev) -{ - if (strchr(dev->name, '%')) - return "(unregistered net_device)"; - return dev->name; -} -#define netdev_name(netdev) _kc_netdev_name(netdev) -#endif /* netdev_name */ - -#ifndef strlcpy -#define strlcpy _kc_strlcpy -extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); -#endif /* strlcpy */ - -#ifndef do_div -#if BITS_PER_LONG == 64 -# define do_div(n,base) ({ \ - uint32_t __base = (base); \ - uint32_t __rem; \ - __rem = ((uint64_t)(n)) % __base; \ - (n) = ((uint64_t)(n)) / __base; \ - __rem; \ - }) -#elif BITS_PER_LONG == 32 -extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor); -# define do_div(n,base) ({ \ - uint32_t __base = (base); \ - uint32_t __rem; \ - if (likely(((n) >> 32) == 0)) { \ - __rem = (uint32_t)(n) % __base; \ - (n) = (uint32_t)(n) / __base; \ - } else \ - __rem = _kc__div64_32(&(n), __base); \ - __rem; \ - }) -#else /* BITS_PER_LONG == ?? 
*/ -# error do_div() does not yet support the C64 -#endif /* BITS_PER_LONG */ -#endif /* do_div */ - -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC 1000000000L -#endif - -#undef HAVE_I2C_SUPPORT -#else /* 2.6.0 */ -#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \ - (RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9))) -#define HAVE_I2C_SUPPORT -#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */ - -#endif /* 2.6.0 => 2.5.28 */ -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ) -#define dma_pool pci_pool -#define dma_pool_destroy pci_pool_destroy -#define dma_pool_alloc pci_pool_alloc -#define dma_pool_free pci_pool_free - -#define dma_pool_create(name,dev,size,align,allocation) \ - pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation)) -#endif /* < 2.6.3 */ - -/*****************************************************************************/ -/* 2.6.4 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -#define MODULE_VERSION(_version) MODULE_INFO(version, _version) -#endif /* 2.6.4 => 2.6.0 */ - -/*****************************************************************************/ -/* 2.6.5 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) -#define dma_sync_single_for_cpu dma_sync_single -#define dma_sync_single_for_device dma_sync_single -#define dma_sync_single_range_for_cpu dma_sync_single_range -#define dma_sync_single_range_for_device dma_sync_single_range -#ifndef pci_dma_mapping_error -#define pci_dma_mapping_error _kc_pci_dma_mapping_error -static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) -{ - return dma_addr == 0; -} -#endif -#endif /* 2.6.5 => 2.6.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); -#define scnprintf(buf, size, fmt, args...) 
_kc_scnprintf(buf, size, fmt, ##args) -#endif /* < 2.6.4 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) -/* taken from 2.6 include/linux/bitmap.h */ -#undef bitmap_zero -#define bitmap_zero _kc_bitmap_zero -static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) -{ - if (nbits <= BITS_PER_LONG) - *dst = 0UL; - else { - int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); - } -} -#define random_ether_addr _kc_random_ether_addr -static inline void _kc_random_ether_addr(u8 *addr) -{ - get_random_bytes(addr, ETH_ALEN); - addr[0] &= 0xfe; /* clear multicast */ - addr[0] |= 0x02; /* set local assignment */ -} -#define page_to_nid(x) 0 - -#endif /* < 2.6.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) -#undef if_mii -#define if_mii _kc_if_mii -static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) -{ - return (struct mii_ioctl_data *) &rq->ifr_ifru; -} - -#ifndef __force -#define __force -#endif -#endif /* < 2.6.7 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) -#ifndef PCI_EXP_DEVCTL -#define PCI_EXP_DEVCTL 8 -#endif -#ifndef PCI_EXP_DEVCTL_CERE -#define PCI_EXP_DEVCTL_CERE 0x0001 -#endif -#define PCI_EXP_FLAGS 2 /* Capabilities register */ -#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ -#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ -#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ -#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ -#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ -#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ -#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ -#define PCI_EXP_DEVCAP 4 /* Device capabilities */ -#define PCI_EXP_DEVSTA 10 /* Device Status */ -#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout((x * HZ)/1000 + 2); \ - } while (0) - -#endif /* < 2.6.8 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) -#include -#define __iomem - -#ifndef kcalloc -#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) -extern void *_kc_kzalloc(size_t size, int flags); -#endif -#define MSEC_PER_SEC 1000L -static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) -{ -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (MSEC_PER_SEC / HZ) * j; -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) - return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); -#else - return (j * MSEC_PER_SEC) / HZ; -#endif -} -static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) -{ - if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) - return m * (HZ / MSEC_PER_SEC); -#else - return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; -#endif -} - -#define msleep_interruptible _kc_msleep_interruptible -static inline unsigned long _kc_msleep_interruptible(unsigned int msecs) -{ - unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; - - while (timeout && !signal_pending(current)) { - __set_current_state(TASK_INTERRUPTIBLE); - timeout = schedule_timeout(timeout); - } - return 
_kc_jiffies_to_msecs(timeout); -} - -/* Basic mode control register. */ -#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ - -#ifndef __le16 -#define __le16 u16 -#endif -#ifndef __le32 -#define __le32 u32 -#endif -#ifndef __le64 -#define __le64 u64 -#endif -#ifndef __be16 -#define __be16 u16 -#endif -#ifndef __be32 -#define __be32 u32 -#endif -#ifndef __be64 -#define __be64 u64 -#endif - -static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) -{ - return (struct vlan_ethhdr *)skb->mac.raw; -} - -/* Wake-On-Lan options. */ -#define WAKE_PHY (1 << 0) -#define WAKE_UCAST (1 << 1) -#define WAKE_MCAST (1 << 2) -#define WAKE_BCAST (1 << 3) -#define WAKE_ARP (1 << 4) -#define WAKE_MAGIC (1 << 5) -#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ - -#define skb_header_pointer _kc_skb_header_pointer -static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, - int offset, int len, void *buffer) -{ - int hlen = skb_headlen(skb); - - if (hlen - offset >= len) - return skb->data + offset; - -#ifdef MAX_SKB_FRAGS - if (skb_copy_bits(skb, offset, buffer, len) < 0) - return NULL; - - return buffer; -#else - return NULL; -#endif - -#ifndef NETDEV_TX_OK -#define NETDEV_TX_OK 0 -#endif -#ifndef NETDEV_TX_BUSY -#define NETDEV_TX_BUSY 1 -#endif -#ifndef NETDEV_TX_LOCKED -#define NETDEV_TX_LOCKED -1 -#endif -} - -#ifndef __bitwise -#define __bitwise -#endif -#endif /* < 2.6.9 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) -#ifdef module_param_array_named -#undef module_param_array_named -#define module_param_array_named(name, array, type, nump, perm) \ - static struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ - sizeof(array[0]), array }; \ - module_param_call(name, param_array_set, param_array_get, \ - &__param_arr_##name, perm) -#endif /* module_param_array_named */ -/* - * num_online is broken for all < 2.6.10 kernels. This is needed to support - * Node module parameter of ixgbe. 
- */ -#undef num_online_nodes -#define num_online_nodes(n) 1 -extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); -#undef node_online_map -#define node_online_map _kcompat_node_online_map -#define pci_get_class pci_find_class -#endif /* < 2.6.10 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) -#define PCI_D0 0 -#define PCI_D1 1 -#define PCI_D2 2 -#define PCI_D3hot 3 -#define PCI_D3cold 4 -typedef int pci_power_t; -#define pci_choose_state(pdev,state) state -#define PMSG_SUSPEND 3 -#define PCI_EXP_LNKCTL 16 - -#undef NETIF_F_LLTX - -#ifndef ARCH_HAS_PREFETCH -#define prefetch(X) -#endif - -#ifndef NET_IP_ALIGN -#define NET_IP_ALIGN 2 -#endif - -#define KC_USEC_PER_SEC 1000000L -#define usecs_to_jiffies _kc_usecs_to_jiffies -static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) -{ -#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) - return (KC_USEC_PER_SEC / HZ) * j; -#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) - return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); -#else - return (j * KC_USEC_PER_SEC) / HZ; -#endif -} -static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) -{ - if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; -#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) - return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); -#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) - return m * (HZ / KC_USEC_PER_SEC); -#else - return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; -#endif -} - -#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ -#define PCI_EXP_SLTCTL 24 /* Slot Control */ -#define PCI_EXP_SLTSTA 26 /* Slot Status */ -#define PCI_EXP_RTCTL 28 /* Root Control */ -#define PCI_EXP_RTCAP 30 /* Root Capabilities */ -#define PCI_EXP_RTSTA 32 /* Root Status */ -#endif /* < 2.6.11 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) -#include -#define USE_REBOOT_NOTIFIER - -/* Generic MII registers. */ -#define MII_CTRL1000 0x09 /* 1000BASE-T control */ -#define MII_STAT1000 0x0a /* 1000BASE-T status */ -/* Advertisement control register. */ -#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ -#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ -/* Link partner ability register. 
*/ -#define LPA_PAUSE_CAP 0x0400 /* Can pause */ -#define LPA_PAUSE_ASYM 0x0800 /* Can pause asymetrically */ -/* 1000BASE-T Control register */ -#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ -#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ -/* 1000BASE-T Status register */ -#define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */ -#define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */ - -#ifndef is_zero_ether_addr -#define is_zero_ether_addr _kc_is_zero_ether_addr -static inline int _kc_is_zero_ether_addr(const u8 *addr) -{ - return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); -} -#endif /* is_zero_ether_addr */ -#ifndef is_multicast_ether_addr -#define is_multicast_ether_addr _kc_is_multicast_ether_addr -static inline int _kc_is_multicast_ether_addr(const u8 *addr) -{ - return addr[0] & 0x01; -} -#endif /* is_multicast_ether_addr */ -#endif /* < 2.6.12 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) -#ifndef kstrdup -#define kstrdup _kc_kstrdup -extern char *_kc_kstrdup(const char *s, unsigned int gfp); -#endif -#endif /* < 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) -#define pm_message_t u32 -#ifndef kzalloc -#define kzalloc _kc_kzalloc -extern void *_kc_kzalloc(size_t size, int flags); -#endif - -/* Generic MII registers. */ -#define MII_ESTATUS 0x0f /* Extended Status */ -/* Basic mode status register. */ -#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ -/* Extended status register. */ -#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ -#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ - -#define SUPPORTED_Pause (1 << 13) -#define SUPPORTED_Asym_Pause (1 << 14) -#define ADVERTISED_Pause (1 << 13) -#define ADVERTISED_Asym_Pause (1 << 14) - -#if (!(RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) -#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) -#define gfp_t unsigned -#else -typedef unsigned gfp_t; -#endif -#endif /* !RHEL4.3->RHEL5.0 */ - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) -#ifdef CONFIG_X86_64 -#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir) \ - dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir)) -#define dma_sync_single_range_for_device(dev, addr, off, sz, dir) \ - dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir)) -#endif -#endif -#endif /* < 2.6.14 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) -#ifndef vmalloc_node -#define vmalloc_node(a,b) vmalloc(a) -#endif /* vmalloc_node*/ - -#define setup_timer(_timer, _function, _data) \ -do { \ - (_timer)->function = _function; \ - (_timer)->data = _data; \ - init_timer(_timer); \ -} while (0) -#ifndef device_can_wakeup -#define device_can_wakeup(dev) (1) -#endif -#ifndef device_set_wakeup_enable -#define device_set_wakeup_enable(dev, val) do{}while(0) -#endif -#ifndef device_init_wakeup -#define device_init_wakeup(dev,val) do {} while (0) -#endif -static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) -{ - const u16 *a = (const u16 *) addr1; - const u16 *b = (const u16 *) addr2; - - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; -} 
-#undef compare_ether_addr -#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) -#endif /* < 2.6.15 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) -#undef DEFINE_MUTEX -#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) -#define mutex_lock(x) down_interruptible(x) -#define mutex_unlock(x) up(x) - -#ifndef ____cacheline_internodealigned_in_smp -#ifdef CONFIG_SMP -#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp -#else -#define ____cacheline_internodealigned_in_smp -#endif /* CONFIG_SMP */ -#endif /* ____cacheline_internodealigned_in_smp */ -#undef HAVE_PCI_ERS -#else /* 2.6.16 and above */ -#undef HAVE_PCI_ERS -#define HAVE_PCI_ERS -#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) ) -#ifdef device_can_wakeup -#undef device_can_wakeup -#endif /* device_can_wakeup */ -#define device_can_wakeup(dev) 1 -#endif /* SLE_VERSION(10,4,0) */ -#endif /* < 2.6.16 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) -#ifndef dev_notice -#define dev_notice(dev, fmt, args...) \ - dev_printk(KERN_NOTICE, dev, fmt, ## args) -#endif - -#ifndef first_online_node -#define first_online_node 0 -#endif -#ifndef NET_SKB_PAD -#define NET_SKB_PAD 16 -#endif -#endif /* < 2.6.17 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) - -#ifndef IRQ_HANDLED -#define irqreturn_t void -#define IRQ_HANDLED -#define IRQ_NONE -#endif - -#ifndef IRQF_PROBE_SHARED -#ifdef SA_PROBEIRQ -#define IRQF_PROBE_SHARED SA_PROBEIRQ -#else -#define IRQF_PROBE_SHARED 0 -#endif -#endif - -#ifndef IRQF_SHARED -#define IRQF_SHARED SA_SHIRQ -#endif - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -#ifndef FIELD_SIZEOF -#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) -#endif - -#ifndef skb_is_gso -#ifdef NETIF_F_TSO -#define skb_is_gso _kc_skb_is_gso -static inline int _kc_skb_is_gso(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_size; -} -#else -#define skb_is_gso(a) 0 -#endif -#endif - -#ifndef resource_size_t -#define resource_size_t unsigned long -#endif - -#ifdef skb_pad -#undef skb_pad -#endif -#define skb_pad(x,y) _kc_skb_pad(x, y) -int _kc_skb_pad(struct sk_buff *skb, int pad); -#ifdef skb_padto -#undef skb_padto -#endif -#define skb_padto(x,y) _kc_skb_padto(x, y) -static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if(likely(size >= len)) - return 0; - return _kc_skb_pad(skb, len - size); -} - -#ifndef DECLARE_PCI_UNMAP_ADDR -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ - dma_addr_t ADDR_NAME -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ - u32 LEN_NAME -#define pci_unmap_addr(PTR, ADDR_NAME) \ - ((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - (((PTR)->ADDR_NAME) = (VAL)) -#define pci_unmap_len(PTR, LEN_NAME) \ - ((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - (((PTR)->LEN_NAME) = (VAL)) -#endif /* DECLARE_PCI_UNMAP_ADDR */ -#endif /* < 2.6.18 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0))) -#define i_private u.generic_ip -#endif /* >= RHEL 5.0 */ - -#ifndef DIV_ROUND_UP -#define DIV_ROUND_UP(n,d) (((n) + (d) - 
1) / (d)) -#endif -#ifndef __ALIGN_MASK -#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -#endif -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) -#if (!((RHEL_RELEASE_CODE && \ - ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ - RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ - (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))))) -typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); -#endif -#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -#undef CONFIG_INET_LRO -#undef CONFIG_INET_LRO_MODULE -#ifdef IXGBE_FCOE -#undef CONFIG_FCOE -#undef CONFIG_FCOE_MODULE -#endif /* IXGBE_FCOE */ -#endif -typedef irqreturn_t (*new_handler_t)(int, void*); -static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) -#else /* 2.4.x */ -typedef void (*irq_handler_t)(int, void*, struct pt_regs *); -typedef void (*new_handler_t)(int, void*); -static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) -#endif /* >= 2.5.x */ -{ - irq_handler_t new_handler = (irq_handler_t) handler; - return request_irq(irq, new_handler, flags, devname, dev_id); -} - -#undef request_irq -#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) - -#define irq_handler_t new_handler_t -/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) -#define PCIE_CONFIG_SPACE_LEN 256 -#define PCI_CONFIG_SPACE_LEN 64 -#define PCIE_LINK_STATUS 0x12 -#define pci_config_space_ich8lan() do {} while(0) -#undef pci_save_state -extern int _kc_pci_save_state(struct pci_dev *); -#define pci_save_state(pdev) _kc_pci_save_state(pdev) -#undef pci_restore_state -extern void _kc_pci_restore_state(struct pci_dev *); -#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) -#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -#undef free_netdev -extern void _kc_free_netdev(struct net_device *); -#define free_netdev(netdev) _kc_free_netdev(netdev) -#endif -static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - return 0; -} -#define pci_disable_pcie_error_reporting(dev) do {} while (0) -#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) - -extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); -#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) -#ifndef bool -#define bool _Bool -#define true 1 -#define false 0 -#endif -#else /* 2.6.19 */ -#include -#include -#endif /* < 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) -#undef INIT_WORK -#define INIT_WORK(_work, _func) \ -do { \ - INIT_LIST_HEAD(&(_work)->entry); \ - (_work)->pending = 0; \ - (_work)->func = (void (*)(void *))_func; \ - (_work)->data = _work; \ - init_timer(&(_work)->timer); \ -} while (0) -#endif - -#ifndef PCI_VDEVICE -#define PCI_VDEVICE(ven, dev) \ - PCI_VENDOR_ID_##ven, (dev), \ - PCI_ANY_ID, PCI_ANY_ID, 0, 0 -#endif - -#ifndef PCI_VENDOR_ID_INTEL -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - -#ifndef round_jiffies -#define round_jiffies(x) x -#endif - -#define csum_offset csum - -#define HAVE_EARLY_VMALLOC_NODE -#define dev_to_node(dev) -1 -#undef set_dev_node -/* remove compiler warning with 
b=b, for unused variable */ -#define set_dev_node(a, b) do { (b) = (b); } while(0) - -#if (!(RHEL_RELEASE_CODE && \ - (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ - !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) -typedef __u16 __bitwise __sum16; -typedef __u32 __bitwise __wsum; -#endif - -#if (!(RHEL_RELEASE_CODE && \ - (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ - !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) -static inline __wsum csum_unfold(__sum16 n) -{ - return (__force __wsum)n; -} -#endif - -#else /* < 2.6.20 */ -#define HAVE_DEVICE_NUMA_NODE -#endif /* < 2.6.20 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -#define to_net_dev(class) container_of(class, struct net_device, class_dev) -#define NETDEV_CLASS_DEV -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) -#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) -#define vlan_group_set_device(vg, id, dev) \ - do { \ - if (vg) vg->vlan_devices[id] = dev; \ - } while (0) -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ -#define pci_channel_offline(pdev) (pdev->error_state && \ - pdev->error_state != pci_channel_io_normal) -#define pci_request_selected_regions(pdev, bars, name) \ - pci_request_regions(pdev, name) -#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); - -#ifndef __aligned -#define __aligned(x) __attribute__((aligned(x))) -#endif - -extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev); -#define netdev_to_dev(netdev) \ - pci_dev_to_dev(_kc_netdev_to_pdev(netdev)) -#else -static inline struct device *netdev_to_dev(struct net_device *netdev) -{ - return &netdev->dev; -} - -#endif /* < 2.6.21 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -#define tcp_hdr(skb) (skb->h.th) -#define tcp_hdrlen(skb) (skb->h.th->doff << 2) -#define skb_transport_offset(skb) (skb->h.raw - skb->data) -#define skb_transport_header(skb) (skb->h.raw) -#define ipv6_hdr(skb) (skb->nh.ipv6h) -#define ip_hdr(skb) (skb->nh.iph) -#define skb_network_offset(skb) (skb->nh.raw - skb->data) -#define skb_network_header(skb) (skb->nh.raw) -#define skb_tail_pointer(skb) skb->tail -#define skb_reset_tail_pointer(skb) \ - do { \ - skb->tail = skb->data; \ - } while (0) -#define skb_set_tail_pointer(skb, offset) \ - do { \ - skb->tail = skb->data + offset; \ - } while (0) -#define skb_copy_to_linear_data(skb, from, len) \ - memcpy(skb->data, from, len) -#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ - memcpy(skb->data + offset, from, len) -#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) -#define pci_register_driver pci_module_init -#define skb_mac_header(skb) skb->mac.raw - -#ifdef NETIF_F_MULTI_QUEUE -#ifndef alloc_etherdev_mq -#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) -#endif -#endif /* NETIF_F_MULTI_QUEUE */ - -#ifndef ETH_FCS_LEN -#define ETH_FCS_LEN 4 -#endif -#define cancel_work_sync(x) flush_scheduled_work() -#ifndef udp_hdr -#define udp_hdr _udp_hdr -static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_transport_header(skb); 
-} -#endif - -#ifdef cpu_to_be16 -#undef cpu_to_be16 -#endif -#define cpu_to_be16(x) __constant_htons(x) - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) -enum { - DUMP_PREFIX_NONE, - DUMP_PREFIX_ADDRESS, - DUMP_PREFIX_OFFSET -}; -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ -#ifndef hex_asc -#define hex_asc(x) "0123456789abcdef"[x] -#endif -#include -extern void _kc_print_hex_dump(const char *level, const char *prefix_str, - int prefix_type, int rowsize, int groupsize, - const void *buf, size_t len, bool ascii); -#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ - _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) -#ifndef ADVERTISED_2500baseX_Full -#define ADVERTISED_2500baseX_Full (1 << 15) -#endif -#ifndef SUPPORTED_2500baseX_Full -#define SUPPORTED_2500baseX_Full (1 << 15) -#endif - -#ifdef HAVE_I2C_SUPPORT -#include -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) -struct i2c_board_info { - char driver_name[KOBJ_NAME_LEN]; - char type[I2C_NAME_SIZE]; - unsigned short flags; - unsigned short addr; - void *platform_data; -}; -#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\ - .addr = (dev_addr) -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ -#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info) -extern struct i2c_client * -_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); -#endif /* HAVE_I2C_SUPPORT */ - -#else /* 2.6.22 */ -#define ETH_TYPE_TRANS_SETS_DEV -#define HAVE_NETDEV_STATS_IN_NETDEV -#endif /* < 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) -#undef SET_MODULE_OWNER -#define SET_MODULE_OWNER(dev) do { } while (0) -#endif /* > 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) -#define netif_subqueue_stopped(_a, _b) 0 -#ifndef PTR_ALIGN -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#endif - -#ifndef CONFIG_PM_SLEEP -#define CONFIG_PM_SLEEP CONFIG_PM -#endif - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) -#define HAVE_ETHTOOL_GET_PERM_ADDR -#endif /* 2.6.14 through 2.6.22 */ -#endif /* < 2.6.23 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifndef ETH_FLAG_LRO -#define ETH_FLAG_LRO NETIF_F_LRO -#endif - -/* if GRO is supported then the napi struct must already exist */ -#ifndef NETIF_F_GRO -/* NAPI API changes in 2.6.24 break everything */ -struct napi_struct { - /* used to look up the real NAPI polling routine */ - int (*poll)(struct napi_struct *, int); - struct net_device *dev; - int weight; -}; -#endif - -#ifdef NAPI -extern int __kc_adapter_clean(struct net_device *, int *); -extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi); -#define netif_napi_add(_netdev, _napi, _poll, _weight) \ - do { \ - struct napi_struct *__napi = (_napi); \ - struct net_device *poll_dev = napi_to_poll_dev(__napi); \ - poll_dev->poll = &(__kc_adapter_clean); \ - poll_dev->priv = (_napi); \ - poll_dev->weight = (_weight); \ - set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ - set_bit(__LINK_STATE_START, &poll_dev->state);\ - dev_hold(poll_dev); \ - __napi->poll = &(_poll); \ - __napi->weight = (_weight); \ - __napi->dev = (_netdev); \ - } while (0) -#define netif_napi_del(_napi) \ - do { \ - struct net_device 
*poll_dev = napi_to_poll_dev(_napi); \ - WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ - dev_put(poll_dev); \ - memset(poll_dev, 0, sizeof(struct net_device));\ - } while (0) -#define napi_schedule_prep(_napi) \ - (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) -#define napi_schedule(_napi) \ - do { \ - if (napi_schedule_prep(_napi)) \ - __netif_rx_schedule(napi_to_poll_dev(_napi)); \ - } while (0) -#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) -#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) -#ifdef CONFIG_SMP -static inline void napi_synchronize(const struct napi_struct *n) -{ - struct net_device *dev = napi_to_poll_dev(n); - - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { - /* No hurry. */ - msleep(1); - } -} -#else -#define napi_synchronize(n) barrier() -#endif /* CONFIG_SMP */ -#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) -#ifndef NETIF_F_GRO -#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) -#else -#define napi_complete(_napi) \ - do { \ - napi_gro_flush(_napi); \ - netif_rx_complete(napi_to_poll_dev(_napi)); \ - } while (0) -#endif /* NETIF_F_GRO */ -#else /* NAPI */ -#define netif_napi_add(_netdev, _napi, _poll, _weight) \ - do { \ - struct napi_struct *__napi = _napi; \ - _netdev->poll = &(_poll); \ - _netdev->weight = (_weight); \ - __napi->poll = &(_poll); \ - __napi->weight = (_weight); \ - __napi->dev = (_netdev); \ - } while (0) -#define netif_napi_del(_a) do {} while (0) -#endif /* NAPI */ - -#undef dev_get_by_name -#define dev_get_by_name(_a, _b) dev_get_by_name(_b) -#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) -#ifndef DMA_BIT_MASK -#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) -#endif - -#ifdef NETIF_F_TSO6 -#define skb_is_gso_v6 _kc_skb_is_gso_v6 -static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; -} -#endif /* NETIF_F_TSO6 */ - -#ifndef KERN_CONT -#define KERN_CONT "" -#endif -#ifndef pr_err -#define pr_err(fmt, arg...) 
\ - printk(KERN_ERR fmt, ##arg) -#endif -#else /* < 2.6.24 */ -#define HAVE_ETHTOOL_GET_SSET_COUNT -#define HAVE_NETDEV_NAPI_LIST -#endif /* < 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) -#include -#else /* >= 3.2.0 */ -#include -#endif /* else >= 3.2.0 */ -#endif /* > 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) -#define PM_QOS_CPU_DMA_LATENCY 1 - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) -#include -#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY -#define pm_qos_add_requirement(pm_qos_class, name, value) \ - set_acceptable_latency(name, value) -#define pm_qos_remove_requirement(pm_qos_class, name) \ - remove_acceptable_latency(name) -#define pm_qos_update_requirement(pm_qos_class, name, value) \ - modify_acceptable_latency(name, value) -#else -#define PM_QOS_DEFAULT_VALUE -1 -#define pm_qos_add_requirement(pm_qos_class, name, value) -#define pm_qos_remove_requirement(pm_qos_class, name) -#define pm_qos_update_requirement(pm_qos_class, name, value) { \ - if (value != PM_QOS_DEFAULT_VALUE) { \ - printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ - pci_name(adapter->pdev)); \ - } \ -} - -#endif /* > 2.6.18 */ - -#define pci_enable_device_mem(pdev) pci_enable_device(pdev) - -#ifndef DEFINE_PCI_DEVICE_TABLE -#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] -#endif /* DEFINE_PCI_DEVICE_TABLE */ - - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) -#ifndef IGB_PROCFS -#define IGB_PROCFS -#endif /* IGB_PROCFS */ -#endif /* >= 2.6.0 */ - -#else /* < 2.6.25 */ - - -#if IS_ENABLED(CONFIG_HWMON) -#ifndef IGB_HWMON -#define IGB_HWMON -#endif /* IGB_HWMON */ -#endif /* CONFIG_HWMON */ - -#endif /* < 2.6.25 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -#ifndef clamp_t -#define clamp_t(type, val, min, max) ({ \ - type __val = (val); \ - type __min = (min); \ - type __max = (max); \ - __val = __val < __min ? __min : __val; \ - __val > __max ? 
__max : __val; }) -#endif /* clamp_t */ -#undef kzalloc_node -#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) - -extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); -#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) -#else /* < 2.6.26 */ -#include -#define HAVE_NETDEV_VLAN_FEATURES -#ifndef PCI_EXP_LNKCAP_ASPMS -#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ -#endif /* PCI_EXP_LNKCAP_ASPMS */ -#endif /* < 2.6.26 */ -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, - __u32 speed) -{ - ep->speed = (__u16)speed; - /* ep->speed_hi = (__u16)(speed >> 16); */ -} -#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set - -static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) -{ - /* no speed_hi before 2.6.27, and probably no need for it yet */ - return (__u32)ep->speed; -} -#define ethtool_cmd_speed _kc_ethtool_cmd_speed - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) -#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) -#define ANCIENT_PM 1 -#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ - defined(CONFIG_PM_SLEEP)) -#define NEWER_PM 1 -#endif -#if defined(ANCIENT_PM) || defined(NEWER_PM) -#undef device_set_wakeup_enable -#define device_set_wakeup_enable(dev, val) \ - do { \ - u16 pmc = 0; \ - int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ - if (pm) { \ - pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ - &pmc); \ - } \ - (dev)->power.can_wakeup = !!(pmc >> 11); \ - (dev)->power.should_wakeup = (val && (pmc >> 11)); \ - } while (0) -#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ -#endif /* 2.6.15 through 2.6.27 */ -#ifndef netif_napi_del -#define netif_napi_del(_a) do {} while (0) -#ifdef NAPI -#ifdef CONFIG_NETPOLL -#undef netif_napi_del -#define netif_napi_del(_a) list_del(&(_a)->dev_list); -#endif -#endif -#endif /* netif_napi_del */ -#ifdef dma_mapping_error -#undef dma_mapping_error -#endif -#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) - -#ifdef CONFIG_NETDEVICES_MULTIQUEUE -#define HAVE_TX_MQ -#endif - -#ifdef HAVE_TX_MQ -extern void _kc_netif_tx_stop_all_queues(struct net_device *); -extern void _kc_netif_tx_wake_all_queues(struct net_device *); -extern void _kc_netif_tx_start_all_queues(struct net_device *); -#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) -#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) -#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) -#undef netif_stop_subqueue -#define netif_stop_subqueue(_ndev,_qi) do { \ - if (netif_is_multiqueue((_ndev))) \ - netif_stop_subqueue((_ndev), (_qi)); \ - else \ - netif_stop_queue((_ndev)); \ - } while (0) -#undef netif_start_subqueue -#define netif_start_subqueue(_ndev,_qi) do { \ - if (netif_is_multiqueue((_ndev))) \ - netif_start_subqueue((_ndev), (_qi)); \ - else \ - netif_start_queue((_ndev)); \ - } while (0) -#else /* HAVE_TX_MQ */ -#define netif_tx_stop_all_queues(a) netif_stop_queue(a) -#define netif_tx_wake_all_queues(a) netif_wake_queue(a) -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) -#define netif_tx_start_all_queues(a) netif_start_queue(a) -#else -#define netif_tx_start_all_queues(a) do {} while (0) -#endif -#define netif_stop_subqueue(_ndev,_qi) 
netif_stop_queue((_ndev)) -#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) -#endif /* HAVE_TX_MQ */ -#ifndef NETIF_F_MULTI_QUEUE -#define NETIF_F_MULTI_QUEUE 0 -#define netif_is_multiqueue(a) 0 -#define netif_wake_subqueue(a, b) -#endif /* NETIF_F_MULTI_QUEUE */ - -#ifndef __WARN_printf -extern void __kc_warn_slowpath(const char *file, const int line, - const char *fmt, ...) __attribute__((format(printf, 3, 4))); -#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) -#endif /* __WARN_printf */ - -#ifndef WARN -#define WARN(condition, format...) ({ \ - int __ret_warn_on = !!(condition); \ - if (unlikely(__ret_warn_on)) \ - __WARN_printf(format); \ - unlikely(__ret_warn_on); \ -}) -#endif /* WARN */ -#undef HAVE_IXGBE_DEBUG_FS -#undef HAVE_IGB_DEBUG_FS -#else /* < 2.6.27 */ -#define HAVE_TX_MQ -#define HAVE_NETDEV_SELECT_QUEUE -#ifdef CONFIG_DEBUG_FS -#define HAVE_IXGBE_DEBUG_FS -#define HAVE_IGB_DEBUG_FS -#endif /* CONFIG_DEBUG_FS */ -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) -#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ - pci_resource_len(pdev, bar)) -#define pci_wake_from_d3 _kc_pci_wake_from_d3 -#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep -extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); -extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); -#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) -#ifndef __skb_queue_head_init -static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) -{ - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; -} -#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) -#endif - -#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ -#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ - -#endif /* < 2.6.28 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) -#ifndef swap -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) -#endif -#define pci_request_selected_regions_exclusive(pdev, bars, name) \ - pci_request_selected_regions(pdev, bars, name) -#ifndef CONFIG_NR_CPUS -#define CONFIG_NR_CPUS 1 -#endif /* CONFIG_NR_CPUS */ -#ifndef pcie_aspm_enabled -#define pcie_aspm_enabled() (1) -#endif /* pcie_aspm_enabled */ - -#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ - -#ifndef pci_clear_master -extern void _kc_pci_clear_master(struct pci_dev *dev); -#define pci_clear_master(dev) _kc_pci_clear_master(dev) -#endif - -#ifndef PCI_EXP_LNKCTL_ASPMC -#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ -#endif -#else /* < 2.6.29 */ -#ifndef HAVE_NET_DEVICE_OPS -#define HAVE_NET_DEVICE_OPS -#endif -#ifdef CONFIG_DCB -#define HAVE_PFC_MODE_ENABLE -#endif /* CONFIG_DCB */ -#endif /* < 2.6.29 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) -#define skb_rx_queue_recorded(a) false -#define skb_get_rx_queue(a) 0 -#define skb_record_rx_queue(a, b) do {} while (0) -#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues) -#ifndef CONFIG_PCI_IOV -#undef pci_enable_sriov -#define pci_enable_sriov(a, b) -ENOTSUPP -#undef pci_disable_sriov -#define pci_disable_sriov(a) do {} while (0) -#endif /* CONFIG_PCI_IOV */ -#ifndef pr_cont -#define pr_cont(fmt, ...) 
\ - printk(KERN_CONT fmt, ##__VA_ARGS__) -#endif /* pr_cont */ -static inline void _kc_synchronize_irq(unsigned int a) -{ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) - synchronize_irq(); -#else /* < 2.5.28 */ - synchronize_irq(a); -#endif /* < 2.5.28 */ -} -#undef synchronize_irq -#define synchronize_irq(a) _kc_synchronize_irq(a) - -#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ - -#else /* < 2.6.30 */ -#define HAVE_ASPM_QUIRKS -#endif /* < 2.6.30 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) -#define ETH_P_1588 0x88F7 -#define ETH_P_FIP 0x8914 -#ifndef netdev_uc_count -#define netdev_uc_count(dev) ((dev)->uc_count) -#endif -#ifndef netdev_for_each_uc_addr -#define netdev_for_each_uc_addr(uclist, dev) \ - for (uclist = dev->uc_list; uclist; uclist = uclist->next) -#endif -#ifndef PORT_OTHER -#define PORT_OTHER 0xff -#endif -#ifndef MDIO_PHY_ID_PRTAD -#define MDIO_PHY_ID_PRTAD 0x03e0 -#endif -#ifndef MDIO_PHY_ID_DEVAD -#define MDIO_PHY_ID_DEVAD 0x001f -#endif -#ifndef skb_dst -#define skb_dst(s) ((s)->dst) -#endif - -#ifndef SUPPORTED_1000baseKX_Full -#define SUPPORTED_1000baseKX_Full (1 << 17) -#endif -#ifndef SUPPORTED_10000baseKX4_Full -#define SUPPORTED_10000baseKX4_Full (1 << 18) -#endif -#ifndef SUPPORTED_10000baseKR_Full -#define SUPPORTED_10000baseKR_Full (1 << 19) -#endif - -#ifndef ADVERTISED_1000baseKX_Full -#define ADVERTISED_1000baseKX_Full (1 << 17) -#endif -#ifndef ADVERTISED_10000baseKX4_Full -#define ADVERTISED_10000baseKX4_Full (1 << 18) -#endif -#ifndef ADVERTISED_10000baseKR_Full -#define ADVERTISED_10000baseKR_Full (1 << 19) -#endif - -#else /* < 2.6.31 */ -#ifndef HAVE_NETDEV_STORAGE_ADDRESS -#define HAVE_NETDEV_STORAGE_ADDRESS -#endif -#ifndef HAVE_NETDEV_HW_ADDR -#define HAVE_NETDEV_HW_ADDR -#endif -#ifndef HAVE_TRANS_START_IN_QUEUE -#define HAVE_TRANS_START_IN_QUEUE -#endif -#ifndef HAVE_INCLUDE_LINUX_MDIO_H -#define HAVE_INCLUDE_LINUX_MDIO_H -#endif -#endif /* < 2.6.31 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) -#undef netdev_tx_t -#define netdev_tx_t int -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef NETIF_F_FCOE_MTU -#define NETIF_F_FCOE_MTU (1 << 26) -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -static inline int _kc_pm_runtime_get_sync() -{ - return 1; -} -#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync() -#else /* 2.6.0 => 2.6.32 */ -static inline int _kc_pm_runtime_get_sync(struct device *dev) -{ - return 1; -} -#ifndef pm_runtime_get_sync -#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync(dev) -#endif -#endif /* 2.6.0 => 2.6.32 */ -#ifndef pm_runtime_put -#define pm_runtime_put(dev) do {} while (0) -#endif -#ifndef pm_runtime_put_sync -#define pm_runtime_put_sync(dev) do {} while (0) -#endif -#ifndef pm_runtime_resume -#define pm_runtime_resume(dev) do {} while (0) -#endif -#ifndef pm_schedule_suspend -#define pm_schedule_suspend(dev, t) do {} while (0) -#endif -#ifndef pm_runtime_set_suspended -#define pm_runtime_set_suspended(dev) do {} while (0) -#endif -#ifndef pm_runtime_disable -#define pm_runtime_disable(dev) do {} while (0) -#endif -#ifndef pm_runtime_put_noidle -#define pm_runtime_put_noidle(dev) do {} while (0) -#endif -#ifndef pm_runtime_set_active -#define pm_runtime_set_active(dev) do {} while (0) -#endif -#ifndef pm_runtime_enable -#define 
pm_runtime_enable(dev) do {} while (0) -#endif -#ifndef pm_runtime_get_noresume -#define pm_runtime_get_noresume(dev) do {} while (0) -#endif -#else /* < 2.6.32 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE -#define HAVE_NETDEV_OPS_FCOE_ENABLE -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#ifdef CONFIG_DCB -#ifndef HAVE_DCBNL_OPS_GETAPP -#define HAVE_DCBNL_OPS_GETAPP -#endif -#endif /* CONFIG_DCB */ -#include -/* IOV bad DMA target work arounds require at least this kernel rev support */ -#define HAVE_PCIE_TYPE -#endif /* < 2.6.32 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) -#ifndef pci_pcie_cap -#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) -#endif -#ifndef IPV4_FLOW -#define IPV4_FLOW 0x10 -#endif /* IPV4_FLOW */ -#ifndef IPV6_FLOW -#define IPV6_FLOW 0x11 -#endif /* IPV6_FLOW */ -/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */ -#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN -#define HAVE_NETDEV_OPS_FCOE_GETWWN -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#endif /* RHEL6 or SLES11 SP1 */ -#ifndef __percpu -#define __percpu -#endif /* __percpu */ -#ifndef PORT_DA -#define PORT_DA PORT_OTHER -#endif -#ifndef PORT_NONE -#define PORT_NONE PORT_OTHER -#endif - -#if ((RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))) -#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE) -#undef DEFINE_DMA_UNMAP_ADDR -#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME -#undef DEFINE_DMA_UNMAP_LEN -#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME -#undef dma_unmap_addr -#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) -#undef dma_unmap_addr_set -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) -#undef dma_unmap_len -#define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) -#undef dma_unmap_len_set -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) -#endif /* CONFIG_X86_64 && !CONFIG_NEED_DMA_MAP_STATE */ -#endif /* RHEL_RELEASE_CODE */ - -#if (!(RHEL_RELEASE_CODE && \ - (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \ - ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))))) -static inline bool pci_is_pcie(struct pci_dev *dev) -{ - return !!pci_pcie_cap(dev); -} -#endif /* RHEL_RELEASE_CODE */ - -#ifndef __always_unused -#define __always_unused __attribute__((__unused__)) -#endif -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - -#if (!(RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)))) -#define sk_tx_queue_get(_sk) (-1) -#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0) -#endif /* !(RHEL >= 6.2) */ - -#if (RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) -#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT -#define HAVE_ETHTOOL_SET_PHYS_ID -#define HAVE_ETHTOOL_GET_TS_INFO -#endif /* RHEL >= 6.4 && RHEL < 7.0 */ - -#if (RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \ - 
(RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) -#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB -#endif /* RHEL >= 6.5 && RHEL < 7.0 */ - -#else /* < 2.6.33 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN -#define HAVE_NETDEV_OPS_FCOE_GETWWN -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#endif /* < 2.6.33 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) -#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -#ifndef pci_num_vf -#define pci_num_vf(pdev) _kc_pci_num_vf(pdev) -extern int _kc_pci_num_vf(struct pci_dev *dev); -#endif -#endif /* RHEL_RELEASE_CODE */ - -#ifndef ETH_FLAG_NTUPLE -#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE -#endif - -#ifndef netdev_mc_count -#define netdev_mc_count(dev) ((dev)->mc_count) -#endif -#ifndef netdev_mc_empty -#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) -#endif -#ifndef netdev_for_each_mc_addr -#define netdev_for_each_mc_addr(mclist, dev) \ - for (mclist = dev->mc_list; mclist; mclist = mclist->next) -#endif -#ifndef netdev_uc_count -#define netdev_uc_count(dev) ((dev)->uc.count) -#endif -#ifndef netdev_uc_empty -#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) -#endif -#ifndef netdev_for_each_uc_addr -#define netdev_for_each_uc_addr(ha, dev) \ - list_for_each_entry(ha, &dev->uc.list, list) -#endif -#ifndef dma_set_coherent_mask -#define dma_set_coherent_mask(dev,mask) \ - pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) -#endif -#ifndef pci_dev_run_wake -#define pci_dev_run_wake(pdev) (0) -#endif - -/* netdev logging taken from include/linux/netdevice.h */ -#ifndef netdev_name -static inline const char *_kc_netdev_name(const struct net_device *dev) -{ - if (dev->reg_state != NETREG_REGISTERED) - return "(unregistered net_device)"; - return dev->name; -} -#define netdev_name(netdev) _kc_netdev_name(netdev) -#endif /* netdev_name */ - -#undef netdev_printk -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -#define netdev_printk(level, netdev, format, args...) \ -do { \ - struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ - printk(level "%s: " format, pci_name(pdev), ##args); \ -} while(0) -#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -#define netdev_printk(level, netdev, format, args...) \ -do { \ - struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ - struct device *dev = pci_dev_to_dev(pdev); \ - dev_printk(level, dev, "%s: " format, \ - netdev_name(netdev), ##args); \ -} while(0) -#else /* 2.6.21 => 2.6.34 */ -#define netdev_printk(level, netdev, format, args...) \ - dev_printk(level, (netdev)->dev.parent, \ - "%s: " format, \ - netdev_name(netdev), ##args) -#endif /* <2.6.0 <2.6.21 <2.6.34 */ -#undef netdev_emerg -#define netdev_emerg(dev, format, args...) \ - netdev_printk(KERN_EMERG, dev, format, ##args) -#undef netdev_alert -#define netdev_alert(dev, format, args...) \ - netdev_printk(KERN_ALERT, dev, format, ##args) -#undef netdev_crit -#define netdev_crit(dev, format, args...) \ - netdev_printk(KERN_CRIT, dev, format, ##args) -#undef netdev_err -#define netdev_err(dev, format, args...) \ - netdev_printk(KERN_ERR, dev, format, ##args) -#undef netdev_warn -#define netdev_warn(dev, format, args...) \ - netdev_printk(KERN_WARNING, dev, format, ##args) -#undef netdev_notice -#define netdev_notice(dev, format, args...) \ - netdev_printk(KERN_NOTICE, dev, format, ##args) -#undef netdev_info -#define netdev_info(dev, format, args...) 
\ - netdev_printk(KERN_INFO, dev, format, ##args) -#undef netdev_dbg -#if defined(DEBUG) -#define netdev_dbg(__dev, format, args...) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) -#define netdev_dbg(__dev, format, args...) \ -do { \ - dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ - netdev_name(__dev), ##args); \ -} while (0) -#else /* DEBUG */ -#define netdev_dbg(__dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args); \ - 0; \ -}) -#endif /* DEBUG */ - -#undef netif_printk -#define netif_printk(priv, type, level, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_printk(level, (dev), fmt, ##args); \ -} while (0) - -#undef netif_emerg -#define netif_emerg(priv, type, dev, fmt, args...) \ - netif_level(emerg, priv, type, dev, fmt, ##args) -#undef netif_alert -#define netif_alert(priv, type, dev, fmt, args...) \ - netif_level(alert, priv, type, dev, fmt, ##args) -#undef netif_crit -#define netif_crit(priv, type, dev, fmt, args...) \ - netif_level(crit, priv, type, dev, fmt, ##args) -#undef netif_err -#define netif_err(priv, type, dev, fmt, args...) \ - netif_level(err, priv, type, dev, fmt, ##args) -#undef netif_warn -#define netif_warn(priv, type, dev, fmt, args...) \ - netif_level(warn, priv, type, dev, fmt, ##args) -#undef netif_notice -#define netif_notice(priv, type, dev, fmt, args...) \ - netif_level(notice, priv, type, dev, fmt, ##args) -#undef netif_info -#define netif_info(priv, type, dev, fmt, args...) \ - netif_level(info, priv, type, dev, fmt, ##args) -#undef netif_dbg -#define netif_dbg(priv, type, dev, fmt, args...) \ - netif_level(dbg, priv, type, dev, fmt, ##args) - -#ifdef SET_SYSTEM_SLEEP_PM_OPS -#define HAVE_SYSTEM_SLEEP_PM_OPS -#endif - -#ifndef for_each_set_bit -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) -#endif /* for_each_set_bit */ - -#ifndef DEFINE_DMA_UNMAP_ADDR -#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR -#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN -#define dma_unmap_addr pci_unmap_addr -#define dma_unmap_addr_set pci_unmap_addr_set -#define dma_unmap_len pci_unmap_len -#define dma_unmap_len_set pci_unmap_len_set -#endif /* DEFINE_DMA_UNMAP_ADDR */ - -#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3)) -#ifdef IGB_HWMON -#ifdef CONFIG_DEBUG_LOCK_ALLOC -#define sysfs_attr_init(attr) \ - do { \ - static struct lock_class_key __key; \ - (attr)->key = &__key; \ - } while (0) -#else -#define sysfs_attr_init(attr) do {} while (0) -#endif /* CONFIG_DEBUG_LOCK_ALLOC */ -#endif /* IGB_HWMON */ -#endif /* RHEL_RELEASE_CODE */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -static inline bool _kc_pm_runtime_suspended() -{ - return false; -} -#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended() -#else /* 2.6.0 => 2.6.34 */ -static inline bool _kc_pm_runtime_suspended(struct device *dev) -{ - return false; -} -#ifndef pm_runtime_suspended -#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended(dev) -#endif -#endif /* 2.6.0 => 2.6.34 */ - -#else /* < 2.6.34 */ -#define HAVE_SYSTEM_SLEEP_PM_OPS -#ifndef HAVE_SET_RX_MODE -#define HAVE_SET_RX_MODE -#endif - -#endif /* < 2.6.34 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) - -ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, - const void __user *from, 
size_t count); -#define simple_write_to_buffer _kc_simple_write_to_buffer - -#ifndef numa_node_id -#define numa_node_id() 0 -#endif -#ifdef HAVE_TX_MQ -#include -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) -void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); -#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues -#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ -#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ -#define netif_set_real_num_tx_queues(_netdev, _count) \ - do { \ - (_netdev)->egress_subqueue_count = _count; \ - } while (0) -#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ -#else /* HAVE_TX_MQ */ -#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) -#endif /* HAVE_TX_MQ */ -#ifndef ETH_FLAG_RXHASH -#define ETH_FLAG_RXHASH (1<<28) -#endif /* ETH_FLAG_RXHASH */ -#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) -#define HAVE_IRQ_AFFINITY_HINT -#endif -#else /* < 2.6.35 */ -#define HAVE_PM_QOS_REQUEST_LIST -#define HAVE_IRQ_AFFINITY_HINT -#endif /* < 2.6.35 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); -#define ethtool_op_set_flags _kc_ethtool_op_set_flags -extern u32 _kc_ethtool_op_get_flags(struct net_device *); -#define ethtool_op_get_flags _kc_ethtool_op_get_flags - -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#ifdef NET_IP_ALIGN -#undef NET_IP_ALIGN -#endif -#define NET_IP_ALIGN 0 -#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - -#ifdef NET_SKB_PAD -#undef NET_SKB_PAD -#endif - -#if (L1_CACHE_BYTES > 32) -#define NET_SKB_PAD L1_CACHE_BYTES -#else -#define NET_SKB_PAD 32 -#endif - -static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length) -{ - struct sk_buff *skb; - - skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); - if (skb) { -#if (NET_IP_ALIGN + NET_SKB_PAD) - skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); -#endif - skb->dev = dev; - } - return skb; -} - -#ifdef netdev_alloc_skb_ip_align -#undef netdev_alloc_skb_ip_align -#endif -#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) - -#undef netif_level -#define netif_level(level, priv, type, dev, fmt, args...) 
\ -do { \ - if (netif_msg_##type(priv)) \ - netdev_##level(dev, fmt, ##args); \ -} while (0) - -#undef usleep_range -#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) - -#define u64_stats_update_begin(a) do { } while(0) -#define u64_stats_update_end(a) do { } while(0) -#define u64_stats_fetch_begin(a) do { } while(0) -#define u64_stats_fetch_retry_bh(a) (0) -#define u64_stats_fetch_begin_bh(a) (0) - -#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) -#define HAVE_8021P_SUPPORT -#endif - -#else /* < 2.6.36 */ - - -#define HAVE_PM_QOS_REQUEST_ACTIVE -#define HAVE_8021P_SUPPORT -#define HAVE_NDO_GET_STATS64 -#endif /* < 2.6.36 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) -#ifndef netif_set_real_num_rx_queues -static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxq) -{ - return 0; -} -#define netif_set_real_num_rx_queues(dev, rxq) \ - __kc_netif_set_real_num_rx_queues((dev), (rxq)) -#endif -#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR -#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) -#endif -#ifndef VLAN_N_VID -#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN -#endif /* VLAN_N_VID */ -#ifndef ETH_FLAG_TXVLAN -#define ETH_FLAG_TXVLAN (1 << 7) -#endif /* ETH_FLAG_TXVLAN */ -#ifndef ETH_FLAG_RXVLAN -#define ETH_FLAG_RXVLAN (1 << 8) -#endif /* ETH_FLAG_RXVLAN */ - -static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) -{ - WARN_ON(skb->ip_summed != CHECKSUM_NONE); -} -#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) - -static inline void *_kc_vzalloc_node(unsigned long size, int node) -{ - void *addr = vmalloc_node(size, node); - if (addr) - memset(addr, 0, size); - return addr; -} -#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) - -static inline void *_kc_vzalloc(unsigned long size) -{ - void *addr = vmalloc(size); - if (addr) - memset(addr, 0, size); - return addr; -} -#define vzalloc(_size) _kc_vzalloc(_size) - -#ifndef vlan_get_protocol -static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) -{ - if (vlan_tx_tag_present(skb) || - skb->protocol != cpu_to_be16(ETH_P_8021Q)) - return skb->protocol; - - if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) - return 0; - - return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; -} -#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) -#endif -#ifdef HAVE_HW_TIME_STAMP -#define SKBTX_HW_TSTAMP (1 << 0) -#define SKBTX_IN_PROGRESS (1 << 2) -#define SKB_SHARED_TX_IS_UNION -#endif - -#ifndef device_wakeup_enable -#define device_wakeup_enable(dev) device_set_wakeup_enable(dev, true) -#endif - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) -#ifndef HAVE_VLAN_RX_REGISTER -#define HAVE_VLAN_RX_REGISTER -#endif -#endif /* > 2.4.18 */ -#endif /* < 2.6.37 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) ) -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -#define skb_checksum_start_offset(skb) skb_transport_offset(skb) -#else /* 2.6.22 -> 2.6.37 */ -static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) -{ - return skb->csum_start - skb_headroom(skb); -} -#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) -#endif /* 2.6.22 -> 2.6.37 */ -#ifdef CONFIG_DCB -#ifndef IEEE_8021QAZ_MAX_TCS -#define IEEE_8021QAZ_MAX_TCS 8 -#endif -#ifndef DCB_CAP_DCBX_HOST -#define DCB_CAP_DCBX_HOST 0x01 -#endif 
-#ifndef DCB_CAP_DCBX_LLD_MANAGED -#define DCB_CAP_DCBX_LLD_MANAGED 0x02 -#endif -#ifndef DCB_CAP_DCBX_VER_CEE -#define DCB_CAP_DCBX_VER_CEE 0x04 -#endif -#ifndef DCB_CAP_DCBX_VER_IEEE -#define DCB_CAP_DCBX_VER_IEEE 0x08 -#endif -#ifndef DCB_CAP_DCBX_STATIC -#define DCB_CAP_DCBX_STATIC 0x10 -#endif -#endif /* CONFIG_DCB */ -#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) -#define CONFIG_XPS -#endif /* RHEL_RELEASE_VERSION(6,2) */ -#endif /* < 2.6.38 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#ifndef NETIF_F_RXCSUM -#define NETIF_F_RXCSUM (1 << 29) -#endif -#ifndef skb_queue_reverse_walk_safe -#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ - for (skb = (queue)->prev, tmp = skb->prev; \ - skb != (struct sk_buff *)(queue); \ - skb = tmp, tmp = skb->prev) -#endif -#else /* < 2.6.39 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET -#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#ifndef HAVE_MQPRIO -#define HAVE_MQPRIO -#endif -#ifndef HAVE_SETUP_TC -#define HAVE_SETUP_TC -#endif -#ifdef CONFIG_DCB -#ifndef HAVE_DCBNL_IEEE -#define HAVE_DCBNL_IEEE -#endif -#endif /* CONFIG_DCB */ -#ifndef HAVE_NDO_SET_FEATURES -#define HAVE_NDO_SET_FEATURES -#endif -#endif /* < 2.6.39 */ - -/*****************************************************************************/ -/* use < 2.6.40 because of a Fedora 15 kernel update where they - * updated the kernel version to 2.6.40.x and they back-ported 3.0 features - * like set_phys_id for ethtool. - */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) -#ifdef ETHTOOL_GRXRINGS -#ifndef FLOW_EXT -#define FLOW_EXT 0x80000000 -union _kc_ethtool_flow_union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[60]; -}; -struct _kc_ethtool_flow_ext { - __be16 vlan_etype; - __be16 vlan_tci; - __be32 data[2]; -}; -struct _kc_ethtool_rx_flow_spec { - __u32 flow_type; - union _kc_ethtool_flow_union h_u; - struct _kc_ethtool_flow_ext h_ext; - union _kc_ethtool_flow_union m_u; - struct _kc_ethtool_flow_ext m_ext; - __u64 ring_cookie; - __u32 location; -}; -#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec -#endif /* FLOW_EXT */ -#endif - -#define pci_disable_link_state_locked pci_disable_link_state - -#ifndef PCI_LTR_VALUE_MASK -#define PCI_LTR_VALUE_MASK 0x000003ff -#endif -#ifndef PCI_LTR_SCALE_MASK -#define PCI_LTR_SCALE_MASK 0x00001c00 -#endif -#ifndef PCI_LTR_SCALE_SHIFT -#define PCI_LTR_SCALE_SHIFT 10 -#endif - -#else /* < 2.6.40 */ -#define HAVE_ETHTOOL_SET_PHYS_ID -#endif /* < 2.6.40 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) ) -#define USE_LEGACY_PM_SUPPORT -#endif /* < 3.0.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) -#ifndef __netdev_alloc_skb_ip_align -#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) -#endif /* __netdev_alloc_skb_ip_align */ -#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) -#define dcb_ieee_delapp(dev, app) 0 -#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) - -/* 1000BASE-T Control register */ -#define CTL1000_AS_MASTER 0x0800 -#define CTL1000_ENABLE_MASTER 0x1000 - -#else /* < 3.1.0 */ -#ifndef HAVE_DCBNL_IEEE_DELAPP -#define HAVE_DCBNL_IEEE_DELAPP -#endif 
-#endif /* < 3.1.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) -#ifdef ETHTOOL_GRXRINGS -#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS -#endif /* ETHTOOL_GRXRINGS */ - -#ifndef skb_frag_size -#define skb_frag_size(frag) _kc_skb_frag_size(frag) -static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) -{ - return frag->size; -} -#endif /* skb_frag_size */ - -#ifndef skb_frag_size_sub -#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) -static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) -{ - frag->size -= delta; -} -#endif /* skb_frag_size_sub */ - -#ifndef skb_frag_page -#define skb_frag_page(frag) _kc_skb_frag_page(frag) -static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) -{ - return frag->page; -} -#endif /* skb_frag_page */ - -#ifndef skb_frag_address -#define skb_frag_address(frag) _kc_skb_frag_address(frag) -static inline void *_kc_skb_frag_address(const skb_frag_t *frag) -{ - return page_address(skb_frag_page(frag)) + frag->page_offset; -} -#endif /* skb_frag_address */ - -#ifndef skb_frag_dma_map -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) -#include -#endif -#define skb_frag_dma_map(dev,frag,offset,size,dir) \ - _kc_skb_frag_dma_map(dev,frag,offset,size,dir) -static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) -{ - return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); -} -#endif /* skb_frag_dma_map */ - -#ifndef __skb_frag_unref -#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) -static inline void __kc_skb_frag_unref(skb_frag_t *frag) -{ - put_page(skb_frag_page(frag)); -} -#endif /* __skb_frag_unref */ - -#ifndef SPEED_UNKNOWN -#define SPEED_UNKNOWN -1 -#endif -#ifndef DUPLEX_UNKNOWN -#define DUPLEX_UNKNOWN 0xff -#endif -#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) -#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED -#define HAVE_PCI_DEV_FLAGS_ASSIGNED -#endif -#endif -#else /* < 3.2.0 */ -#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED -#define HAVE_PCI_DEV_FLAGS_ASSIGNED -#define HAVE_VF_SPOOFCHK_CONFIGURE -#endif -#endif /* < 3.2.0 */ - -#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2)) -#undef ixgbe_get_netdev_tc_txq -#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) -#endif -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) -typedef u32 kni_netdev_features_t; -#undef PCI_EXP_TYPE_RC_EC -#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ -#ifndef CONFIG_BQL -#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0) -#define netdev_completed_queue(_n, _p, _b) do {} while (0) -#define netdev_tx_sent_queue(_q, _b) do {} while (0) -#define netdev_sent_queue(_n, _b) do {} while (0) -#define netdev_tx_reset_queue(_q) do {} while (0) -#define netdev_reset_queue(_n) do {} while (0) -#endif -#else /* ! 
< 3.3.0 */ -typedef netdev_features_t kni_netdev_features_t; -#define HAVE_INT_NDO_VLAN_RX_ADD_VID -#ifdef ETHTOOL_SRXNTUPLE -#undef ETHTOOL_SRXNTUPLE -#endif -#endif /* < 3.3.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -#ifndef NETIF_F_RXFCS -#define NETIF_F_RXFCS 0 -#endif /* NETIF_F_RXFCS */ -#ifndef NETIF_F_RXALL -#define NETIF_F_RXALL 0 -#endif /* NETIF_F_RXALL */ - -#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) -#define NUMTCS_RETURNS_U8 - -int _kc_simple_open(struct inode *inode, struct file *file); -#define simple_open _kc_simple_open -#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ - - -#ifndef skb_add_rx_frag -#define skb_add_rx_frag _kc_skb_add_rx_frag -extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, - int, int, unsigned int); -#endif -#ifdef NET_ADDR_RANDOM -#define eth_hw_addr_random(N) do { \ - random_ether_addr(N->dev_addr); \ - N->addr_assign_type |= NET_ADDR_RANDOM; \ - } while (0) -#else /* NET_ADDR_RANDOM */ -#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr) -#endif /* NET_ADDR_RANDOM */ -#else /* < 3.4.0 */ -#include -#endif /* >= 3.4.0 */ - -/*****************************************************************************/ -#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP) -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK) -#define HAVE_PTP_1588_CLOCK -#else -#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel -#endif /* > 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ -#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) -#define skb_tx_timestamp(skb) do {} while (0) -static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) -{ - return !compare_ether_addr(addr1, addr2); -} -#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) -#else -#define HAVE_FDB_OPS -#define HAVE_ETHTOOL_GET_TS_INFO -#endif /* < 3.5.0 */ - -/*****************************************************************************/ -#include -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) ) -#define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */ - -#ifndef MDIO_EEE_100TX -#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */ -#endif -#ifndef MDIO_EEE_1000T -#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */ -#endif -#ifndef MDIO_EEE_10GT -#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */ -#endif -#ifndef MDIO_EEE_1000KX -#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */ -#endif -#ifndef MDIO_EEE_10GKX4 -#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ -#endif -#ifndef MDIO_EEE_10GKR -#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ -#endif -#endif /* < 3.6.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) -#ifndef ADVERTISED_40000baseKR4_Full -/* these defines were all added in one commit, so should be safe - * to trigger activiation on one define - */ -#define SUPPORTED_40000baseKR4_Full (1 << 23) -#define SUPPORTED_40000baseCR4_Full (1 << 24) -#define SUPPORTED_40000baseSR4_Full (1 << 25) -#define SUPPORTED_40000baseLR4_Full (1 << 26) -#define ADVERTISED_40000baseKR4_Full (1 << 23) -#define 
ADVERTISED_40000baseCR4_Full (1 << 24) -#define ADVERTISED_40000baseSR4_Full (1 << 25) -#define ADVERTISED_40000baseLR4_Full (1 << 26) -#endif - -/** - * mmd_eee_cap_to_ethtool_sup_t - * @eee_cap: value of the MMD EEE Capability register - * - * A small helper function that translates MMD EEE Capability (3.20) bits - * to ethtool supported settings. - */ -static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap) -{ - u32 supported = 0; - - if (eee_cap & MDIO_EEE_100TX) - supported |= SUPPORTED_100baseT_Full; - if (eee_cap & MDIO_EEE_1000T) - supported |= SUPPORTED_1000baseT_Full; - if (eee_cap & MDIO_EEE_10GT) - supported |= SUPPORTED_10000baseT_Full; - if (eee_cap & MDIO_EEE_1000KX) - supported |= SUPPORTED_1000baseKX_Full; - if (eee_cap & MDIO_EEE_10GKX4) - supported |= SUPPORTED_10000baseKX4_Full; - if (eee_cap & MDIO_EEE_10GKR) - supported |= SUPPORTED_10000baseKR_Full; - - return supported; -} -#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \ - __kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap) - -/** - * mmd_eee_adv_to_ethtool_adv_t - * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers - * - * A small helper function that translates the MMD EEE Advertisement (7.60) - * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement - * settings. - */ -static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv) -{ - u32 adv = 0; - - if (eee_adv & MDIO_EEE_100TX) - adv |= ADVERTISED_100baseT_Full; - if (eee_adv & MDIO_EEE_1000T) - adv |= ADVERTISED_1000baseT_Full; - if (eee_adv & MDIO_EEE_10GT) - adv |= ADVERTISED_10000baseT_Full; - if (eee_adv & MDIO_EEE_1000KX) - adv |= ADVERTISED_1000baseKX_Full; - if (eee_adv & MDIO_EEE_10GKX4) - adv |= ADVERTISED_10000baseKX4_Full; - if (eee_adv & MDIO_EEE_10GKR) - adv |= ADVERTISED_10000baseKR_Full; - - return adv; -} -#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \ - __kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv) - -/** - * ethtool_adv_to_mmd_eee_adv_t - * @adv: the ethtool advertisement settings - * - * A small helper function that translates ethtool advertisement settings - * to EEE advertisements for the MMD EEE Advertisement (7.60) and - * MMD EEE Link Partner Ability (7.61) registers. - */ -static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv) -{ - u16 reg = 0; - - if (adv & ADVERTISED_100baseT_Full) - reg |= MDIO_EEE_100TX; - if (adv & ADVERTISED_1000baseT_Full) - reg |= MDIO_EEE_1000T; - if (adv & ADVERTISED_10000baseT_Full) - reg |= MDIO_EEE_10GT; - if (adv & ADVERTISED_1000baseKX_Full) - reg |= MDIO_EEE_1000KX; - if (adv & ADVERTISED_10000baseKX4_Full) - reg |= MDIO_EEE_10GKX4; - if (adv & ADVERTISED_10000baseKR_Full) - reg |= MDIO_EEE_10GKR; - - return reg; -} -#define ethtool_adv_to_mmd_eee_adv_t(adv) \ - __kc_ethtool_adv_to_mmd_eee_adv_t(adv) - -#ifndef pci_pcie_type -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -static inline u8 pci_pcie_type(struct pci_dev *pdev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!pos) - BUG(); - pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); - return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; -} -#else /* < 2.6.24 */ -#define pci_pcie_type(x) (x)->pcie_type -#endif /* < 2.6.24 */ -#endif /* pci_pcie_type */ - -#define ptp_clock_register(caps, args...) 
ptp_clock_register(caps) - -#ifndef PCI_EXP_LNKSTA2 -int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); -#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v) -int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val); -#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v) -int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set); -#define pcie_capability_clear_and_set_word(d,p,c,s) \ - __kc_pcie_capability_clear_and_set_word(d,p,c,s) - -#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ - -static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos, - u16 clear) -{ - return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0); -} -#endif /* !PCI_EXP_LNKSTA2 */ - -#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) -#define USE_CONST_DEV_UC_CHAR -#endif - -#else /* >= 3.7.0 */ -#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS -#define USE_CONST_DEV_UC_CHAR -#endif /* >= 3.7.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) ) -#ifndef PCI_EXP_LNKCTL_ASPM_L0S -#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */ -#endif -#ifndef PCI_EXP_LNKCTL_ASPM_L1 -#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */ -#endif -#define HAVE_CONFIG_HOTPLUG -/* Reserved Ethernet Addresses per IEEE 802.1Q */ -static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { - 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; -#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) -static inline bool is_link_local_ether_addr(const u8 *addr) -{ - __be16 *a = (__be16 *)addr; - static const __be16 *b = (const __be16 *)eth_reserved_addr_base; - static const __be16 m = cpu_to_be16(0xfff0); - - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; -} -#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ -#else /* >= 3.8.0 */ -#ifndef __devinit -#define __devinit -#define HAVE_ENCAP_CSUM_OFFLOAD -#endif - -#ifndef __devinitdata -#define __devinitdata -#endif - -#ifndef __devexit -#define __devexit -#endif - -#ifndef __devexit_p -#define __devexit_p -#endif - -#ifndef HAVE_SRIOV_CONFIGURE -#define HAVE_SRIOV_CONFIGURE -#endif - -#define HAVE_BRIDGE_ATTRIBS -#ifndef BRIDGE_MODE_VEB -#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ -#endif /* BRIDGE_MODE_VEB */ -#ifndef BRIDGE_MODE_VEPA -#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ -#endif /* BRIDGE_MODE_VEPA */ -#endif /* >= 3.8.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) - -#undef hlist_entry -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#undef hlist_entry_safe -#define hlist_entry_safe(ptr, type, member) \ - (ptr) ? 
hlist_entry(ptr, type, member) : NULL - -#undef hlist_for_each_entry -#define hlist_for_each_entry(pos, head, member) \ - for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \ - pos; \ - pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) - -#undef hlist_for_each_entry_safe -#define hlist_for_each_entry_safe(pos, n, head, member) \ - for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \ - pos && ({ n = pos->member.next; 1; }); \ - pos = hlist_entry_safe(n, typeof(*pos), member)) - -#ifdef CONFIG_XPS -extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16); -#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx)) -#else /* CONFIG_XPS */ -#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0) -#endif /* CONFIG_XPS */ - -#ifdef HAVE_NETDEV_SELECT_QUEUE -#define _kc_hashrnd 0xd631614b /* not so random hash salt */ -extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); -#define __netdev_pick_tx __kc_netdev_pick_tx -#endif /* HAVE_NETDEV_SELECT_QUEUE */ -#else -#define HAVE_BRIDGE_FILTER -#define USE_DEFAULT_FDB_DEL_DUMP -#endif /* < 3.9.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -extern int __kc_pci_vfs_assigned(struct pci_dev *dev); -#else -static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - return 0; -} -#endif -#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) - -#ifndef VLAN_TX_COOKIE_MAGIC -static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, - u16 vlan_tci) -{ -#ifdef VLAN_TAG_PRESENT - vlan_tci |= VLAN_TAG_PRESENT; -#endif - skb->vlan_tci = vlan_tci; - return skb; -} -#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \ - __kc__vlan_hwaccel_put_tag(skb, vlan_tci) -#endif - -#else /* >= 3.10.0 */ -#define HAVE_ENCAP_TSO_OFFLOAD -#endif /* >= 3.10.0 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ) -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6))) -#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \ - && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \ - || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4)))) -#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0))) -#ifdef NETIF_F_RXHASH -#define PKT_HASH_TYPE_L3 0 -static inline void -skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) -{ - skb->rxhash = hash; -} -#endif /* NETIF_F_RXHASH */ -#endif /* < SLES12 */ -#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */ -#endif /* < RHEL7 */ -#endif /* < 3.14.0 */ - -#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \ - || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) -#undef SET_ETHTOOL_OPS -#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) -#define HAVE_VF_MIN_MAX_TXRATE 1 -#endif /* >= 3.16.0 */ - -#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \ - || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) -#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK -#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) -#define HAVE_NDO_FDB_ADD_VID -#endif /* !RHEL 7.2 */ -#endif /* >= 3.19.0 */ - -#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \ - || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) -/* vlan_tx_xx functions got renamed to skb_vlan */ -#define vlan_tx_tag_get skb_vlan_tag_get -#define vlan_tx_tag_present skb_vlan_tag_present -#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) ) 
-#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS -#endif /* !RHEL 7.2 */ -#endif /* 4.0.0 */ - -#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \ - || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) )) -/* ndo_bridge_getlink adds new nlflags parameter */ -#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS -#endif /* >= 4.1.0 */ - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) -/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ -#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL -#endif /* >= 4.2.0 */ - -/* - * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) - * For older kernels backported this commit, need to use renamed functions. - * This fix is specific to RedHat/CentOS kernels. - */ -#if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) -#define vlan_tx_tag_get skb_vlan_tag_get -#define vlan_tx_tag_present skb_vlan_tag_present -#endif - -#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0))) -#define HAVE_VF_VLAN_PROTO -#endif /* >= 4.9.0, >= SLES12SP3 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) -#define HAVE_PCI_ENABLE_MSIX -#endif - -#if defined(timer_setup) && defined(from_timer) -#define HAVE_TIMER_SETUP -#endif - -#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h deleted file mode 100644 index 6ff94133..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h +++ /dev/null @@ -1,910 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_H_ -#define _IXGBE_H_ - -#ifndef IXGBE_NO_LRO -#include -#endif - -#include -#include -#ifdef HAVE_IRQ_AFFINITY_HINT -#include -#endif /* HAVE_IRQ_AFFINITY_HINT */ -#include - -#ifdef SIOCETHTOOL -#include -#endif -#ifdef NETIF_F_HW_VLAN_TX -#include -#endif -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) -#define IXGBE_DCA -#include -#endif -#include "ixgbe_dcb.h" - -#include "kcompat.h" - -#ifdef HAVE_SCTP -#include -#endif - -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#define IXGBE_FCOE -#include "ixgbe_fcoe.h" -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ - -#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE) -#define HAVE_IXGBE_PTP -#endif - -#include "ixgbe_api.h" - -#define PFX "ixgbe: " -#define DPRINTK(nlevel, klevel, fmt, args...) 
\ - ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ - printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ - __func__ , ## args))) - -/* TX/RX descriptor defines */ -#define IXGBE_DEFAULT_TXD 512 -#define IXGBE_DEFAULT_TX_WORK 256 -#define IXGBE_MAX_TXD 4096 -#define IXGBE_MIN_TXD 64 - -#define IXGBE_DEFAULT_RXD 512 -#define IXGBE_DEFAULT_RX_WORK 256 -#define IXGBE_MAX_RXD 4096 -#define IXGBE_MIN_RXD 64 - - -/* flow control */ -#define IXGBE_MIN_FCRTL 0x40 -#define IXGBE_MAX_FCRTL 0x7FF80 -#define IXGBE_MIN_FCRTH 0x600 -#define IXGBE_MAX_FCRTH 0x7FFF0 -#define IXGBE_DEFAULT_FCPAUSE 0xFFFF -#define IXGBE_MIN_FCPAUSE 0 -#define IXGBE_MAX_FCPAUSE 0xFFFF - -/* Supported Rx Buffer Sizes */ -#define IXGBE_RXBUFFER_512 512 /* Used for packet split */ -#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT -#define IXGBE_RXBUFFER_1536 1536 -#define IXGBE_RXBUFFER_2K 2048 -#define IXGBE_RXBUFFER_3K 3072 -#define IXGBE_RXBUFFER_4K 4096 -#define IXGBE_RXBUFFER_7K 7168 -#define IXGBE_RXBUFFER_8K 8192 -#define IXGBE_RXBUFFER_15K 15360 -#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */ -#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */ - -/* - * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_512 --> size-1024 slab - */ -#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512 - -#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) - -/* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */ - -#define IXGBE_TX_FLAGS_CSUM (u32)(1) -#define IXGBE_TX_FLAGS_HW_VLAN (u32)(1 << 1) -#define IXGBE_TX_FLAGS_SW_VLAN (u32)(1 << 2) -#define IXGBE_TX_FLAGS_TSO (u32)(1 << 3) -#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 4) -#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5) -#define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) -#define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) -#define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) -#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 -#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 -#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 -#define IXGBE_TX_FLAGS_VLAN_SHIFT 16 - -#define IXGBE_MAX_RX_DESC_POLL 10 - -#define IXGBE_MAX_VF_MC_ENTRIES 30 -#define IXGBE_MAX_VF_FUNCTIONS 64 -#define IXGBE_MAX_VFTA_ENTRIES 128 -#define MAX_EMULATION_MAC_ADDRS 16 -#define IXGBE_MAX_PF_MACVLANS 15 -#define IXGBE_82599_VF_DEVICE_ID 0x10ED -#define IXGBE_X540_VF_DEVICE_ID 0x1515 - -#ifdef CONFIG_PCI_IOV -#define VMDQ_P(p) ((p) + adapter->num_vfs) -#else -#define VMDQ_P(p) (p) -#endif - -#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ - { \ - u32 current_counter = IXGBE_READ_REG(hw, reg); \ - if (current_counter < last_counter) \ - counter += 0x100000000LL; \ - last_counter = current_counter; \ - counter &= 0xFFFFFFFF00000000LL; \ - counter |= current_counter; \ - } - -#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \ - { \ - u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \ - u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \ - u64 current_counter = (current_counter_msb << 32) | \ - current_counter_lsb; \ - if (current_counter < last_counter) \ - counter += 0x1000000000LL; \ - last_counter = current_counter; \ - counter &= 0xFFFFFFF000000000LL; \ - counter |= current_counter; \ - } - -struct vf_stats { - u64 gprc; - u64 gorc; - u64 gptc; - u64 gotc; - u64 mprc; -}; - -struct 
vf_data_storage { - unsigned char vf_mac_addresses[ETH_ALEN]; - u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; - u16 num_vf_mc_hashes; - u16 default_vf_vlan_id; - u16 vlans_enabled; - bool clear_to_send; - struct vf_stats vfstats; - struct vf_stats last_vfstats; - struct vf_stats saved_rst_vfstats; - bool pf_set_mac; - u16 pf_vlan; /* When set, guest VLAN config not allowed. */ - u16 pf_qos; - u16 tx_rate; - u16 vlan_count; - u8 spoofchk_enabled; - struct pci_dev *vfdev; -}; - -struct vf_macvlans { - struct list_head l; - int vf; - bool free; - bool is_macvlan; - u8 vf_macvlan[ETH_ALEN]; -}; - -#ifndef IXGBE_NO_LRO -#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/ -#define IXGBE_LRO_GLOBAL 10 - -struct ixgbe_lro_stats { - u32 flushed; - u32 coal; -}; - -/* - * ixgbe_lro_header - header format to be aggregated by LRO - * @iph: IP header without options - * @tcp: TCP header - * @ts: Optional TCP timestamp data in TCP options - * - * This structure relies on the check above that verifies that the header - * is IPv4 and does not contain any options. - */ -struct ixgbe_lrohdr { - struct iphdr iph; - struct tcphdr th; - __be32 ts[0]; -}; - -struct ixgbe_lro_list { - struct sk_buff_head active; - struct ixgbe_lro_stats stats; -}; - -#endif /* IXGBE_NO_LRO */ -#define IXGBE_MAX_TXD_PWR 14 -#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR) - -/* Tx Descriptors needed, worst case */ -#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD) -#ifdef MAX_SKB_FRAGS -#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) -#else -#define DESC_NEEDED 4 -#endif - -/* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer */ -struct ixgbe_tx_buffer { - union ixgbe_adv_tx_desc *next_to_watch; - unsigned long time_stamp; - struct sk_buff *skb; - unsigned int bytecount; - unsigned short gso_segs; - __be16 protocol; - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - u32 tx_flags; -}; - -struct ixgbe_rx_buffer { - struct sk_buff *skb; - dma_addr_t dma; -#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT - struct page *page; - unsigned int page_offset; -#endif -}; - -struct ixgbe_queue_stats { - u64 packets; - u64 bytes; -}; - -struct ixgbe_tx_queue_stats { - u64 restart_queue; - u64 tx_busy; - u64 tx_done_old; -}; - -struct ixgbe_rx_queue_stats { - u64 rsc_count; - u64 rsc_flush; - u64 non_eop_descs; - u64 alloc_rx_page_failed; - u64 alloc_rx_buff_failed; - u64 csum_err; -}; - -enum ixgbe_ring_state_t { - __IXGBE_TX_FDIR_INIT_DONE, - __IXGBE_TX_DETECT_HANG, - __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_RSC_ENABLED, -#ifndef HAVE_NDO_SET_FEATURES - __IXGBE_RX_CSUM_ENABLED, -#endif - __IXGBE_RX_CSUM_UDP_ZERO_ERR, -#ifdef IXGBE_FCOE - __IXGBE_RX_FCOE_BUFSZ, -#endif -}; - -#define check_for_tx_hang(ring) \ - test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) -#define set_check_for_tx_hang(ring) \ - set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) -#define clear_check_for_tx_hang(ring) \ - clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) -#ifndef IXGBE_NO_HW_RSC -#define ring_is_rsc_enabled(ring) \ - test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) -#else -#define ring_is_rsc_enabled(ring) false -#endif -#define set_ring_rsc_enabled(ring) \ - set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) -#define clear_ring_rsc_enabled(ring) \ - clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) -#define netdev_ring(ring) (ring->netdev) -#define ring_queue_index(ring) (ring->queue_index) - - -struct ixgbe_ring { - struct ixgbe_ring *next; /* pointer to 
next ring in q_vector */ - struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ - struct net_device *netdev; /* netdev ring belongs to */ - struct device *dev; /* device for DMA mapping */ - void *desc; /* descriptor ring memory */ - union { - struct ixgbe_tx_buffer *tx_buffer_info; - struct ixgbe_rx_buffer *rx_buffer_info; - }; - unsigned long state; - u8 __iomem *tail; - dma_addr_t dma; /* phys. address of descriptor ring */ - unsigned int size; /* length in bytes */ - - u16 count; /* amount of descriptors */ - - u8 queue_index; /* needed for multiqueue queue management */ - u8 reg_idx; /* holds the special value that gets - * the hardware register offset - * associated with this ring, which is - * different for DCB and RSS modes - */ - u16 next_to_use; - u16 next_to_clean; - - union { -#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT - u16 rx_buf_len; -#else - u16 next_to_alloc; -#endif - struct { - u8 atr_sample_rate; - u8 atr_count; - }; - }; - - u8 dcb_tc; - struct ixgbe_queue_stats stats; - union { - struct ixgbe_tx_queue_stats tx_stats; - struct ixgbe_rx_queue_stats rx_stats; - }; -} ____cacheline_internodealigned_in_smp; - -enum ixgbe_ring_f_enum { - RING_F_NONE = 0, - RING_F_VMDQ, /* SR-IOV uses the same ring feature */ - RING_F_RSS, - RING_F_FDIR, -#ifdef IXGBE_FCOE - RING_F_FCOE, -#endif /* IXGBE_FCOE */ - RING_F_ARRAY_SIZE /* must be last in enum set */ -}; - -#define IXGBE_MAX_DCB_INDICES 8 -#define IXGBE_MAX_RSS_INDICES 16 -#define IXGBE_MAX_VMDQ_INDICES 64 -#define IXGBE_MAX_FDIR_INDICES 64 -#ifdef IXGBE_FCOE -#define IXGBE_MAX_FCOE_INDICES 8 -#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) -#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) -#else -#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES -#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES -#endif /* IXGBE_FCOE */ -struct ixgbe_ring_feature { - int indices; - int mask; -}; - -#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT -/* - * FCoE requires that all Rx buffers be over 2200 bytes in length. Since - * this is twice the size of a half page we need to double the page order - * for FCoE enabled Rx queues. - */ -#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) -{ - return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; -} -#else -#define ixgbe_rx_pg_order(_ring) 0 -#endif -#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) -#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) - -#endif -struct ixgbe_ring_container { - struct ixgbe_ring *ring; /* pointer to linked list of rings */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_packets; /* total packets processed this int */ - u16 work_limit; /* total work allowed per interrupt */ - u8 count; /* total number of rings in vector */ - u8 itr; /* current ITR setting for ring */ -}; - -/* iterator for handling rings in ring container */ -#define ixgbe_for_each_ring(pos, head) \ - for (pos = (head).ring; pos != NULL; pos = pos->next) - -#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ - ? 8 : 1) -#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS - -/* MAX_MSIX_Q_VECTORS of these are allocated, - * but we only use one per queue-specific vector. 
- */ -struct ixgbe_q_vector { - struct ixgbe_adapter *adapter; - int cpu; /* CPU for DCA */ - u16 v_idx; /* index of q_vector within array, also used for - * finding the bit in EICR and friends that - * represents the vector for this ring */ - u16 itr; /* Interrupt throttle rate written to EITR */ - struct ixgbe_ring_container rx, tx; - -#ifdef CONFIG_IXGBE_NAPI - struct napi_struct napi; -#endif -#ifndef HAVE_NETDEV_NAPI_LIST - struct net_device poll_dev; -#endif -#ifdef HAVE_IRQ_AFFINITY_HINT - cpumask_t affinity_mask; -#endif -#ifndef IXGBE_NO_LRO - struct ixgbe_lro_list lrolist; /* LRO list for queue vector*/ -#endif - int numa_node; - char name[IFNAMSIZ + 9]; - - /* for dynamic allocation of rings associated with this q_vector */ - struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; -}; - -/* - * microsecond values for various ITR rates shifted by 2 to fit itr register - * with the first 3 bits reserved 0 - */ -#define IXGBE_MIN_RSC_ITR 24 -#define IXGBE_100K_ITR 40 -#define IXGBE_20K_ITR 200 -#define IXGBE_16K_ITR 248 -#define IXGBE_10K_ITR 400 -#define IXGBE_8K_ITR 500 - -/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */ -static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc, - const u32 stat_err_bits) -{ - return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); -} - -/* ixgbe_desc_unused - calculate if we have unused descriptors */ -static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) -{ - u16 ntc = ring->next_to_clean; - u16 ntu = ring->next_to_use; - - return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; -} - -#define IXGBE_RX_DESC(R, i) \ - (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i])) -#define IXGBE_TX_DESC(R, i) \ - (&(((union ixgbe_adv_tx_desc *)((R)->desc))[i])) -#define IXGBE_TX_CTXTDESC(R, i) \ - (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i])) - -#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 -#ifdef IXGBE_FCOE -/* use 3K as the baby jumbo frame size for FCoE */ -#define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072 -#endif /* IXGBE_FCOE */ - -#define TCP_TIMER_VECTOR 0 -#define OTHER_VECTOR 1 -#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR) - -#define IXGBE_MAX_MSIX_Q_VECTORS_82599 64 -#define IXGBE_MAX_MSIX_Q_VECTORS_82598 16 - -struct ixgbe_mac_addr { - u8 addr[ETH_ALEN]; - u16 queue; - u16 state; /* bitmask */ -}; -#define IXGBE_MAC_STATE_DEFAULT 0x1 -#define IXGBE_MAC_STATE_MODIFIED 0x2 -#define IXGBE_MAC_STATE_IN_USE 0x4 - -#ifdef IXGBE_PROCFS -struct ixgbe_therm_proc_data { - struct ixgbe_hw *hw; - struct ixgbe_thermal_diode_data *sensor_data; -}; - -#endif /* IXGBE_PROCFS */ - -/* - * Only for array allocations in our adapter struct. On 82598, there will be - * unused entries in the array, but that's not a big deal. Also, in 82599, - * we can actually assign 64 queue vectors based on our extended-extended - * interrupt registers. This is different than 82598, which is limited to 16. 
- */ -#define MAX_MSIX_Q_VECTORS IXGBE_MAX_MSIX_Q_VECTORS_82599 -#define MAX_MSIX_COUNT IXGBE_MAX_MSIX_VECTORS_82599 - -#define MIN_MSIX_Q_VECTORS 1 -#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS) - -/* default to trying for four seconds */ -#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) - -/* board specific private data structure */ -struct ixgbe_adapter { -#ifdef NETIF_F_HW_VLAN_TX -#ifdef HAVE_VLAN_RX_REGISTER - struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */ -#else - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; -#endif -#endif /* NETIF_F_HW_VLAN_TX */ - /* OS defined structs */ - struct net_device *netdev; - struct pci_dev *pdev; - - unsigned long state; - - /* Some features need tri-state capability, - * thus the additional *_CAPABLE flags. - */ - u32 flags; -#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0) -#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1) -#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2) -#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3) -#ifndef IXGBE_NO_LLI -#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 4) -#endif -#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 8) -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) -#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 9) -#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 10) -#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 11) -#else -#define IXGBE_FLAG_DCA_ENABLED (u32)0 -#define IXGBE_FLAG_DCA_CAPABLE (u32)0 -#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)0 -#endif -#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 12) -#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 13) -#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 14) -#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 15) -#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 16) -#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 18) -#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 19) -#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 20) -#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 21) -#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 22) -#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 23) -#ifdef IXGBE_FCOE -#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 24) -#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 25) -#endif /* IXGBE_FCOE */ -#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 26) -#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 27) -#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 28) -#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 29) -#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE (u32)(1 << 30) -#define IXGBE_FLAG_RX_BB_CAPABLE (u32)(1 << 31) - - u32 flags2; -#ifndef IXGBE_NO_HW_RSC -#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1) -#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1) -#else -#define IXGBE_FLAG2_RSC_CAPABLE 0 -#define IXGBE_FLAG2_RSC_ENABLED 0 -#endif -#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 2) -#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 4) -#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 5) -#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 6) -#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 7) -#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 8) -#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 9) -#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 10) -#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 11) -#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 12) - - /* Tx fast path data */ - int num_tx_queues; - u16 tx_itr_setting; - u16 tx_work_limit; - - /* Rx fast path data */ - int num_rx_queues; - u16 rx_itr_setting; - u16 rx_work_limit; - - /* TX */ - struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; - - u64 restart_queue; - u64 lsc_int; - u32 tx_timeout_count; - 
- /* RX */ - struct ixgbe_ring *rx_ring[MAX_RX_QUEUES]; - int num_rx_pools; /* == num_rx_queues in 82598 */ - int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */ - u64 hw_csum_rx_error; - u64 hw_rx_no_dma_resources; - u64 rsc_total_count; - u64 rsc_total_flush; - u64 non_eop_descs; -#ifndef CONFIG_IXGBE_NAPI - u64 rx_dropped_backlog; /* count drops from rx intr handler */ -#endif - u32 alloc_rx_page_failed; - u32 alloc_rx_buff_failed; - - struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; - -#ifdef HAVE_DCBNL_IEEE - struct ieee_pfc *ixgbe_ieee_pfc; - struct ieee_ets *ixgbe_ieee_ets; -#endif - struct ixgbe_dcb_config dcb_cfg; - struct ixgbe_dcb_config temp_dcb_cfg; - u8 dcb_set_bitmap; - u8 dcbx_cap; -#ifndef HAVE_MQPRIO - u8 tc; -#endif - enum ixgbe_fc_mode last_lfc_mode; - - int num_msix_vectors; - int max_msix_q_vectors; /* true count of q_vectors for device */ - struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE]; - struct msix_entry *msix_entries; - -#ifndef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats net_stats; -#endif -#ifndef IXGBE_NO_LRO - struct ixgbe_lro_stats lro_stats; -#endif - -#ifdef ETHTOOL_TEST - u32 test_icr; - struct ixgbe_ring test_tx_ring; - struct ixgbe_ring test_rx_ring; -#endif - - /* structs defined in ixgbe_hw.h */ - struct ixgbe_hw hw; - u16 msg_enable; - struct ixgbe_hw_stats stats; -#ifndef IXGBE_NO_LLI - u32 lli_port; - u32 lli_size; - u32 lli_etype; - u32 lli_vlan_pri; -#endif /* IXGBE_NO_LLI */ - - u32 *config_space; - u64 tx_busy; - unsigned int tx_ring_count; - unsigned int rx_ring_count; - - u32 link_speed; - bool link_up; - unsigned long link_check_timeout; - - struct timer_list service_timer; - struct work_struct service_task; - - struct hlist_head fdir_filter_list; - unsigned long fdir_overflow; /* number of times ATR was backed off */ - union ixgbe_atr_input fdir_mask; - int fdir_filter_count; - u32 fdir_pballoc; - u32 atr_sample_rate; - spinlock_t fdir_perfect_lock; - -#ifdef IXGBE_FCOE - struct ixgbe_fcoe fcoe; -#endif /* IXGBE_FCOE */ - u32 wol; - - u16 bd_number; - - char eeprom_id[32]; - u16 eeprom_cap; - bool netdev_registered; - u32 interrupt_event; -#ifdef HAVE_ETHTOOL_SET_PHYS_ID - u32 led_reg; -#endif - - DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); - unsigned int num_vfs; - struct vf_data_storage *vfinfo; - int vf_rate_link_speed; - struct vf_macvlans vf_mvs; - struct vf_macvlans *mv_list; -#ifdef CONFIG_PCI_IOV - u32 timer_event_accumulator; - u32 vferr_refcount; -#endif - struct ixgbe_mac_addr *mac_table; -#ifdef IXGBE_SYSFS - struct kobject *info_kobj; - struct kobject *therm_kobj[IXGBE_MAX_SENSORS]; -#else /* IXGBE_SYSFS */ -#ifdef IXGBE_PROCFS - struct proc_dir_entry *eth_dir; - struct proc_dir_entry *info_dir; - struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS]; - struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS]; -#endif /* IXGBE_PROCFS */ -#endif /* IXGBE_SYSFS */ -}; - -struct ixgbe_fdir_filter { - struct hlist_node fdir_node; - union ixgbe_atr_input filter; - u16 sw_idx; - u16 action; -}; - -enum ixgbe_state_t { - __IXGBE_TESTING, - __IXGBE_RESETTING, - __IXGBE_DOWN, - __IXGBE_SERVICE_SCHED, - __IXGBE_IN_SFP_INIT, -}; - -struct ixgbe_cb { -#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT - union { /* Union defining head/tail partner */ - struct sk_buff *head; - struct sk_buff *tail; - }; -#endif - dma_addr_t dma; -#ifndef IXGBE_NO_LRO - __be32 tsecr; /* timestamp echo response */ - u32 tsval; /* timestamp value in host order */ - u32 next_seq; /* next expected sequence number */ - u16 
free; /* 65521 minus total size */ - u16 mss; /* size of data portion of packet */ -#endif /* IXGBE_NO_LRO */ -#ifdef HAVE_VLAN_RX_REGISTER - u16 vid; /* VLAN tag */ -#endif - u16 append_cnt; /* number of skb's appended */ -#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT - bool page_released; -#endif -}; -#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) - -#ifdef IXGBE_SYSFS -void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter); -int ixgbe_sysfs_init(struct ixgbe_adapter *adapter); -#endif /* IXGBE_SYSFS */ -#ifdef IXGBE_PROCFS -void ixgbe_procfs_exit(struct ixgbe_adapter *adapter); -int ixgbe_procfs_init(struct ixgbe_adapter *adapter); -int ixgbe_procfs_topdir_init(void); -void ixgbe_procfs_topdir_exit(void); -#endif /* IXGBE_PROCFS */ - -extern struct dcbnl_rtnl_ops dcbnl_ops; -extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max); - -extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index); - -/* needed by ixgbe_main.c */ -extern int ixgbe_validate_mac_addr(u8 *mc_addr); -extern void ixgbe_check_options(struct ixgbe_adapter *adapter); -extern void ixgbe_assign_netdev_ops(struct net_device *netdev); - -/* needed by ixgbe_ethtool.c */ -extern char ixgbe_driver_name[]; -extern const char ixgbe_driver_version[]; - -extern void ixgbe_up(struct ixgbe_adapter *adapter); -extern void ixgbe_down(struct ixgbe_adapter *adapter); -extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); -extern void ixgbe_reset(struct ixgbe_adapter *adapter); -extern void ixgbe_set_ethtool_ops(struct net_device *netdev); -extern int ixgbe_setup_rx_resources(struct ixgbe_ring *); -extern int ixgbe_setup_tx_resources(struct ixgbe_ring *); -extern void ixgbe_free_rx_resources(struct ixgbe_ring *); -extern void ixgbe_free_tx_resources(struct ixgbe_ring *); -extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *, - struct ixgbe_ring *); -extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *, - struct ixgbe_ring *); -extern void ixgbe_update_stats(struct ixgbe_adapter *adapter); -extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); -extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); -extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev); -extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, - struct ixgbe_adapter *, - struct ixgbe_ring *); -extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *, - struct ixgbe_tx_buffer *); -extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16); -extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, - struct ixgbe_ring *); -extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter, - struct ixgbe_ring *); -extern void ixgbe_set_rx_mode(struct net_device *netdev); -extern int ixgbe_write_mc_addr_list(struct net_device *netdev); -extern int ixgbe_setup_tc(struct net_device *dev, u8 tc); -#ifdef IXGBE_FCOE -extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32); -#endif /* IXGBE_FCOE */ -extern void ixgbe_do_reset(struct net_device *netdev); -extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector); -extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter, - struct ixgbe_ring *); -extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter); -extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter); -#ifdef ETHTOOL_OPS_COMPAT -extern int ethtool_ioctl(struct ifreq *ifr); -#endif - -#ifdef IXGBE_FCOE -extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter); -extern int ixgbe_fso(struct ixgbe_ring *tx_ring, - 
struct ixgbe_tx_buffer *first, - u8 *hdr_len); -extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter); -extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb); -extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid, - struct scatterlist *sgl, unsigned int sgc); -#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET -extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid, - struct scatterlist *sgl, unsigned int sgc); -#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */ -extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid); -#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE -extern int ixgbe_fcoe_enable(struct net_device *netdev); -extern int ixgbe_fcoe_disable(struct net_device *netdev); -#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ -#ifdef CONFIG_DCB -#ifdef HAVE_DCBNL_OPS_GETAPP -extern u8 ixgbe_fcoe_getapp(struct net_device *netdev); -#endif /* HAVE_DCBNL_OPS_GETAPP */ -extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up); -#endif /* CONFIG_DCB */ -#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN -extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type); -#endif -#endif /* IXGBE_FCOE */ - -#ifdef CONFIG_DCB -#ifdef HAVE_DCBNL_IEEE -s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame); -#endif /* HAVE_DCBNL_IEEE */ -#endif /* CONFIG_DCB */ - -extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring); -extern int ixgbe_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd); -extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, - struct net_device *netdev, unsigned int vfn); -extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); -extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); -extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); -extern int ixgbe_available_rars(struct ixgbe_adapter *adapter); -#ifndef HAVE_VLAN_RX_REGISTER -extern void ixgbe_vlan_mode(struct net_device *, u32); -#endif -#ifndef ixgbe_get_netdev_tc_txq -#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc]) -#endif -extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); -#endif /* _IXGBE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c deleted file mode 100644 index 242de671..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c +++ /dev/null @@ -1,1281 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_type.h" -#include "ixgbe_82598.h" -#include "ixgbe_api.h" -#include "ixgbe_common.h" -#include "ixgbe_phy.h" - -static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *autoneg); -static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw); -static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, - bool autoneg_wait_to_complete); -static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, bool *link_up, - bool link_up_wait_to_complete); -static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete); -static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete); -static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw); -static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw); -static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, - u32 headroom, int strategy); - -/** - * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout - * @hw: pointer to the HW structure - * - * The defaults for 82598 should be in the range of 50us to 50ms, - * however the hardware default for these parts is 500us to 1ms which is less - * than the 10ms recommended by the pci-e spec. To address this we need to - * increase the value to either 10ms to 250ms for capability version 1 config, - * or 16ms to 55ms for version 2. - **/ -void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) -{ - u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); - u16 pcie_devctl2; - - /* only take action if timeout value is defaulted to 0 */ - if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) - goto out; - - /* - * if capababilities version is type 1 we can write the - * timeout of 10ms to 250ms through the GCR register - */ - if (!(gcr & IXGBE_GCR_CAP_VER2)) { - gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; - goto out; - } - - /* - * for version 2 capabilities we need to write the config space - * directly in order to set the completion timeout value for - * 16ms to 55ms - */ - pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2); - pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; - IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); -out: - /* disable completion timeout resend */ - gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; - IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); -} - -/** - * ixgbe_init_ops_82598 - Inits func ptrs and MAC type - * @hw: pointer to hardware structure - * - * Initialize the function pointers and assign the MAC type for 82598. - * Does not touch the hardware. 
- **/ -s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - struct ixgbe_phy_info *phy = &hw->phy; - s32 ret_val; - - ret_val = ixgbe_init_phy_ops_generic(hw); - ret_val = ixgbe_init_ops_generic(hw); - - /* PHY */ - phy->ops.init = &ixgbe_init_phy_ops_82598; - - /* MAC */ - mac->ops.start_hw = &ixgbe_start_hw_82598; - mac->ops.reset_hw = &ixgbe_reset_hw_82598; - mac->ops.get_media_type = &ixgbe_get_media_type_82598; - mac->ops.get_supported_physical_layer = - &ixgbe_get_supported_physical_layer_82598; - mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598; - mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598; - mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598; - - /* RAR, Multicast, VLAN */ - mac->ops.set_vmdq = &ixgbe_set_vmdq_82598; - mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598; - mac->ops.set_vfta = &ixgbe_set_vfta_82598; - mac->ops.set_vlvf = NULL; - mac->ops.clear_vfta = &ixgbe_clear_vfta_82598; - - /* Flow Control */ - mac->ops.fc_enable = &ixgbe_fc_enable_82598; - - mac->mcft_size = 128; - mac->vft_size = 128; - mac->num_rar_entries = 16; - mac->rx_pb_size = 512; - mac->max_tx_queues = 32; - mac->max_rx_queues = 64; - mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); - - /* SFP+ Module */ - phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598; - - /* Link */ - mac->ops.check_link = &ixgbe_check_mac_link_82598; - mac->ops.setup_link = &ixgbe_setup_mac_link_82598; - mac->ops.flap_tx_laser = NULL; - mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598; - mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598; - - /* Manageability interface */ - mac->ops.set_fw_drv_ver = NULL; - - return ret_val; -} - -/** - * ixgbe_init_phy_ops_82598 - PHY/SFP specific init - * @hw: pointer to hardware structure - * - * Initialize any function pointers that were not able to be - * set during init_shared_code because the PHY/SFP type was - * not known. Perform the SFP init if necessary. - * - **/ -s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - struct ixgbe_phy_info *phy = &hw->phy; - s32 ret_val = 0; - u16 list_offset, data_offset; - - /* Identify the PHY */ - phy->ops.identify(hw); - - /* Overwrite the link function pointers if copper PHY */ - if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { - mac->ops.setup_link = &ixgbe_setup_copper_link_82598; - mac->ops.get_link_capabilities = - &ixgbe_get_copper_link_capabilities_generic; - } - - switch (hw->phy.type) { - case ixgbe_phy_tn: - phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; - phy->ops.check_link = &ixgbe_check_phy_link_tnx; - phy->ops.get_firmware_version = - &ixgbe_get_phy_firmware_version_tnx; - break; - case ixgbe_phy_nl: - phy->ops.reset = &ixgbe_reset_phy_nl; - - /* Call SFP+ identify routine to get the SFP+ module type */ - ret_val = phy->ops.identify_sfp(hw); - if (ret_val != 0) - goto out; - else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) { - ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; - goto out; - } - - /* Check to see if SFP+ module is supported */ - ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, - &list_offset, - &data_offset); - if (ret_val != 0) { - ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; - goto out; - } - break; - default: - break; - } - -out: - return ret_val; -} - -/** - * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx - * @hw: pointer to hardware structure - * - * Starts the hardware using the generic start_hw function. 
- * Disables relaxed ordering Then set pcie completion timeout - * - **/ -s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) -{ - u32 regval; - u32 i; - s32 ret_val = 0; - - ret_val = ixgbe_start_hw_generic(hw); - - /* Disable relaxed ordering */ - for (i = 0; ((i < hw->mac.max_tx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); - } - - for (i = 0; ((i < hw->mac.max_rx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } - - /* set the completion timeout for interface */ - if (ret_val == 0) - ixgbe_set_pcie_completion_timeout(hw); - - return ret_val; -} - -/** - * ixgbe_get_link_capabilities_82598 - Determines link capabilities - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @autoneg: boolean auto-negotiation value - * - * Determines the link capabilities by reading the AUTOC register. - **/ -static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *autoneg) -{ - s32 status = 0; - u32 autoc = 0; - - /* - * Determine link capabilities based on the stored value of AUTOC, - * which represents EEPROM defaults. If AUTOC value has not been - * stored, use the current register value. - */ - if (hw->mac.orig_link_settings_stored) - autoc = hw->mac.orig_autoc; - else - autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - - switch (autoc & IXGBE_AUTOC_LMS_MASK) { - case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - *autoneg = false; - break; - - case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: - *speed = IXGBE_LINK_SPEED_10GB_FULL; - *autoneg = false; - break; - - case IXGBE_AUTOC_LMS_1G_AN: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - *autoneg = true; - break; - - case IXGBE_AUTOC_LMS_KX4_AN: - case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: - *speed = IXGBE_LINK_SPEED_UNKNOWN; - if (autoc & IXGBE_AUTOC_KX4_SUPP) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (autoc & IXGBE_AUTOC_KX_SUPP) - *speed |= IXGBE_LINK_SPEED_1GB_FULL; - *autoneg = true; - break; - - default: - status = IXGBE_ERR_LINK_SETUP; - break; - } - - return status; -} - -/** - * ixgbe_get_media_type_82598 - Determines media type - * @hw: pointer to hardware structure - * - * Returns the media type (fiber, copper, backplane) - **/ -static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw) -{ - enum ixgbe_media_type media_type; - - /* Detect if there is a copper PHY attached. 
*/ - switch (hw->phy.type) { - case ixgbe_phy_cu_unknown: - case ixgbe_phy_tn: - media_type = ixgbe_media_type_copper; - goto out; - default: - break; - } - - /* Media type for I82598 is based on device ID */ - switch (hw->device_id) { - case IXGBE_DEV_ID_82598: - case IXGBE_DEV_ID_82598_BX: - /* Default device ID is mezzanine card KX/KX4 */ - media_type = ixgbe_media_type_backplane; - break; - case IXGBE_DEV_ID_82598AF_DUAL_PORT: - case IXGBE_DEV_ID_82598AF_SINGLE_PORT: - case IXGBE_DEV_ID_82598_DA_DUAL_PORT: - case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: - case IXGBE_DEV_ID_82598EB_XF_LR: - case IXGBE_DEV_ID_82598EB_SFP_LOM: - media_type = ixgbe_media_type_fiber; - break; - case IXGBE_DEV_ID_82598EB_CX4: - case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: - media_type = ixgbe_media_type_cx4; - break; - case IXGBE_DEV_ID_82598AT: - case IXGBE_DEV_ID_82598AT2: - media_type = ixgbe_media_type_copper; - break; - default: - media_type = ixgbe_media_type_unknown; - break; - } -out: - return media_type; -} - -/** - * ixgbe_fc_enable_82598 - Enable flow control - * @hw: pointer to hardware structure - * - * Enable flow control according to the current settings. - **/ -s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - u32 fctrl_reg; - u32 rmcs_reg; - u32 reg; - u32 fcrtl, fcrth; - u32 link_speed = 0; - int i; - bool link_up; - - /* Validate the water mark configuration */ - if (!hw->fc.pause_time) { - ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; - goto out; - } - - /* Low water mark of zero causes XOFF floods */ - for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { - if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && - hw->fc.high_water[i]) { - if (!hw->fc.low_water[i] || - hw->fc.low_water[i] >= hw->fc.high_water[i]) { - hw_dbg(hw, "Invalid water mark configuration\n"); - ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; - goto out; - } - } - } - - /* - * On 82598 having Rx FC on causes resets while doing 1G - * so if it's on turn it off once we know link_speed. For - * more details see 82598 Specification update. - */ - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) { - switch (hw->fc.requested_mode) { - case ixgbe_fc_full: - hw->fc.requested_mode = ixgbe_fc_tx_pause; - break; - case ixgbe_fc_rx_pause: - hw->fc.requested_mode = ixgbe_fc_none; - break; - default: - /* no change */ - break; - } - } - - /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); - - /* Disable any previous flow control settings */ - fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE); - - rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS); - rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X); - - /* - * The possible values of fc.current_mode are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause frames, - * but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames but - * we do not support receiving pause frames). - * 3: Both Rx and Tx flow control (symmetric) are enabled. - * other: Invalid. - */ - switch (hw->fc.current_mode) { - case ixgbe_fc_none: - /* - * Flow control is disabled by software override or autoneg. - * The code below will actually disable it in the HW. - */ - break; - case ixgbe_fc_rx_pause: - /* - * Rx Flow control is enabled and Tx Flow control is - * disabled by software override. 
Since there really - * isn't a way to advertise that we are capable of RX - * Pause ONLY, we will advertise that we support both - * symmetric and asymmetric Rx PAUSE. Later, we will - * disable the adapter's ability to send PAUSE frames. - */ - fctrl_reg |= IXGBE_FCTRL_RFCE; - break; - case ixgbe_fc_tx_pause: - /* - * Tx Flow control is enabled, and Rx Flow control is - * disabled by software override. - */ - rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; - break; - case ixgbe_fc_full: - /* Flow control (both Rx and Tx) is enabled by SW override. */ - fctrl_reg |= IXGBE_FCTRL_RFCE; - rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; - break; - default: - hw_dbg(hw, "Flow control param set incorrectly\n"); - ret_val = IXGBE_ERR_CONFIG; - goto out; - break; - } - - /* Set 802.3x based flow control settings. */ - fctrl_reg |= IXGBE_FCTRL_DPF; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg); - IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg); - - /* Set up and enable Rx high/low water mark thresholds, enable XON. */ - for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { - if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && - hw->fc.high_water[i]) { - fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; - fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; - IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl); - IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth); - } else { - IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0); - } - - } - - /* Configure pause time (2 TCs per register) */ - reg = hw->fc.pause_time * 0x00010001; - for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) - IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); - - /* Configure flow control refresh threshold value */ - IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); - -out: - return ret_val; -} - -/** - * ixgbe_start_mac_link_82598 - Configures MAC link settings - * @hw: pointer to hardware structure - * - * Configures link settings based on values in the ixgbe_hw struct. - * Restarts the link. Performs autonegotiation if needed. - **/ -static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, - bool autoneg_wait_to_complete) -{ - u32 autoc_reg; - u32 links_reg; - u32 i; - s32 status = 0; - - /* Restart link */ - autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - autoc_reg |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); - - /* Only poll for autoneg to complete if specified to do so */ - if (autoneg_wait_to_complete) { - if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == - IXGBE_AUTOC_LMS_KX4_AN || - (autoc_reg & IXGBE_AUTOC_LMS_MASK) == - IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { - links_reg = 0; /* Just in case Autoneg time = 0 */ - for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - if (links_reg & IXGBE_LINKS_KX_AN_COMP) - break; - msleep(100); - } - if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; - hw_dbg(hw, "Autonegotiation did not complete.\n"); - } - } - } - - /* Add delay to filter out noises during initial link setup */ - msleep(50); - - return status; -} - -/** - * ixgbe_validate_link_ready - Function looks for phy link - * @hw: pointer to hardware structure - * - * Function indicates success when phy link is available. If phy is not ready - * within 5 seconds of MAC indicating link, the function returns error. 
- **/ -static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) -{ - u32 timeout; - u16 an_reg; - - if (hw->device_id != IXGBE_DEV_ID_82598AT2) - return 0; - - for (timeout = 0; - timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) { - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg); - - if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) && - (an_reg & IXGBE_MII_AUTONEG_LINK_UP)) - break; - - msleep(100); - } - - if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { - hw_dbg(hw, "Link was indicated but link is down\n"); - return IXGBE_ERR_LINK_SETUP; - } - - return 0; -} - -/** - * ixgbe_check_mac_link_82598 - Get link/speed status - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @link_up: true is link is up, false otherwise - * @link_up_wait_to_complete: bool used to wait for link up or not - * - * Reads the links register to determine if link is up and the current speed - **/ -static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, bool *link_up, - bool link_up_wait_to_complete) -{ - u32 links_reg; - u32 i; - u16 link_reg, adapt_comp_reg; - - /* - * SERDES PHY requires us to read link status from undocumented - * register 0xC79F. Bit 0 set indicates link is up/ready; clear - * indicates link down. OxC00C is read to check that the XAUI lanes - * are active. Bit 0 clear indicates active; set indicates inactive. - */ - if (hw->phy.type == ixgbe_phy_nl) { - hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); - hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); - hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV, - &adapt_comp_reg); - if (link_up_wait_to_complete) { - for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { - if ((link_reg & 1) && - ((adapt_comp_reg & 1) == 0)) { - *link_up = true; - break; - } else { - *link_up = false; - } - msleep(100); - hw->phy.ops.read_reg(hw, 0xC79F, - IXGBE_TWINAX_DEV, - &link_reg); - hw->phy.ops.read_reg(hw, 0xC00C, - IXGBE_TWINAX_DEV, - &adapt_comp_reg); - } - } else { - if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0)) - *link_up = true; - else - *link_up = false; - } - - if (*link_up == false) - goto out; - } - - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - if (link_up_wait_to_complete) { - for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { - if (links_reg & IXGBE_LINKS_UP) { - *link_up = true; - break; - } else { - *link_up = false; - } - msleep(100); - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - } - } else { - if (links_reg & IXGBE_LINKS_UP) - *link_up = true; - else - *link_up = false; - } - - if (links_reg & IXGBE_LINKS_SPEED) - *speed = IXGBE_LINK_SPEED_10GB_FULL; - else - *speed = IXGBE_LINK_SPEED_1GB_FULL; - - if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) && - (ixgbe_validate_link_ready(hw) != 0)) - *link_up = false; - -out: - return 0; -} - -/** - * ixgbe_setup_mac_link_82598 - Set MAC link speed - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true when waiting for completion is needed - * - * Set the link speed in the AUTOC register and restarts link. 
- **/ -static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status = 0; - ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; - u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 autoc = curr_autoc; - u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; - - /* Check to see if speed passed in is supported. */ - ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); - speed &= link_capabilities; - - if (speed == IXGBE_LINK_SPEED_UNKNOWN) - status = IXGBE_ERR_LINK_SETUP; - - /* Set KX4/KX support according to speed requested */ - else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || - link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { - autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK; - if (speed & IXGBE_LINK_SPEED_10GB_FULL) - autoc |= IXGBE_AUTOC_KX4_SUPP; - if (speed & IXGBE_LINK_SPEED_1GB_FULL) - autoc |= IXGBE_AUTOC_KX_SUPP; - if (autoc != curr_autoc) - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); - } - - if (status == 0) { - /* - * Setup and restart the link based on the new values in - * ixgbe_hw This will write the AUTOC register based on the new - * stored values - */ - status = ixgbe_start_mac_link_82598(hw, - autoneg_wait_to_complete); - } - - return status; -} - - -/** - * ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true if waiting is needed to complete - * - * Sets the link speed in the AUTOC register in the MAC and restarts link. - **/ -static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status; - - /* Setup the PHY according to input speed */ - status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, - autoneg_wait_to_complete); - /* Set up MAC */ - ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete); - - return status; -} - -/** - * ixgbe_reset_hw_82598 - Performs hardware reset - * @hw: pointer to hardware structure - * - * Resets the hardware by resetting the transmit and receive units, masks and - * clears all interrupts, performing a PHY reset, and performing a link (MAC) - * reset. - **/ -static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) -{ - s32 status = 0; - s32 phy_status = 0; - u32 ctrl; - u32 gheccr; - u32 i; - u32 autoc; - u8 analog_val; - - /* Call adapter stop to disable tx/rx and clear interrupts */ - status = hw->mac.ops.stop_adapter(hw); - if (status != 0) - goto reset_hw_out; - - /* - * Power up the Atlas Tx lanes if they are currently powered down. - * Atlas Tx lanes are powered down for MAC loopback tests, but - * they are not automatically restored on reset. 
- */ - hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val); - if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) { - /* Enable Tx Atlas so packets can be transmitted again */ - hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, - &analog_val); - analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN; - hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, - analog_val); - - hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, - &analog_val); - analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL; - hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, - analog_val); - - hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, - &analog_val); - analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL; - hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, - analog_val); - - hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, - &analog_val); - analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL; - hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, - analog_val); - } - - /* Reset PHY */ - if (hw->phy.reset_disable == false) { - /* PHY ops must be identified and initialized prior to reset */ - - /* Init PHY and function pointers, perform SFP setup */ - phy_status = hw->phy.ops.init(hw); - if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) - goto reset_hw_out; - if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) - goto mac_reset_top; - - hw->phy.ops.reset(hw); - } - -mac_reset_top: - /* - * Issue global reset to the MAC. This needs to be a SW reset. - * If link reset is used, it might reset the MAC when mng is using it - */ - ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; - IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); - IXGBE_WRITE_FLUSH(hw); - - /* Poll for reset bit to self-clear indicating reset is complete */ - for (i = 0; i < 10; i++) { - udelay(1); - ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); - if (!(ctrl & IXGBE_CTRL_RST)) - break; - } - if (ctrl & IXGBE_CTRL_RST) { - status = IXGBE_ERR_RESET_FAILED; - hw_dbg(hw, "Reset polling failed to complete.\n"); - } - - msleep(50); - - /* - * Double resets are required for recovery from certain error - * conditions. Between resets, it is necessary to stall to allow time - * for any pending HW events to complete. - */ - if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { - hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; - goto mac_reset_top; - } - - gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR); - gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6)); - IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr); - - /* - * Store the original AUTOC value if it has not been - * stored off yet. Otherwise restore the stored original - * AUTOC value since the reset operation sets back to deaults. 
- */ - autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - if (hw->mac.orig_link_settings_stored == false) { - hw->mac.orig_autoc = autoc; - hw->mac.orig_link_settings_stored = true; - } else if (autoc != hw->mac.orig_autoc) { - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc); - } - - /* Store the permanent mac address */ - hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); - - /* - * Store MAC address from RAR0, clear receive address registers, and - * clear the multicast table - */ - hw->mac.ops.init_rx_addrs(hw); - -reset_hw_out: - if (phy_status != 0) - status = phy_status; - - return status; -} - -/** - * ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address - * @hw: pointer to hardware struct - * @rar: receive address register index to associate with a VMDq index - * @vmdq: VMDq set index - **/ -s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - u32 rar_high; - u32 rar_entries = hw->mac.num_rar_entries; - - /* Make sure we are using a valid rar index range */ - if (rar >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); - rar_high &= ~IXGBE_RAH_VIND_MASK; - rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK); - IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); - return 0; -} - -/** - * ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address - * @hw: pointer to hardware struct - * @rar: receive address register index to associate with a VMDq index - * @vmdq: VMDq clear index (not used in 82598, but elsewhere) - **/ -static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - u32 rar_high; - u32 rar_entries = hw->mac.num_rar_entries; - - - /* Make sure we are using a valid rar index range */ - if (rar >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); - if (rar_high & IXGBE_RAH_VIND_MASK) { - rar_high &= ~IXGBE_RAH_VIND_MASK; - IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); - } - - return 0; -} - -/** - * ixgbe_set_vfta_82598 - Set VLAN filter table - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * @vind: VMDq output index that maps queue to VLAN id in VFTA - * @vlan_on: boolean flag to turn on/off VLAN in VFTA - * - * Turn on/off specified VLAN in the VLAN filter table. 
- **/ -s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on) -{ - u32 regindex; - u32 bitindex; - u32 bits; - u32 vftabyte; - - if (vlan > 4095) - return IXGBE_ERR_PARAM; - - /* Determine 32-bit word position in array */ - regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ - - /* Determine the location of the (VMD) queue index */ - vftabyte = ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */ - bitindex = (vlan & 0x7) << 2; /* lower 3 bits indicate nibble */ - - /* Set the nibble for VMD queue index */ - bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex)); - bits &= (~(0x0F << bitindex)); - bits |= (vind << bitindex); - IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits); - - /* Determine the location of the bit for this VLAN id */ - bitindex = vlan & 0x1F; /* lower five bits */ - - bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); - if (vlan_on) - /* Turn on this VLAN id */ - bits |= (1 << bitindex); - else - /* Turn off this VLAN id */ - bits &= ~(1 << bitindex); - IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits); - - return 0; -} - -/** - * ixgbe_clear_vfta_82598 - Clear VLAN filter table - * @hw: pointer to hardware structure - * - * Clears the VLAN filer table, and the VMDq index associated with the filter - **/ -static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw) -{ - u32 offset; - u32 vlanbyte; - - for (offset = 0; offset < hw->mac.vft_size; offset++) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); - - for (vlanbyte = 0; vlanbyte < 4; vlanbyte++) - for (offset = 0; offset < hw->mac.vft_size; offset++) - IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset), - 0); - - return 0; -} - -/** - * ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register - * @hw: pointer to hardware structure - * @reg: analog register to read - * @val: read value - * - * Performs read operation to Atlas analog register specified. - **/ -s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val) -{ - u32 atlas_ctl; - - IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, - IXGBE_ATLASCTL_WRITE_CMD | (reg << 8)); - IXGBE_WRITE_FLUSH(hw); - udelay(10); - atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); - *val = (u8)atlas_ctl; - - return 0; -} - -/** - * ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register - * @hw: pointer to hardware structure - * @reg: atlas register to write - * @val: value to write - * - * Performs write operation to Atlas analog register specified. - **/ -s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val) -{ - u32 atlas_ctl; - - atlas_ctl = (reg << 8) | val; - IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl); - IXGBE_WRITE_FLUSH(hw); - udelay(10); - - return 0; -} - -/** - * ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface. - * @hw: pointer to hardware structure - * @byte_offset: EEPROM byte offset to read - * @eeprom_data: value read - * - * Performs 8 byte read operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, - u8 *eeprom_data) -{ - s32 status = 0; - u16 sfp_addr = 0; - u16 sfp_data = 0; - u16 sfp_stat = 0; - u32 i; - - if (hw->phy.type == ixgbe_phy_nl) { - /* - * NetLogic phy SDA/SCL registers are at addresses 0xC30A to - * 0xC30D. These registers are used to talk to the SFP+ - * module's EEPROM through the SDA/SCL (I2C) interface. 
- */ - sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset; - sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK); - hw->phy.ops.write_reg(hw, - IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - sfp_addr); - - /* Poll status */ - for (i = 0; i < 100; i++) { - hw->phy.ops.read_reg(hw, - IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &sfp_stat); - sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK; - if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS) - break; - msleep(10); - } - - if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { - hw_dbg(hw, "EEPROM read did not pass.\n"); - status = IXGBE_ERR_SFP_NOT_PRESENT; - goto out; - } - - /* Read data */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data); - - *eeprom_data = (u8)(sfp_data >> 8); - } else { - status = IXGBE_ERR_PHY; - goto out; - } - -out: - return status; -} - -/** - * ixgbe_get_supported_physical_layer_82598 - Returns physical layer type - * @hw: pointer to hardware structure - * - * Determines physical layer capabilities of the current configuration. - **/ -u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw) -{ - u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; - u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; - u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; - u16 ext_ability = 0; - - hw->phy.ops.identify(hw); - - /* Copper PHY must be checked before AUTOC LMS to determine correct - * physical layer because 10GBase-T PHYs use LMS = KX4/KX */ - switch (hw->phy.type) { - case ixgbe_phy_tn: - case ixgbe_phy_cu_unknown: - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); - if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; - if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; - if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; - goto out; - default: - break; - } - - switch (autoc & IXGBE_AUTOC_LMS_MASK) { - case IXGBE_AUTOC_LMS_1G_AN: - case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: - if (pma_pmd_1g == IXGBE_AUTOC_1G_KX) - physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX; - else - physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX; - break; - case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: - if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; - else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; - else /* XAUI */ - physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; - break; - case IXGBE_AUTOC_LMS_KX4_AN: - case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: - if (autoc & IXGBE_AUTOC_KX_SUPP) - physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; - if (autoc & IXGBE_AUTOC_KX4_SUPP) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; - break; - default: - break; - } - - if (hw->phy.type == ixgbe_phy_nl) { - hw->phy.ops.identify_sfp(hw); - - switch (hw->phy.sfp_type) { - case ixgbe_sfp_type_da_cu: - physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; - break; - case ixgbe_sfp_type_sr: - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; - break; - case ixgbe_sfp_type_lr: - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; - break; - default: - physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; - break; - } - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_82598_DA_DUAL_PORT: - physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; 
- break; - case IXGBE_DEV_ID_82598AF_DUAL_PORT: - case IXGBE_DEV_ID_82598AF_SINGLE_PORT: - case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; - break; - case IXGBE_DEV_ID_82598EB_XF_LR: - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; - break; - default: - break; - } - -out: - return physical_layer; -} - -/** - * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple - * port devices. - * @hw: pointer to the HW structure - * - * Calls common function and corrects issue with some single port devices - * that enable LAN1 but not LAN0. - **/ -void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw) -{ - struct ixgbe_bus_info *bus = &hw->bus; - u16 pci_gen = 0; - u16 pci_ctrl2 = 0; - - ixgbe_set_lan_id_multi_port_pcie(hw); - - /* check if LAN0 is disabled */ - hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen); - if ((pci_gen != 0) && (pci_gen != 0xFFFF)) { - - hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2); - - /* if LAN0 is completely disabled force function to 0 */ - if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) && - !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) && - !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) { - - bus->func = 0; - } - } -} - -/** - * ixgbe_set_rxpba_82598 - Initialize RX packet buffer - * @hw: pointer to hardware structure - * @num_pb: number of packet buffers to allocate - * @headroom: reserve n KB of headroom - * @strategy: packet buffer allocation strategy - **/ -static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, - u32 headroom, int strategy) -{ - u32 rxpktsize = IXGBE_RXPBSIZE_64KB; - u8 i = 0; - - if (!num_pb) - return; - - /* Setup Rx packet buffer sizes */ - switch (strategy) { - case PBA_STRATEGY_WEIGHTED: - /* Setup the first four at 80KB */ - rxpktsize = IXGBE_RXPBSIZE_80KB; - for (; i < 4; i++) - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); - /* Setup the last four at 48KB...don't re-init i */ - rxpktsize = IXGBE_RXPBSIZE_48KB; - /* Fall Through */ - case PBA_STRATEGY_EQUAL: - default: - /* Divide the remaining Rx packet buffer evenly among the TCs */ - for (; i < IXGBE_MAX_PACKET_BUFFERS; i++) - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); - break; - } - - /* Setup Tx packet buffer sizes */ - for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) - IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB); - - return; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h deleted file mode 100644 index 9a8c670a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_82598_H_ -#define _IXGBE_82598_H_ - -u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw); -s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw); -s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw); -s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on); -s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val); -s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val); -s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, - u8 *eeprom_data); -u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw); -s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw); -void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw); -void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw); -#endif /* _IXGBE_82598_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c deleted file mode 100644 index 3f159123..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ /dev/null @@ -1,2299 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_type.h" -#include "ixgbe_82599.h" -#include "ixgbe_api.h" -#include "ixgbe_common.h" -#include "ixgbe_phy.h" - -static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete); -static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); -static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, - u16 offset, u16 *data); -static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data); -static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data); - -void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - - /* enable the laser control functions for SFP+ fiber */ - if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) { - mac->ops.disable_tx_laser = - &ixgbe_disable_tx_laser_multispeed_fiber; - mac->ops.enable_tx_laser = - &ixgbe_enable_tx_laser_multispeed_fiber; - mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber; - - } else { - mac->ops.disable_tx_laser = NULL; - mac->ops.enable_tx_laser = NULL; - mac->ops.flap_tx_laser = NULL; - } - - if (hw->phy.multispeed_fiber) { - /* Set up dual speed SFP+ support */ - mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber; - } else { - if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) && - (hw->phy.smart_speed == ixgbe_smart_speed_auto || - hw->phy.smart_speed == ixgbe_smart_speed_on) && - !ixgbe_verify_lesm_fw_enabled_82599(hw)) { - mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed; - } else { - mac->ops.setup_link = &ixgbe_setup_mac_link_82599; - } - } -} - -/** - * ixgbe_init_phy_ops_82599 - PHY/SFP specific init - * @hw: pointer to 
hardware structure - * - * Initialize any function pointers that were not able to be - * set during init_shared_code because the PHY/SFP type was - * not known. Perform the SFP init if necessary. - * - **/ -s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - struct ixgbe_phy_info *phy = &hw->phy; - s32 ret_val = 0; - u32 esdp; - - if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) { - /* Store flag indicating I2C bus access control unit. */ - hw->phy.qsfp_shared_i2c_bus = TRUE; - - /* Initialize access to QSFP+ I2C bus */ - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - esdp |= IXGBE_ESDP_SDP0_DIR; - esdp &= ~IXGBE_ESDP_SDP1_DIR; - esdp &= ~IXGBE_ESDP_SDP0; - esdp &= ~IXGBE_ESDP_SDP0_NATIVE; - esdp &= ~IXGBE_ESDP_SDP1_NATIVE; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_FLUSH(hw); - - phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599; - phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599; - } - /* Identify the PHY or SFP module */ - ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) - goto init_phy_ops_out; - - /* Setup function pointers based on detected SFP module and speeds */ - ixgbe_init_mac_link_ops_82599(hw); - if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) - hw->phy.ops.reset = NULL; - - /* If copper media, overwrite with copper function pointers */ - if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { - mac->ops.setup_link = &ixgbe_setup_copper_link_82599; - mac->ops.get_link_capabilities = - &ixgbe_get_copper_link_capabilities_generic; - } - - /* Set necessary function pointers based on phy type */ - switch (hw->phy.type) { - case ixgbe_phy_tn: - phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; - phy->ops.check_link = &ixgbe_check_phy_link_tnx; - phy->ops.get_firmware_version = - &ixgbe_get_phy_firmware_version_tnx; - break; - default: - break; - } -init_phy_ops_out: - return ret_val; -} - -s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - u32 reg_anlp1 = 0; - u32 i = 0; - u16 list_offset, data_offset, data_value; - - if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) { - ixgbe_init_mac_link_ops_82599(hw); - - hw->phy.ops.reset = NULL; - - ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, - &data_offset); - if (ret_val != 0) - goto setup_sfp_out; - - /* PHY config will finish before releasing the semaphore */ - ret_val = hw->mac.ops.acquire_swfw_sync(hw, - IXGBE_GSSR_MAC_CSR_SM); - if (ret_val != 0) { - ret_val = IXGBE_ERR_SWFW_SYNC; - goto setup_sfp_out; - } - - hw->eeprom.ops.read(hw, ++data_offset, &data_value); - while (data_value != 0xffff) { - IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value); - IXGBE_WRITE_FLUSH(hw); - hw->eeprom.ops.read(hw, ++data_offset, &data_value); - } - - /* Release the semaphore */ - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); - /* Delay obtaining semaphore again to allow FW access */ - msleep(hw->eeprom.semaphore_delay); - - /* Now restart DSP by setting Restart_AN and clearing LMS */ - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw, - IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) | - IXGBE_AUTOC_AN_RESTART)); - - /* Wait for AN to leave state 0 */ - for (i = 0; i < 10; i++) { - msleep(4); - reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1); - if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK) - break; - } - if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) { - hw_dbg(hw, "sfp module setup not complete\n"); - ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; - goto setup_sfp_out; - } - - /* Restart DSP by setting Restart_AN and 
return to SFI mode */ - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw, - IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL | - IXGBE_AUTOC_AN_RESTART)); - } - -setup_sfp_out: - return ret_val; -} - -/** - * ixgbe_init_ops_82599 - Inits func ptrs and MAC type - * @hw: pointer to hardware structure - * - * Initialize the function pointers and assign the MAC type for 82599. - * Does not touch the hardware. - **/ - -s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - struct ixgbe_phy_info *phy = &hw->phy; - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - s32 ret_val; - - ixgbe_init_phy_ops_generic(hw); - ret_val = ixgbe_init_ops_generic(hw); - - /* PHY */ - phy->ops.identify = &ixgbe_identify_phy_82599; - phy->ops.init = &ixgbe_init_phy_ops_82599; - - /* MAC */ - mac->ops.reset_hw = &ixgbe_reset_hw_82599; - mac->ops.get_media_type = &ixgbe_get_media_type_82599; - mac->ops.get_supported_physical_layer = - &ixgbe_get_supported_physical_layer_82599; - mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; - mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; - mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599; - mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599; - mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599; - mac->ops.start_hw = &ixgbe_start_hw_82599; - mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; - mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; - mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; - mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; - mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; - - /* RAR, Multicast, VLAN */ - mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; - mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; - mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; - mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; - mac->rar_highwater = 1; - mac->ops.set_vfta = &ixgbe_set_vfta_generic; - mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; - mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; - mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; - mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599; - mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; - mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; - - /* Link */ - mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599; - mac->ops.check_link = &ixgbe_check_mac_link_generic; - mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; - ixgbe_init_mac_link_ops_82599(hw); - - mac->mcft_size = 128; - mac->vft_size = 128; - mac->num_rar_entries = 128; - mac->rx_pb_size = 512; - mac->max_tx_queues = 128; - mac->max_rx_queues = 128; - mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); - - mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & - IXGBE_FWSM_MODE_MASK) ? 
true : false; - - //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; - - /* EEPROM */ - eeprom->ops.read = &ixgbe_read_eeprom_82599; - eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599; - - /* Manageability interface */ - mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; - - mac->ops.get_thermal_sensor_data = - &ixgbe_get_thermal_sensor_data_generic; - mac->ops.init_thermal_sensor_thresh = - &ixgbe_init_thermal_sensor_thresh_generic; - - return ret_val; -} - -/** - * ixgbe_get_link_capabilities_82599 - Determines link capabilities - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @negotiation: true when autoneg or autotry is enabled - * - * Determines the link capabilities by reading the AUTOC register. - **/ -s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *negotiation) -{ - s32 status = 0; - u32 autoc = 0; - - /* Check if 1G SFP module. */ - if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { - *speed = IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = true; - goto out; - } - - /* - * Determine link capabilities based on the stored value of AUTOC, - * which represents EEPROM defaults. If AUTOC value has not - * been stored, use the current register values. - */ - if (hw->mac.orig_link_settings_stored) - autoc = hw->mac.orig_autoc; - else - autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - - switch (autoc & IXGBE_AUTOC_LMS_MASK) { - case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = false; - break; - - case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: - *speed = IXGBE_LINK_SPEED_10GB_FULL; - *negotiation = false; - break; - - case IXGBE_AUTOC_LMS_1G_AN: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = true; - break; - - case IXGBE_AUTOC_LMS_10G_SERIAL: - *speed = IXGBE_LINK_SPEED_10GB_FULL; - *negotiation = false; - break; - - case IXGBE_AUTOC_LMS_KX4_KX_KR: - case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: - *speed = IXGBE_LINK_SPEED_UNKNOWN; - if (autoc & IXGBE_AUTOC_KR_SUPP) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (autoc & IXGBE_AUTOC_KX4_SUPP) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (autoc & IXGBE_AUTOC_KX_SUPP) - *speed |= IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = true; - break; - - case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII: - *speed = IXGBE_LINK_SPEED_100_FULL; - if (autoc & IXGBE_AUTOC_KR_SUPP) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (autoc & IXGBE_AUTOC_KX4_SUPP) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (autoc & IXGBE_AUTOC_KX_SUPP) - *speed |= IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = true; - break; - - case IXGBE_AUTOC_LMS_SGMII_1G_100M: - *speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL; - *negotiation = false; - break; - - default: - status = IXGBE_ERR_LINK_SETUP; - goto out; - break; - } - - if (hw->phy.multispeed_fiber) { - *speed |= IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - *negotiation = true; - } - -out: - return status; -} - -/** - * ixgbe_get_media_type_82599 - Get media type - * @hw: pointer to hardware structure - * - * Returns the media type (fiber, copper, backplane) - **/ -enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw) -{ - enum ixgbe_media_type media_type; - - /* Detect if there is a copper PHY attached. 
*/ - switch (hw->phy.type) { - case ixgbe_phy_cu_unknown: - case ixgbe_phy_tn: - media_type = ixgbe_media_type_copper; - goto out; - default: - break; - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_82599_KX4: - case IXGBE_DEV_ID_82599_KX4_MEZZ: - case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: - case IXGBE_DEV_ID_82599_KR: - case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: - case IXGBE_DEV_ID_82599_XAUI_LOM: - /* Default device ID is mezzanine card KX/KX4 */ - media_type = ixgbe_media_type_backplane; - break; - case IXGBE_DEV_ID_82599_SFP: - case IXGBE_DEV_ID_82599_SFP_FCOE: - case IXGBE_DEV_ID_82599_SFP_EM: - case IXGBE_DEV_ID_82599_SFP_SF2: - case IXGBE_DEV_ID_82599EN_SFP: - media_type = ixgbe_media_type_fiber; - break; - case IXGBE_DEV_ID_82599_CX4: - media_type = ixgbe_media_type_cx4; - break; - case IXGBE_DEV_ID_82599_T3_LOM: - media_type = ixgbe_media_type_copper; - break; - case IXGBE_DEV_ID_82599_LS: - media_type = ixgbe_media_type_fiber_lco; - break; - case IXGBE_DEV_ID_82599_QSFP_SF_QP: - media_type = ixgbe_media_type_fiber_qsfp; - break; - default: - media_type = ixgbe_media_type_unknown; - break; - } -out: - return media_type; -} - -/** - * ixgbe_start_mac_link_82599 - Setup MAC link settings - * @hw: pointer to hardware structure - * @autoneg_wait_to_complete: true when waiting for completion is needed - * - * Configures link settings based on values in the ixgbe_hw struct. - * Restarts the link. Performs autonegotiation if needed. - **/ -s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, - bool autoneg_wait_to_complete) -{ - u32 autoc_reg; - u32 links_reg = 0; - u32 i; - s32 status = 0; - - /* Restart link */ - autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - autoc_reg |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); - - /* Only poll for autoneg to complete if specified to do so */ - if (autoneg_wait_to_complete) { - if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == - IXGBE_AUTOC_LMS_KX4_KX_KR || - (autoc_reg & IXGBE_AUTOC_LMS_MASK) == - IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || - (autoc_reg & IXGBE_AUTOC_LMS_MASK) == - IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { - for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - if (links_reg & IXGBE_LINKS_KX_AN_COMP) - break; - msleep(100); - } - if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; - hw_dbg(hw, "Autoneg did not complete.\n"); - } - } - } - - /* Add delay to filter out noises during initial link setup */ - msleep(50); - - return status; -} - -/** - * ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser - * @hw: pointer to hardware structure - * - * The base drivers may require better control over SFP+ module - * PHY states. This includes selectively shutting down the Tx - * laser on the PHY, effectively halting physical link. - **/ -void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) -{ - u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); - - /* Disable tx laser; allow 100us to go dark per spec */ - esdp_reg |= IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - IXGBE_WRITE_FLUSH(hw); - udelay(100); -} - -/** - * ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser - * @hw: pointer to hardware structure - * - * The base drivers may require better control over SFP+ module - * PHY states. This includes selectively turning on the Tx - * laser on the PHY, effectively starting physical link. 
- **/ -void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) -{ - u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); - - /* Enable tx laser; allow 100ms to light up */ - esdp_reg &= ~IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - IXGBE_WRITE_FLUSH(hw); - msleep(100); -} - -/** - * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser - * @hw: pointer to hardware structure - * - * When the driver changes the link speeds that it can support, - * it sets autotry_restart to true to indicate that we need to - * initiate a new autotry session with the link partner. To do - * so, we set the speed then disable and re-enable the tx laser, to - * alert the link partner that it also needs to restart autotry on its - * end. This is consistent with true clause 37 autoneg, which also - * involves a loss of signal. - **/ -void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) -{ - if (hw->mac.autotry_restart) { - ixgbe_disable_tx_laser_multispeed_fiber(hw); - ixgbe_enable_tx_laser_multispeed_fiber(hw); - hw->mac.autotry_restart = false; - } -} - -/** - * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true when waiting for completion is needed - * - * Set the link speed in the AUTOC register and restarts link. - **/ -s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status = 0; - ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; - ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN; - u32 speedcnt = 0; - u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); - u32 i = 0; - bool link_up = false; - bool negotiation; - - /* Mask off requested but non-supported speeds */ - status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation); - if (status != 0) - return status; - - speed &= link_speed; - - /* - * Try each speed one by one, highest priority first. We do this in - * software because 10gb fiber doesn't support speed autonegotiation. - */ - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - speedcnt++; - highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL; - - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != 0) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) - goto out; - - /* Set the module link speed */ - esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - IXGBE_WRITE_FLUSH(hw); - - /* Allow module to change analog characteristics (1G->10G) */ - msleep(40); - - status = ixgbe_setup_mac_link_82599(hw, - IXGBE_LINK_SPEED_10GB_FULL, - autoneg, - autoneg_wait_to_complete); - if (status != 0) - return status; - - /* Flap the tx laser if it has not already been done */ - ixgbe_flap_tx_laser(hw); - - /* - * Wait for the controller to acquire link. Per IEEE 802.3ap, - * Section 73.10.2, we may have to wait up to 500ms if KR is - * attempted. 82599 uses the same timing for 10g SFI. 
- */ - for (i = 0; i < 5; i++) { - /* Wait for the link partner to also set speed */ - msleep(100); - - /* If we have link, just jump out */ - status = ixgbe_check_link(hw, &link_speed, - &link_up, false); - if (status != 0) - return status; - - if (link_up) - goto out; - } - } - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - speedcnt++; - if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN) - highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL; - - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != 0) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) - goto out; - - /* Set the module link speed */ - esdp_reg &= ~IXGBE_ESDP_SDP5; - esdp_reg |= IXGBE_ESDP_SDP5_DIR; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - IXGBE_WRITE_FLUSH(hw); - - /* Allow module to change analog characteristics (10G->1G) */ - msleep(40); - - status = ixgbe_setup_mac_link_82599(hw, - IXGBE_LINK_SPEED_1GB_FULL, - autoneg, - autoneg_wait_to_complete); - if (status != 0) - return status; - - /* Flap the tx laser if it has not already been done */ - ixgbe_flap_tx_laser(hw); - - /* Wait for the link partner to also set speed */ - msleep(100); - - /* If we have link, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != 0) - return status; - - if (link_up) - goto out; - } - - /* - * We didn't get link. Configure back to the highest speed we tried, - * (if there was more than one). We call ourselves back with just the - * single highest speed that the user requested. - */ - if (speedcnt > 1) - status = ixgbe_setup_mac_link_multispeed_fiber(hw, - highest_link_speed, autoneg, autoneg_wait_to_complete); - -out: - /* Set autoneg_advertised value based on input link speed */ - hw->phy.autoneg_advertised = 0; - - if (speed & IXGBE_LINK_SPEED_10GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; - - return status; -} - -/** - * ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true when waiting for completion is needed - * - * Implements the Intel SmartSpeed algorithm. - **/ -s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status = 0; - ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; - s32 i, j; - bool link_up = false; - u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - - /* Set autoneg_advertised value based on input link speed */ - hw->phy.autoneg_advertised = 0; - - if (speed & IXGBE_LINK_SPEED_10GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; - - if (speed & IXGBE_LINK_SPEED_100_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; - - /* - * Implement Intel SmartSpeed algorithm. SmartSpeed will reduce the - * autoneg advertisement if link is unable to be established at the - * highest negotiated rate. This can sometimes happen due to integrity - * issues with the physical media connection. 
- */ - - /* First, try to get link with full advertisement */ - hw->phy.smart_speed_active = false; - for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) { - status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, - autoneg_wait_to_complete); - if (status != 0) - goto out; - - /* - * Wait for the controller to acquire link. Per IEEE 802.3ap, - * Section 73.10.2, we may have to wait up to 500ms if KR is - * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per - * Table 9 in the AN MAS. - */ - for (i = 0; i < 5; i++) { - msleep(100); - - /* If we have link, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, - false); - if (status != 0) - goto out; - - if (link_up) - goto out; - } - } - - /* - * We didn't get link. If we advertised KR plus one of KX4/KX - * (or BX4/BX), then disable KR and try again. - */ - if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) || - ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0)) - goto out; - - /* Turn SmartSpeed on to disable KR support */ - hw->phy.smart_speed_active = true; - status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, - autoneg_wait_to_complete); - if (status != 0) - goto out; - - /* - * Wait for the controller to acquire link. 600ms will allow for - * the AN link_fail_inhibit_timer as well for multiple cycles of - * parallel detect, both 10g and 1g. This allows for the maximum - * connect attempts as defined in the AN MAS table 73-7. - */ - for (i = 0; i < 6; i++) { - msleep(100); - - /* If we have link, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != 0) - goto out; - - if (link_up) - goto out; - } - - /* We didn't get link. Turn SmartSpeed back off. */ - hw->phy.smart_speed_active = false; - status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, - autoneg_wait_to_complete); - -out: - if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL)) - hw_dbg(hw, "Smartspeed has downgraded the link speed " - "from the maximum advertised\n"); - return status; -} - -/** - * ixgbe_setup_mac_link_82599 - Set MAC link speed - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true when waiting for completion is needed - * - * Set the link speed in the AUTOC register and restarts link. - **/ -s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status = 0; - u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); - u32 start_autoc = autoc; - u32 orig_autoc = 0; - u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; - u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; - u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; - u32 links_reg = 0; - u32 i; - ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; - - /* Check to see if speed passed in is supported. 
*/ - status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); - if (status != 0) - goto out; - - speed &= link_capabilities; - - if (speed == IXGBE_LINK_SPEED_UNKNOWN) { - status = IXGBE_ERR_LINK_SETUP; - goto out; - } - - /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ - if (hw->mac.orig_link_settings_stored) - orig_autoc = hw->mac.orig_autoc; - else - orig_autoc = autoc; - - if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || - link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || - link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { - /* Set KX4/KX/KR support according to speed requested */ - autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) - autoc |= IXGBE_AUTOC_KX4_SUPP; - if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && - (hw->phy.smart_speed_active == false)) - autoc |= IXGBE_AUTOC_KR_SUPP; - } - if (speed & IXGBE_LINK_SPEED_1GB_FULL) - autoc |= IXGBE_AUTOC_KX_SUPP; - } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && - (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN || - link_mode == IXGBE_AUTOC_LMS_1G_AN)) { - /* Switch from 1G SFI to 10G SFI if requested */ - if ((speed == IXGBE_LINK_SPEED_10GB_FULL) && - (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) { - autoc &= ~IXGBE_AUTOC_LMS_MASK; - autoc |= IXGBE_AUTOC_LMS_10G_SERIAL; - } - } else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) && - (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) { - /* Switch from 10G SFI to 1G SFI if requested */ - if ((speed == IXGBE_LINK_SPEED_1GB_FULL) && - (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) { - autoc &= ~IXGBE_AUTOC_LMS_MASK; - if (autoneg) - autoc |= IXGBE_AUTOC_LMS_1G_AN; - else - autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN; - } - } - - if (autoc != start_autoc) { - /* Restart link */ - autoc |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); - - /* Only poll for autoneg to complete if specified to do so */ - if (autoneg_wait_to_complete) { - if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || - link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || - link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { - for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { - links_reg = - IXGBE_READ_REG(hw, IXGBE_LINKS); - if (links_reg & IXGBE_LINKS_KX_AN_COMP) - break; - msleep(100); - } - if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = - IXGBE_ERR_AUTONEG_NOT_COMPLETE; - hw_dbg(hw, "Autoneg did not complete.\n"); - } - } - } - - /* Add delay to filter out noises during initial link setup */ - msleep(50); - } - -out: - return status; -} - -/** - * ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true if waiting is needed to complete - * - * Restarts link on PHY and MAC based on settings passed in. - **/ -static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete) -{ - s32 status; - - /* Setup the PHY according to input speed */ - status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, - autoneg_wait_to_complete); - /* Set up MAC */ - ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete); - - return status; -} - -/** - * ixgbe_reset_hw_82599 - Perform hardware reset - * @hw: pointer to hardware structure - * - * Resets the hardware by resetting the transmit and receive units, masks - * and clears all interrupts, perform a PHY reset, and perform a link (MAC) - * reset. 
- **/ -s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) -{ -// ixgbe_link_speed link_speed; - s32 status = 0; -// u32 ctrl, i, autoc, autoc2; -// bool link_up = false; - -#if 0 - /* Call adapter stop to disable tx/rx and clear interrupts */ - status = hw->mac.ops.stop_adapter(hw); - if (status != 0) - goto reset_hw_out; - - /* flush pending Tx transactions */ - ixgbe_clear_tx_pending(hw); - - /* PHY ops must be identified and initialized prior to reset */ - - /* Identify PHY and related function pointers */ - status = hw->phy.ops.init(hw); - - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) - goto reset_hw_out; - - /* Setup SFP module if there is one present. */ - if (hw->phy.sfp_setup_needed) { - status = hw->mac.ops.setup_sfp(hw); - hw->phy.sfp_setup_needed = false; - } - - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) - goto reset_hw_out; - - /* Reset PHY */ - if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL) - hw->phy.ops.reset(hw); - -mac_reset_top: - /* - * Issue global reset to the MAC. Needs to be SW reset if link is up. - * If link reset is used when link is up, it might reset the PHY when - * mng is using it. If link is down or the flag to force full link - * reset is set, then perform link reset. - */ - ctrl = IXGBE_CTRL_LNK_RST; - if (!hw->force_full_reset) { - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - if (link_up) - ctrl = IXGBE_CTRL_RST; - } - - ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); - IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); - IXGBE_WRITE_FLUSH(hw); - - /* Poll for reset bit to self-clear indicating reset is complete */ - for (i = 0; i < 10; i++) { - udelay(1); - ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); - if (!(ctrl & IXGBE_CTRL_RST_MASK)) - break; - } - - if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; - hw_dbg(hw, "Reset polling failed to complete.\n"); - } - - msleep(50); - - /* - * Double resets are required for recovery from certain error - * conditions. Between resets, it is necessary to stall to allow time - * for any pending HW events to complete. - */ - if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { - hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; - goto mac_reset_top; - } - - /* - * Store the original AUTOC/AUTOC2 values if they have not been - * stored off yet. Otherwise restore the stored original - * values since the reset operation sets back to defaults. - */ - autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); - if (hw->mac.orig_link_settings_stored == false) { - hw->mac.orig_autoc = autoc; - hw->mac.orig_autoc2 = autoc2; - hw->mac.orig_link_settings_stored = true; - } else { - if (autoc != hw->mac.orig_autoc) - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc | - IXGBE_AUTOC_AN_RESTART)); - - if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) != - (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) { - autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK; - autoc2 |= (hw->mac.orig_autoc2 & - IXGBE_AUTOC2_UPPER_MASK); - IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2); - } - } -#endif - - /* Store the permanent mac address */ - hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); - - /* - * Store MAC address from RAR0, clear receive address registers, and - * clear the multicast table. Also reset num_rar_entries to 128, - * since we modify this value when programming the SAN MAC address. 
- */ - hw->mac.num_rar_entries = 128; - hw->mac.ops.init_rx_addrs(hw); - - /* Store the permanent SAN mac address */ - hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); - - /* Add the SAN MAC address to the RAR only if it's a valid address */ - if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { - hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, - hw->mac.san_addr, 0, IXGBE_RAH_AV); - - /* Save the SAN MAC RAR index */ - hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; - - /* Reserve the last RAR for the SAN MAC address */ - hw->mac.num_rar_entries--; - } - - /* Store the alternative WWNN/WWPN prefix */ - hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, - &hw->mac.wwpn_prefix); - -//reset_hw_out: - return status; -} - -/** - * ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables. - * @hw: pointer to hardware structure - **/ -s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) -{ - int i; - u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL); - fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE; - - /* - * Before starting reinitialization process, - * FDIRCMD.CMD must be zero. - */ - for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) { - if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & - IXGBE_FDIRCMD_CMD_MASK)) - break; - udelay(10); - } - if (i >= IXGBE_FDIRCMD_CMD_POLL) { - hw_dbg(hw, "Flow Director previous command isn't complete, " - "aborting table re-initialization.\n"); - return IXGBE_ERR_FDIR_REINIT_FAILED; - } - - IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0); - IXGBE_WRITE_FLUSH(hw); - /* - * 82599 adapters flow director init flow cannot be restarted, - * Workaround 82599 silicon errata by performing the following steps - * before re-writing the FDIRCTRL control register with the same value. - * - write 1 to bit 8 of FDIRCMD register & - * - write 0 to bit 8 of FDIRCMD register - */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, - (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) | - IXGBE_FDIRCMD_CLEARHT)); - IXGBE_WRITE_FLUSH(hw); - IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, - (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & - ~IXGBE_FDIRCMD_CLEARHT)); - IXGBE_WRITE_FLUSH(hw); - /* - * Clear FDIR Hash register to clear any leftover hashes - * waiting to be programmed. - */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00); - IXGBE_WRITE_FLUSH(hw); - - IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); - IXGBE_WRITE_FLUSH(hw); - - /* Poll init-done after we write FDIRCTRL register */ - for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { - if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & - IXGBE_FDIRCTRL_INIT_DONE) - break; - udelay(10); - } - if (i >= IXGBE_FDIR_INIT_DONE_POLL) { - hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); - return IXGBE_ERR_FDIR_REINIT_FAILED; - } - - /* Clear FDIR statistics registers (read to clear) */ - IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT); - IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT); - IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); - IXGBE_READ_REG(hw, IXGBE_FDIRMISS); - IXGBE_READ_REG(hw, IXGBE_FDIRLEN); - - return 0; -} - -/** - * ixgbe_fdir_enable_82599 - Initialize Flow Director control registers - * @hw: pointer to hardware structure - * @fdirctrl: value to write to flow director control register - **/ -static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl) -{ - int i; - - /* Prime the keys for hashing */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY); - IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY); - - /* - * Poll init-done after we write the register. 
Estimated times: - * 10G: PBALLOC = 11b, timing is 60us - * 1G: PBALLOC = 11b, timing is 600us - * 100M: PBALLOC = 11b, timing is 6ms - * - * Multiple these timings by 4 if under full Rx load - * - * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for - * 1 msec per poll time. If we're at line rate and drop to 100M, then - * this might not finish in our poll time, but we can live with that - * for now. - */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); - IXGBE_WRITE_FLUSH(hw); - for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { - if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & - IXGBE_FDIRCTRL_INIT_DONE) - break; - msleep(1); - } - - if (i >= IXGBE_FDIR_INIT_DONE_POLL) - hw_dbg(hw, "Flow Director poll time exceeded!\n"); -} - -/** - * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters - * @hw: pointer to hardware structure - * @fdirctrl: value to write to flow director control register, initially - * contains just the value of the Rx packet buffer allocation - **/ -s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl) -{ - /* - * Continue setup of fdirctrl register bits: - * Move the flexible bytes to use the ethertype - shift 6 words - * Set the maximum length per hash bucket to 0xA filters - * Send interrupt when 64 filters are left - */ - fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | - (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | - (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); - - /* write hashes and fdirctrl register, poll for completion */ - ixgbe_fdir_enable_82599(hw, fdirctrl); - - return 0; -} - -/** - * ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters - * @hw: pointer to hardware structure - * @fdirctrl: value to write to flow director control register, initially - * contains just the value of the Rx packet buffer allocation - **/ -s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl) -{ - /* - * Continue setup of fdirctrl register bits: - * Turn perfect match filtering on - * Report hash in RSS field of Rx wb descriptor - * Initialize the drop queue - * Move the flexible bytes to use the ethertype - shift 6 words - * Set the maximum length per hash bucket to 0xA filters - * Send interrupt when 64 (0x4 * 16) filters are left - */ - fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH | - IXGBE_FDIRCTRL_REPORT_STATUS | - (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) | - (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | - (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | - (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); - - /* write hashes and fdirctrl register, poll for completion */ - ixgbe_fdir_enable_82599(hw, fdirctrl); - - return 0; -} - -/* - * These defines allow us to quickly generate all of the necessary instructions - * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION - * for values 0 through 15 - */ -#define IXGBE_ATR_COMMON_HASH_KEY \ - (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY) -#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \ -do { \ - u32 n = (_n); \ - if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \ - common_hash ^= lo_hash_dword >> n; \ - else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ - bucket_hash ^= lo_hash_dword >> n; \ - else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \ - sig_hash ^= lo_hash_dword << (16 - n); \ - if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \ - common_hash ^= hi_hash_dword >> n; \ - else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ - bucket_hash ^= hi_hash_dword >> n; \ - else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n 
+ 16))) \ - sig_hash ^= hi_hash_dword << (16 - n); \ -} while (0); - -/** - * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash - * @stream: input bitstream to compute the hash on - * - * This function is almost identical to the function above but contains - * several optomizations such as unwinding all of the loops, letting the - * compiler work out all of the conditional ifs since the keys are static - * defines, and computing two keys at once since the hashed dword stream - * will be the same for both keys. - **/ -u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, - union ixgbe_atr_hash_dword common) -{ - u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; - u32 sig_hash = 0, bucket_hash = 0, common_hash = 0; - - /* record the flow_vm_vlan bits as they are a key part to the hash */ - flow_vm_vlan = IXGBE_NTOHL(input.dword); - - /* generate common hash dword */ - hi_hash_dword = IXGBE_NTOHL(common.dword); - - /* low dword is word swapped version of common */ - lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); - - /* apply flow ID/VM pool/VLAN ID bits to hash words */ - hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); - - /* Process bits 0 and 16 */ - IXGBE_COMPUTE_SIG_HASH_ITERATION(0); - - /* - * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to - * delay this because bit 0 of the stream should not be processed - * so we do not add the vlan until after bit 0 was processed - */ - lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); - - /* Process remaining 30 bit of the key */ - IXGBE_COMPUTE_SIG_HASH_ITERATION(1); - IXGBE_COMPUTE_SIG_HASH_ITERATION(2); - IXGBE_COMPUTE_SIG_HASH_ITERATION(3); - IXGBE_COMPUTE_SIG_HASH_ITERATION(4); - IXGBE_COMPUTE_SIG_HASH_ITERATION(5); - IXGBE_COMPUTE_SIG_HASH_ITERATION(6); - IXGBE_COMPUTE_SIG_HASH_ITERATION(7); - IXGBE_COMPUTE_SIG_HASH_ITERATION(8); - IXGBE_COMPUTE_SIG_HASH_ITERATION(9); - IXGBE_COMPUTE_SIG_HASH_ITERATION(10); - IXGBE_COMPUTE_SIG_HASH_ITERATION(11); - IXGBE_COMPUTE_SIG_HASH_ITERATION(12); - IXGBE_COMPUTE_SIG_HASH_ITERATION(13); - IXGBE_COMPUTE_SIG_HASH_ITERATION(14); - IXGBE_COMPUTE_SIG_HASH_ITERATION(15); - - /* combine common_hash result with signature and bucket hashes */ - bucket_hash ^= common_hash; - bucket_hash &= IXGBE_ATR_HASH_MASK; - - sig_hash ^= common_hash << 16; - sig_hash &= IXGBE_ATR_HASH_MASK << 16; - - /* return completed signature hash */ - return sig_hash ^ bucket_hash; -} - -/** - * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter - * @hw: pointer to hardware structure - * @input: unique input dword - * @common: compressed common input dword - * @queue: queue index to direct traffic to - **/ -s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_hash_dword input, - union ixgbe_atr_hash_dword common, - u8 queue) -{ - u64 fdirhashcmd; - u32 fdircmd; - - /* - * Get the flow_type in order to program FDIRCMD properly - * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 - */ - switch (input.formatted.flow_type) { - case IXGBE_ATR_FLOW_TYPE_TCPV4: - case IXGBE_ATR_FLOW_TYPE_UDPV4: - case IXGBE_ATR_FLOW_TYPE_SCTPV4: - case IXGBE_ATR_FLOW_TYPE_TCPV6: - case IXGBE_ATR_FLOW_TYPE_UDPV6: - case IXGBE_ATR_FLOW_TYPE_SCTPV6: - break; - default: - hw_dbg(hw, " Error on flow type input\n"); - return IXGBE_ERR_CONFIG; - } - - /* configure FDIRCMD register */ - fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | - IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; - fdircmd |= input.formatted.flow_type 
<< IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; - fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; - - /* - * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits - * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. - */ - fdirhashcmd = (u64)fdircmd << 32; - fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common); - IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); - - hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd); - - return 0; -} - -#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \ -do { \ - u32 n = (_n); \ - if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ - bucket_hash ^= lo_hash_dword >> n; \ - if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ - bucket_hash ^= hi_hash_dword >> n; \ -} while (0); - -/** - * ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash - * @atr_input: input bitstream to compute the hash on - * @input_mask: mask for the input bitstream - * - * This function serves two main purposes. First it applys the input_mask - * to the atr_input resulting in a cleaned up atr_input data stream. - * Secondly it computes the hash and stores it in the bkt_hash field at - * the end of the input byte stream. This way it will be available for - * future use without needing to recompute the hash. - **/ -void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, - union ixgbe_atr_input *input_mask) -{ - - u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; - u32 bucket_hash = 0; - - /* Apply masks to input data */ - input->dword_stream[0] &= input_mask->dword_stream[0]; - input->dword_stream[1] &= input_mask->dword_stream[1]; - input->dword_stream[2] &= input_mask->dword_stream[2]; - input->dword_stream[3] &= input_mask->dword_stream[3]; - input->dword_stream[4] &= input_mask->dword_stream[4]; - input->dword_stream[5] &= input_mask->dword_stream[5]; - input->dword_stream[6] &= input_mask->dword_stream[6]; - input->dword_stream[7] &= input_mask->dword_stream[7]; - input->dword_stream[8] &= input_mask->dword_stream[8]; - input->dword_stream[9] &= input_mask->dword_stream[9]; - input->dword_stream[10] &= input_mask->dword_stream[10]; - - /* record the flow_vm_vlan bits as they are a key part to the hash */ - flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]); - - /* generate common hash dword */ - hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^ - input->dword_stream[2] ^ - input->dword_stream[3] ^ - input->dword_stream[4] ^ - input->dword_stream[5] ^ - input->dword_stream[6] ^ - input->dword_stream[7] ^ - input->dword_stream[8] ^ - input->dword_stream[9] ^ - input->dword_stream[10]); - - /* low dword is word swapped version of common */ - lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); - - /* apply flow ID/VM pool/VLAN ID bits to hash words */ - hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); - - /* Process bits 0 and 16 */ - IXGBE_COMPUTE_BKT_HASH_ITERATION(0); - - /* - * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to - * delay this because bit 0 of the stream should not be processed - * so we do not add the vlan until after bit 0 was processed - */ - lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); - - /* Process remaining 30 bit of the key */ - IXGBE_COMPUTE_BKT_HASH_ITERATION(1); - IXGBE_COMPUTE_BKT_HASH_ITERATION(2); - IXGBE_COMPUTE_BKT_HASH_ITERATION(3); - IXGBE_COMPUTE_BKT_HASH_ITERATION(4); - IXGBE_COMPUTE_BKT_HASH_ITERATION(5); - IXGBE_COMPUTE_BKT_HASH_ITERATION(6); - IXGBE_COMPUTE_BKT_HASH_ITERATION(7); - IXGBE_COMPUTE_BKT_HASH_ITERATION(8); - 
IXGBE_COMPUTE_BKT_HASH_ITERATION(9); - IXGBE_COMPUTE_BKT_HASH_ITERATION(10); - IXGBE_COMPUTE_BKT_HASH_ITERATION(11); - IXGBE_COMPUTE_BKT_HASH_ITERATION(12); - IXGBE_COMPUTE_BKT_HASH_ITERATION(13); - IXGBE_COMPUTE_BKT_HASH_ITERATION(14); - IXGBE_COMPUTE_BKT_HASH_ITERATION(15); - - /* - * Limit hash to 13 bits since max bucket count is 8K. - * Store result at the end of the input stream. - */ - input->formatted.bkt_hash = bucket_hash & 0x1FFF; -} - -/** - * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks - * @input_mask: mask to be bit swapped - * - * The source and destination port masks for flow director are bit swapped - * in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc. In order to - * generate a correctly swapped value we need to bit swap the mask and that - * is what is accomplished by this function. - **/ -static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask) -{ - u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port); - mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT; - mask |= IXGBE_NTOHS(input_mask->formatted.src_port); - mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1); - mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2); - mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4); - return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8); -} - -/* - * These two macros are meant to address the fact that we have registers - * that are either all or in part big-endian. As a result on big-endian - * systems we will end up byte swapping the value to little-endian before - * it is byte swapped again and written to the hardware in the original - * big-endian format. - */ -#define IXGBE_STORE_AS_BE32(_value) \ - (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \ - (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24)) - -#define IXGBE_WRITE_REG_BE32(a, reg, value) \ - IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value))) - -#define IXGBE_STORE_AS_BE16(_value) \ - IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8)) - -s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input_mask) -{ - /* mask IPv6 since it is currently not supported */ - u32 fdirm = IXGBE_FDIRM_DIPv6; - u32 fdirtcpm; - - /* - * Program the relevant mask registers. If src/dst_port or src/dst_addr - * are zero, then assume a full mask for that field. Also assume that - * a VLAN of 0 is unspecified, so mask that out as well. L4type - * cannot be masked out in this implementation. - * - * This also assumes IPv4 only. IPv6 masking isn't supported at this - * point in time. 
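The unrolled IXGBE_COMPUTE_BKT_HASH_ITERATION() calls above apply one simple rule per key bit; written as a loop, the bucket hash is just the sketch below. The deleted code unrolls it only so the constant key lets the compiler discard the untaken branches; the arithmetic is identical. The key and hash dwords in main() are made-up values for illustration, not the real ATR constants.

#include <stdint.h>
#include <stdio.h>

/* For every set bit n in the low half of the key, XOR in the low hash
 * dword shifted right by n; for every set bit n+16, do the same with
 * the high hash dword.  The caller keeps only 13 bits, matching the
 * 8K-entry bucket table noted in the deleted code. */
static uint32_t atr_bucket_hash(uint32_t key, uint32_t lo_dword, uint32_t hi_dword)
{
    uint32_t hash = 0;
    int n;

    for (n = 0; n < 16; n++) {
        if (key & (1u << n))
            hash ^= lo_dword >> n;
        if (key & (1u << (n + 16)))
            hash ^= hi_dword >> n;
    }
    return hash & 0x1FFF;   /* limit to 13 bits: max bucket count is 8K */
}

int main(void)
{
    /* Hypothetical key and input dwords, purely to exercise the helper. */
    printf("bucket hash = 0x%04X\n",
           (unsigned)atr_bucket_hash(0x3D63E2A6u, 0xDEADBEEFu, 0x01020304u));
    return 0;
}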
- */ - - /* verify bucket hash is cleared on hash generation */ - if (input_mask->formatted.bkt_hash) - hw_dbg(hw, " bucket hash should always be 0 in mask\n"); - - /* Program FDIRM and verify partial masks */ - switch (input_mask->formatted.vm_pool & 0x7F) { - case 0x0: - fdirm |= IXGBE_FDIRM_POOL; - case 0x7F: - break; - default: - hw_dbg(hw, " Error on vm pool mask\n"); - return IXGBE_ERR_CONFIG; - } - - switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { - case 0x0: - fdirm |= IXGBE_FDIRM_L4P; - if (input_mask->formatted.dst_port || - input_mask->formatted.src_port) { - hw_dbg(hw, " Error on src/dst port mask\n"); - return IXGBE_ERR_CONFIG; - } - case IXGBE_ATR_L4TYPE_MASK: - break; - default: - hw_dbg(hw, " Error on flow type mask\n"); - return IXGBE_ERR_CONFIG; - } - - switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) { - case 0x0000: - /* mask VLAN ID, fall through to mask VLAN priority */ - fdirm |= IXGBE_FDIRM_VLANID; - case 0x0FFF: - /* mask VLAN priority */ - fdirm |= IXGBE_FDIRM_VLANP; - break; - case 0xE000: - /* mask VLAN ID only, fall through */ - fdirm |= IXGBE_FDIRM_VLANID; - case 0xEFFF: - /* no VLAN fields masked */ - break; - default: - hw_dbg(hw, " Error on VLAN mask\n"); - return IXGBE_ERR_CONFIG; - } - - switch (input_mask->formatted.flex_bytes & 0xFFFF) { - case 0x0000: - /* Mask Flex Bytes, fall through */ - fdirm |= IXGBE_FDIRM_FLEX; - case 0xFFFF: - break; - default: - hw_dbg(hw, " Error on flexible byte mask\n"); - return IXGBE_ERR_CONFIG; - } - - /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); - - /* store the TCP/UDP port masks, bit reversed from port layout */ - fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask); - - /* write both the same so that UDP and TCP use the same mask */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm); - IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm); - - /* store source and destination IP masks (big-enian) */ - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, - ~input_mask->formatted.src_ip[0]); - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, - ~input_mask->formatted.dst_ip[0]); - - return 0; -} - -s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - u16 soft_id, u8 queue) -{ - u32 fdirport, fdirvlan, fdirhash, fdircmd; - - /* currently IPv6 is not supported, must be programmed with 0 */ - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0), - input->formatted.src_ip[0]); - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1), - input->formatted.src_ip[1]); - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2), - input->formatted.src_ip[2]); - - /* record the source address (big-endian) */ - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]); - - /* record the first 32 bits of the destination address (big-endian) */ - IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]); - - /* record source and destination port (little-endian)*/ - fdirport = IXGBE_NTOHS(input->formatted.dst_port); - fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT; - fdirport |= IXGBE_NTOHS(input->formatted.src_port); - IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport); - - /* record vlan (little-endian) and flex_bytes(big-endian) */ - fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes); - fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT; - fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id); - IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan); - - /* configure FDIRHASH register */ - fdirhash = input->formatted.bkt_hash; - fdirhash |= soft_id << 
IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; - IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); - - /* - * flush all previous writes to make certain registers are - * programmed prior to issuing the command - */ - IXGBE_WRITE_FLUSH(hw); - - /* configure FDIRCMD register */ - fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | - IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; - if (queue == IXGBE_FDIR_DROP_QUEUE) - fdircmd |= IXGBE_FDIRCMD_DROP; - fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; - fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; - fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT; - - IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd); - - return 0; -} - -s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - u16 soft_id) -{ - u32 fdirhash; - u32 fdircmd = 0; - u32 retry_count; - s32 err = 0; - - /* configure FDIRHASH register */ - fdirhash = input->formatted.bkt_hash; - fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; - IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); - - /* flush hash to HW */ - IXGBE_WRITE_FLUSH(hw); - - /* Query if filter is present */ - IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT); - - for (retry_count = 10; retry_count; retry_count--) { - /* allow 10us for query to process */ - udelay(10); - /* verify query completed successfully */ - fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD); - if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK)) - break; - } - - if (!retry_count) - err = IXGBE_ERR_FDIR_REINIT_FAILED; - - /* if filter exists in hardware then remove it */ - if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) { - IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); - IXGBE_WRITE_FLUSH(hw); - IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, - IXGBE_FDIRCMD_CMD_REMOVE_FLOW); - } - - return err; -} - -/** - * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter - * @hw: pointer to hardware structure - * @input: input bitstream - * @input_mask: mask for the input bitstream - * @soft_id: software index for the filters - * @queue: queue index to direct traffic to - * - * Note that the caller to this function must lock before calling, since the - * hardware writes must be protected from one another. 
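The FDIRTCPM/FDIRUDPM values programmed a little earlier come from the deleted ixgbe_get_fdirtcpm_82599(), which bit-swaps the two 16-bit port masks with a mask-and-shift ladder: swap adjacent bits, then bit pairs, then nibbles, then bytes, which reverses the bits inside each 16-bit half while leaving the halves in place. A standalone version of that ladder is below; the asserts are only sanity checks for the sketch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reverse the bits within each 16-bit half of a 32-bit word using the
 * same mask-and-shift steps as the deleted ixgbe_get_fdirtcpm_82599(). */
static uint32_t reverse_bits_per_halfword(uint32_t v)
{
    v = ((v & 0x55555555u) << 1) | ((v & 0xAAAAAAAAu) >> 1);   /* bits   */
    v = ((v & 0x33333333u) << 2) | ((v & 0xCCCCCCCCu) >> 2);   /* pairs  */
    v = ((v & 0x0F0F0F0Fu) << 4) | ((v & 0xF0F0F0F0u) >> 4);   /* nibbles*/
    return ((v & 0x00FF00FFu) << 8) | ((v & 0xFF00FF00u) >> 8); /* bytes */
}

int main(void)
{
    /* Bit 0 of each 16-bit half moves to bit 15 of that half. */
    assert(reverse_bits_per_halfword(0x00010001u) == 0x80008000u);
    /* A full port mask (0xFFFF) is unchanged by reversal. */
    assert(reverse_bits_per_halfword(0x0000FFFFu) == 0x0000FFFFu);
    printf("0x%08X\n", (unsigned)reverse_bits_per_halfword(0x00010001u));
    return 0;
}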
- **/ -s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - union ixgbe_atr_input *input_mask, - u16 soft_id, u8 queue) -{ - s32 err = IXGBE_ERR_CONFIG; - - /* - * Check flow_type formatting, and bail out before we touch the hardware - * if there's a configuration issue - */ - switch (input->formatted.flow_type) { - case IXGBE_ATR_FLOW_TYPE_IPV4: - input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK; - if (input->formatted.dst_port || input->formatted.src_port) { - hw_dbg(hw, " Error on src/dst port\n"); - return IXGBE_ERR_CONFIG; - } - break; - case IXGBE_ATR_FLOW_TYPE_SCTPV4: - if (input->formatted.dst_port || input->formatted.src_port) { - hw_dbg(hw, " Error on src/dst port\n"); - return IXGBE_ERR_CONFIG; - } - case IXGBE_ATR_FLOW_TYPE_TCPV4: - case IXGBE_ATR_FLOW_TYPE_UDPV4: - input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | - IXGBE_ATR_L4TYPE_MASK; - break; - default: - hw_dbg(hw, " Error on flow type input\n"); - return err; - } - - /* program input mask into the HW */ - err = ixgbe_fdir_set_input_mask_82599(hw, input_mask); - if (err) - return err; - - /* apply mask and compute/store hash */ - ixgbe_atr_compute_perfect_hash_82599(input, input_mask); - - /* program filters to filter memory */ - return ixgbe_fdir_write_perfect_filter_82599(hw, input, - soft_id, queue); -} - -/** - * ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register - * @hw: pointer to hardware structure - * @reg: analog register to read - * @val: read value - * - * Performs read operation to Omer analog register specified. - **/ -s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val) -{ - u32 core_ctl; - - IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD | - (reg << 8)); - IXGBE_WRITE_FLUSH(hw); - udelay(10); - core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL); - *val = (u8)core_ctl; - - return 0; -} - -/** - * ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register - * @hw: pointer to hardware structure - * @reg: atlas register to write - * @val: value to write - * - * Performs write operation to Omer analog register specified. - **/ -s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val) -{ - u32 core_ctl; - - core_ctl = (reg << 8) | val; - IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl); - IXGBE_WRITE_FLUSH(hw); - udelay(10); - - return 0; -} - -/** - * ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx - * @hw: pointer to hardware structure - * - * Starts the hardware using the generic start_hw function - * and the generation start_hw function. - * Then performs revision-specific operations, if any. - **/ -s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - - ret_val = ixgbe_start_hw_generic(hw); - if (ret_val != 0) - goto out; - - ret_val = ixgbe_start_hw_gen2(hw); - if (ret_val != 0) - goto out; - - /* We need to run link autotry after the driver loads */ - hw->mac.autotry_restart = true; - - if (ret_val == 0) - ret_val = ixgbe_verify_fw_version_82599(hw); -out: - return ret_val; -} - -/** - * ixgbe_identify_phy_82599 - Get physical layer module - * @hw: pointer to hardware structure - * - * Determines the physical layer module found on the current adapter. - * If PHY already detected, maintains current PHY type in hw struct, - * otherwise executes the PHY detection routine. - **/ -s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; - - /* Detect PHY if not unknown - returns success if already detected. 
*/ - status = ixgbe_identify_phy_generic(hw); - if (status != 0) { - /* 82599 10GBASE-T requires an external PHY */ - if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) - goto out; - else - status = ixgbe_identify_module_generic(hw); - } - - /* Set PHY type none if no PHY detected */ - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.type = ixgbe_phy_none; - status = 0; - } - - /* Return error if SFP module has been detected but is not supported */ - if (hw->phy.type == ixgbe_phy_sfp_unsupported) - status = IXGBE_ERR_SFP_NOT_SUPPORTED; - -out: - return status; -} - -/** - * ixgbe_get_supported_physical_layer_82599 - Returns physical layer type - * @hw: pointer to hardware structure - * - * Determines physical layer capabilities of the current configuration. - **/ -u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw) -{ - u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; - u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); - u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; - u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; - u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; - u16 ext_ability = 0; - u8 comp_codes_10g = 0; - u8 comp_codes_1g = 0; - - hw->phy.ops.identify(hw); - - switch (hw->phy.type) { - case ixgbe_phy_tn: - case ixgbe_phy_cu_unknown: - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); - if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; - if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; - if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; - goto out; - default: - break; - } - - switch (autoc & IXGBE_AUTOC_LMS_MASK) { - case IXGBE_AUTOC_LMS_1G_AN: - case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: - if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) { - physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX | - IXGBE_PHYSICAL_LAYER_1000BASE_BX; - goto out; - } else - /* SFI mode so read SFP module */ - goto sfp_check; - break; - case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: - if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; - else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; - else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI; - goto out; - break; - case IXGBE_AUTOC_LMS_10G_SERIAL: - if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) { - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR; - goto out; - } else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) - goto sfp_check; - break; - case IXGBE_AUTOC_LMS_KX4_KX_KR: - case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: - if (autoc & IXGBE_AUTOC_KX_SUPP) - physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; - if (autoc & IXGBE_AUTOC_KX4_SUPP) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; - if (autoc & IXGBE_AUTOC_KR_SUPP) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR; - goto out; - break; - default: - goto out; - break; - } - -sfp_check: - /* SFP check must be done last since DA modules are sometimes used to - * test KR mode - we need to id KR mode correctly before SFP module. 
- * Call identify_sfp because the pluggable module may have changed */ - hw->phy.ops.identify_sfp(hw); - if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) - goto out; - - switch (hw->phy.type) { - case ixgbe_phy_sfp_passive_tyco: - case ixgbe_phy_sfp_passive_unknown: - physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; - break; - case ixgbe_phy_sfp_ftl_active: - case ixgbe_phy_sfp_active_unknown: - physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA; - break; - case ixgbe_phy_sfp_avago: - case ixgbe_phy_sfp_ftl: - case ixgbe_phy_sfp_intel: - case ixgbe_phy_sfp_unknown: - hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g); - hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g); - if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; - else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) - physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; - else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) - physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T; - else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) - physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX; - break; - default: - break; - } - -out: - return physical_layer; -} - -/** - * ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599 - * @hw: pointer to hardware structure - * @regval: register value to write to RXCTRL - * - * Enables the Rx DMA unit for 82599 - **/ -s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) -{ - - /* - * Workaround for 82599 silicon errata when enabling the Rx datapath. - * If traffic is incoming before we enable the Rx unit, it could hang - * the Rx DMA unit. Therefore, make sure the security engine is - * completely disabled prior to enabling the Rx unit. - */ - - hw->mac.ops.disable_sec_rx_path(hw); - - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); - - hw->mac.ops.enable_sec_rx_path(hw); - - return 0; -} - -/** - * ixgbe_verify_fw_version_82599 - verify fw version for 82599 - * @hw: pointer to hardware structure - * - * Verifies that installed the firmware version is 0.6 or higher - * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. - * - * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or - * if the FW version is not supported. - **/ -static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_EEPROM_VERSION; - u16 fw_offset, fw_ptp_cfg_offset; - u16 fw_version = 0; - - /* firmware check is only necessary for SFI devices */ - if (hw->phy.media_type != ixgbe_media_type_fiber) { - status = 0; - goto fw_version_out; - } - - /* get the offset to the Firmware Module block */ - hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); - - if ((fw_offset == 0) || (fw_offset == 0xFFFF)) - goto fw_version_out; - - /* get the offset to the Pass Through Patch Configuration block */ - hw->eeprom.ops.read(hw, (fw_offset + - IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), - &fw_ptp_cfg_offset); - - if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF)) - goto fw_version_out; - - /* get the firmware version */ - hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + - IXGBE_FW_PATCH_VERSION_4), &fw_version); - - if (fw_version > 0x5) - status = 0; - -fw_version_out: - return status; -} - -/** - * ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state. - * @hw: pointer to hardware structure - * - * Returns true if the LESM FW module is present and enabled. Otherwise - * returns false. Smart Speed must be disabled if LESM FW module is enabled. 
- **/ -bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw) -{ - bool lesm_enabled = false; - u16 fw_offset, fw_lesm_param_offset, fw_lesm_state; - s32 status; - - /* get the offset to the Firmware Module block */ - status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); - - if ((status != 0) || - (fw_offset == 0) || (fw_offset == 0xFFFF)) - goto out; - - /* get the offset to the LESM Parameters block */ - status = hw->eeprom.ops.read(hw, (fw_offset + - IXGBE_FW_LESM_PARAMETERS_PTR), - &fw_lesm_param_offset); - - if ((status != 0) || - (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF)) - goto out; - - /* get the lesm state word */ - status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset + - IXGBE_FW_LESM_STATE_1), - &fw_lesm_state); - - if ((status == 0) && - (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED)) - lesm_enabled = true; - -out: - return lesm_enabled; -} - -/** - * ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using - * fastest available method - * - * @hw: pointer to hardware structure - * @offset: offset of word in EEPROM to read - * @words: number of words - * @data: word(s) read from the EEPROM - * - * Retrieves 16 bit word(s) read from EEPROM - **/ -static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - s32 ret_val = IXGBE_ERR_CONFIG; - - /* - * If EEPROM is detected and can be addressed using 14 bits, - * use EERD otherwise use bit bang - */ - if ((eeprom->type == ixgbe_eeprom_spi) && - (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR)) - ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words, - data); - else - ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset, - words, - data); - - return ret_val; -} - -/** - * ixgbe_read_eeprom_82599 - Read EEPROM word using - * fastest available method - * - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM - **/ -static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, - u16 offset, u16 *data) -{ - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - s32 ret_val = IXGBE_ERR_CONFIG; - - /* - * If EEPROM is detected and can be addressed using 14 bits, - * use EERD otherwise use bit bang - */ - if ((eeprom->type == ixgbe_eeprom_spi) && - (offset <= IXGBE_EERD_MAX_ADDR)) - ret_val = ixgbe_read_eerd_generic(hw, offset, data); - else - ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data); - - return ret_val; -} - -/** - * ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to read - * @data: value read - * - * Performs byte read operation to SFP module's EEPROM over I2C interface at - * a specified device address. - **/ -static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data) -{ - u32 esdp; - s32 status; - s32 timeout = 200; - - if (hw->phy.qsfp_shared_i2c_bus == TRUE) { - /* Acquire I2C bus ownership. 
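The deleted ixgbe_read_eeprom_82599() and ixgbe_read_eeprom_buffer_82599() helpers above take the fast EERD register path only when the whole request is addressable through it, and otherwise fall back to bit-banging. A standalone version of that bounds test follows; the 14-bit window constant here is an assumption made for the sketch, whereas the driver compares against IXGBE_EERD_MAX_ADDR.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EERD_MAX_ADDR 0x3FFFu   /* assumed 14-bit addressable window */

/* Same shape as the buffer-read test in the deleted code: use the fast
 * register path only if offset .. offset+words-1 stays inside the
 * window (the single-word read just checks offset alone). */
static bool can_use_eerd(uint16_t offset, uint16_t words)
{
    return words != 0 && (uint32_t)offset + (words - 1) <= EERD_MAX_ADDR;
}

int main(void)
{
    printf("%d %d\n", can_use_eerd(0x3FF0, 16), can_use_eerd(0x3FF0, 17)); /* 1 0 */
    return 0;
}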
*/ - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - esdp |= IXGBE_ESDP_SDP0; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_FLUSH(hw); - - while (timeout) { - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - if (esdp & IXGBE_ESDP_SDP1) - break; - - msleep(5); - timeout--; - } - - if (!timeout) { - hw_dbg(hw, "Driver can't access resource," - " acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; - goto release_i2c_access; - } - } - - status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data); - -release_i2c_access: - - if (hw->phy.qsfp_shared_i2c_bus == TRUE) { - /* Release I2C bus ownership. */ - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - esdp &= ~IXGBE_ESDP_SDP0; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_FLUSH(hw); - } - - return status; -} - -/** - * ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @data: value to write - * - * Performs byte write operation to SFP module's EEPROM over I2C interface at - * a specified device address. - **/ -static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data) -{ - u32 esdp; - s32 status; - s32 timeout = 200; - - if (hw->phy.qsfp_shared_i2c_bus == TRUE) { - /* Acquire I2C bus ownership. */ - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - esdp |= IXGBE_ESDP_SDP0; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_FLUSH(hw); - - while (timeout) { - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - if (esdp & IXGBE_ESDP_SDP1) - break; - - msleep(5); - timeout--; - } - - if (!timeout) { - hw_dbg(hw, "Driver can't access resource," - " acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; - goto release_i2c_access; - } - } - - status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data); - -release_i2c_access: - - if (hw->phy.qsfp_shared_i2c_bus == TRUE) { - /* Release I2C bus ownership. */ - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - esdp &= ~IXGBE_ESDP_SDP0; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_FLUSH(hw); - } - - return status; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h deleted file mode 100644 index 0305ed73..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_82599_H_ -#define _IXGBE_82599_H_ - -s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, bool *autoneg); -enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw); -void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); -void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); -void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); -s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete); -s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete); -s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, - bool autoneg_wait_to_complete); -s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg, bool autoneg_wait_to_complete); -s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw); -void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw); -s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw); -s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val); -s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val); -s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw); -s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw); -s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw); -u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw); -s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval); -bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw); -#endif /* _IXGBE_82599_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c deleted file mode 100644 index 1be4c64f..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c +++ /dev/null @@ -1,1142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_api.h" -#include "ixgbe_common.h" - -/** - * ixgbe_init_shared_code - Initialize the shared code - * @hw: pointer to hardware structure - * - * This will assign function pointers and assign the MAC type and PHY code. - * Does not touch the hardware. This function must be called prior to any - * other function in the shared code. The ixgbe_hw structure should be - * memset to 0 prior to calling this function. 
The following fields in - * hw structure should be filled in prior to calling this function: - * hw_addr, back, device_id, vendor_id, subsystem_device_id, - * subsystem_vendor_id, and revision_id - **/ -s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) -{ - s32 status; - - /* - * Set the mac type - */ - ixgbe_set_mac_type(hw); - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - status = ixgbe_init_ops_82598(hw); - break; - case ixgbe_mac_82599EB: - status = ixgbe_init_ops_82599(hw); - break; - case ixgbe_mac_X540: - status = ixgbe_init_ops_X540(hw); - break; - default: - status = IXGBE_ERR_DEVICE_NOT_SUPPORTED; - break; - } - - return status; -} - -/** - * ixgbe_set_mac_type - Sets MAC type - * @hw: pointer to the HW structure - * - * This function sets the mac type of the adapter based on the - * vendor ID and device ID stored in the hw structure. - **/ -s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - - if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) { - switch (hw->device_id) { - case IXGBE_DEV_ID_82598: - case IXGBE_DEV_ID_82598_BX: - case IXGBE_DEV_ID_82598AF_SINGLE_PORT: - case IXGBE_DEV_ID_82598AF_DUAL_PORT: - case IXGBE_DEV_ID_82598AT: - case IXGBE_DEV_ID_82598AT2: - case IXGBE_DEV_ID_82598EB_CX4: - case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: - case IXGBE_DEV_ID_82598_DA_DUAL_PORT: - case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: - case IXGBE_DEV_ID_82598EB_XF_LR: - case IXGBE_DEV_ID_82598EB_SFP_LOM: - hw->mac.type = ixgbe_mac_82598EB; - break; - case IXGBE_DEV_ID_82599_KX4: - case IXGBE_DEV_ID_82599_KX4_MEZZ: - case IXGBE_DEV_ID_82599_XAUI_LOM: - case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: - case IXGBE_DEV_ID_82599_KR: - case IXGBE_DEV_ID_82599_SFP: - case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: - case IXGBE_DEV_ID_82599_SFP_FCOE: - case IXGBE_DEV_ID_82599_SFP_EM: - case IXGBE_DEV_ID_82599_SFP_SF2: - case IXGBE_DEV_ID_82599_QSFP_SF_QP: - case IXGBE_DEV_ID_82599EN_SFP: - case IXGBE_DEV_ID_82599_CX4: - case IXGBE_DEV_ID_82599_LS: - case IXGBE_DEV_ID_82599_T3_LOM: - hw->mac.type = ixgbe_mac_82599EB; - break; - case IXGBE_DEV_ID_X540T: - hw->mac.type = ixgbe_mac_X540; - break; - default: - ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; - break; - } - } else { - ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; - } - - hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n", - hw->mac.type, ret_val); - return ret_val; -} - -/** - * ixgbe_init_hw - Initialize the hardware - * @hw: pointer to hardware structure - * - * Initialize the hardware by resetting and then starting the hardware - **/ -s32 ixgbe_init_hw(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_reset_hw - Performs a hardware reset - * @hw: pointer to hardware structure - * - * Resets the hardware by resetting the transmit and receive units, masks and - * clears all interrupts, performs a PHY reset, and performs a MAC reset - **/ -s32 ixgbe_reset_hw(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_start_hw - Prepares hardware for Rx/Tx - * @hw: pointer to hardware structure - * - * Starts the hardware by filling the bus info structure and media type, - * clears all on chip counters, initializes receive address registers, - * multicast table, VLAN filter table, calls routine to setup link and - * flow control settings, and leaves transmit and receive units disabled - * and uninitialized. 
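Every wrapper in the deleted ixgbe_api.c follows one pattern: ixgbe_init_shared_code() installs a per-MAC ops table, and each exported call then dispatches through a function pointer, returning IXGBE_NOT_IMPLEMENTED whenever that pointer was never filled in. A minimal sketch of that dispatch is below; the types, names, and error value are illustrative, not the driver's own.

#include <stdio.h>

#define NOT_IMPLEMENTED 0x7FFFFFFF  /* stand-in for the driver's error code */

struct mac_ops {
    int (*reset_hw)(void *hw);
    int (*start_hw)(void *hw);
};

struct hw {
    struct mac_ops ops;
};

/* Call through the op if installed, otherwise return the given error,
 * mirroring the ixgbe_call_func() pattern used by the deleted wrappers. */
#define call_op(h, op, err) \
    ((h)->ops.op ? (h)->ops.op(h) : (err))

static int dummy_reset(void *hw) { (void)hw; return 0; }

int main(void)
{
    struct hw hw = { .ops = { .reset_hw = dummy_reset, .start_hw = NULL } };

    printf("reset_hw -> %d\n", call_op(&hw, reset_hw, NOT_IMPLEMENTED)); /* 0 */
    printf("start_hw -> %d\n", call_op(&hw, start_hw, NOT_IMPLEMENTED)); /* not implemented */
    return 0;
}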
- **/ -s32 ixgbe_start_hw(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_clear_hw_cntrs - Clear hardware counters - * @hw: pointer to hardware structure - * - * Clears all hardware statistics counters by reading them from the hardware - * Statistics counters are clear on read. - **/ -s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_media_type - Get media type - * @hw: pointer to hardware structure - * - * Returns the media type (fiber, copper, backplane) - **/ -enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw), - ixgbe_media_type_unknown); -} - -/** - * ixgbe_get_mac_addr - Get MAC address - * @hw: pointer to hardware structure - * @mac_addr: Adapter MAC address - * - * Reads the adapter's MAC address from the first Receive Address Register - * (RAR0) A reset of the adapter must have been performed prior to calling - * this function in order for the MAC address to have been loaded from the - * EEPROM into RAR0 - **/ -s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr, - (hw, mac_addr), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_san_mac_addr - Get SAN MAC address - * @hw: pointer to hardware structure - * @san_mac_addr: SAN MAC address - * - * Reads the SAN MAC address from the EEPROM, if it's available. This is - * per-port, so set_lan_id() must be called before reading the addresses. - **/ -s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr, - (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_san_mac_addr - Write a SAN MAC address - * @hw: pointer to hardware structure - * @san_mac_addr: SAN MAC address - * - * Writes A SAN MAC address to the EEPROM. - **/ -s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr, - (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_device_caps - Get additional device capabilities - * @hw: pointer to hardware structure - * @device_caps: the EEPROM word for device capabilities - * - * Reads the extra device capabilities from the EEPROM - **/ -s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_device_caps, - (hw, device_caps), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM - * @hw: pointer to hardware structure - * @wwnn_prefix: the alternative WWNN prefix - * @wwpn_prefix: the alternative WWPN prefix - * - * This function will read the EEPROM from the alternative SAN MAC address - * block to check the support for the alternative WWNN/WWPN prefix support. 
- **/ -s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, - u16 *wwpn_prefix) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix, - (hw, wwnn_prefix, wwpn_prefix), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_fcoe_boot_status - Get FCOE boot status from EEPROM - * @hw: pointer to hardware structure - * @bs: the fcoe boot status - * - * This function will read the FCOE boot status from the iSCSI FCOE block - **/ -s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status, - (hw, bs), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_bus_info - Set PCI bus info - * @hw: pointer to hardware structure - * - * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure - **/ -s32 ixgbe_get_bus_info(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_num_of_tx_queues - Get Tx queues - * @hw: pointer to hardware structure - * - * Returns the number of transmit queues for the given adapter. - **/ -u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw) -{ - return hw->mac.max_tx_queues; -} - -/** - * ixgbe_get_num_of_rx_queues - Get Rx queues - * @hw: pointer to hardware structure - * - * Returns the number of receive queues for the given adapter. - **/ -u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw) -{ - return hw->mac.max_rx_queues; -} - -/** - * ixgbe_stop_adapter - Disable Rx/Tx units - * @hw: pointer to hardware structure - * - * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, - * disables transmit and receive units. The adapter_stopped flag is used by - * the shared code and drivers to determine if the adapter is in a stopped - * state and should not touch the hardware. - **/ -s32 ixgbe_stop_adapter(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_read_pba_string - Reads part number string from EEPROM - * @hw: pointer to hardware structure - * @pba_num: stores the part number string from the EEPROM - * @pba_num_size: part number string buffer length - * - * Reads the part number string from the EEPROM. - **/ -s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size) -{ - return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size); -} - -/** - * ixgbe_identify_phy - Get PHY type - * @hw: pointer to hardware structure - * - * Determines the physical layer module found on the current adapter. 
- **/ -s32 ixgbe_identify_phy(struct ixgbe_hw *hw) -{ - s32 status = 0; - - if (hw->phy.type == ixgbe_phy_unknown) { - status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw), - IXGBE_NOT_IMPLEMENTED); - } - - return status; -} - -/** - * ixgbe_reset_phy - Perform a PHY reset - * @hw: pointer to hardware structure - **/ -s32 ixgbe_reset_phy(struct ixgbe_hw *hw) -{ - s32 status = 0; - - if (hw->phy.type == ixgbe_phy_unknown) { - if (ixgbe_identify_phy(hw) != 0) - status = IXGBE_ERR_PHY; - } - - if (status == 0) { - status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw), - IXGBE_NOT_IMPLEMENTED); - } - return status; -} - -/** - * ixgbe_get_phy_firmware_version - - * @hw: pointer to hardware structure - * @firmware_version: pointer to firmware version - **/ -s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version) -{ - s32 status = 0; - - status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version, - (hw, firmware_version), - IXGBE_NOT_IMPLEMENTED); - return status; -} - -/** - * ixgbe_read_phy_reg - Read PHY register - * @hw: pointer to hardware structure - * @reg_addr: 32 bit address of PHY register to read - * @phy_data: Pointer to read data from PHY register - * - * Reads a value from a specified PHY register - **/ -s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 *phy_data) -{ - if (hw->phy.id == 0) - ixgbe_identify_phy(hw); - - return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr, - device_type, phy_data), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_write_phy_reg - Write PHY register - * @hw: pointer to hardware structure - * @reg_addr: 32 bit PHY register to write - * @phy_data: Data to write to the PHY register - * - * Writes a value to specified PHY register - **/ -s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 phy_data) -{ - if (hw->phy.id == 0) - ixgbe_identify_phy(hw); - - return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr, - device_type, phy_data), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_setup_phy_link - Restart PHY autoneg - * @hw: pointer to hardware structure - * - * Restart autonegotiation and PHY and waits for completion. - **/ -s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_check_phy_link - Determine link and speed status - * @hw: pointer to hardware structure - * - * Reads a PHY register to determine if link is up and the current speed for - * the PHY. 
- **/ -s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *link_up) -{ - return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed, - link_up), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_setup_phy_link_speed - Set auto advertise - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * - * Sets the auto advertised capabilities - **/ -s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete) -{ - return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed, - autoneg, autoneg_wait_to_complete), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_check_link - Get link and speed status - * @hw: pointer to hardware structure - * - * Reads the links register to determine if link is up and the current speed - **/ -s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *link_up, bool link_up_wait_to_complete) -{ - return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed, - link_up, link_up_wait_to_complete), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_disable_tx_laser - Disable Tx laser - * @hw: pointer to hardware structure - * - * If the driver needs to disable the laser on SFI optics. - **/ -void ixgbe_disable_tx_laser(struct ixgbe_hw *hw) -{ - if (hw->mac.ops.disable_tx_laser) - hw->mac.ops.disable_tx_laser(hw); -} - -/** - * ixgbe_enable_tx_laser - Enable Tx laser - * @hw: pointer to hardware structure - * - * If the driver needs to enable the laser on SFI optics. - **/ -void ixgbe_enable_tx_laser(struct ixgbe_hw *hw) -{ - if (hw->mac.ops.enable_tx_laser) - hw->mac.ops.enable_tx_laser(hw); -} - -/** - * ixgbe_flap_tx_laser - flap Tx laser to start autotry process - * @hw: pointer to hardware structure - * - * When the driver changes the link speeds that it can support then - * flap the tx laser to alert the link partner to start autotry - * process on its end. - **/ -void ixgbe_flap_tx_laser(struct ixgbe_hw *hw) -{ - if (hw->mac.ops.flap_tx_laser) - hw->mac.ops.flap_tx_laser(hw); -} - -/** - * ixgbe_setup_link - Set link speed - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * - * Configures link settings. Restarts the link. - * Performs autonegotiation if needed. - **/ -s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete) -{ - return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed, - autoneg, autoneg_wait_to_complete), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_link_capabilities - Returns link capabilities - * @hw: pointer to hardware structure - * - * Determines the link capabilities of the current configuration. - **/ -s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *autoneg) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw, - speed, autoneg), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_led_on - Turn on LEDs - * @hw: pointer to hardware structure - * @index: led number to turn on - * - * Turns on the software controllable LEDs. - **/ -s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index) -{ - return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_led_off - Turn off LEDs - * @hw: pointer to hardware structure - * @index: led number to turn off - * - * Turns off the software controllable LEDs. 
- **/ -s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index) -{ - return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_blink_led_start - Blink LEDs - * @hw: pointer to hardware structure - * @index: led number to blink - * - * Blink LED based on index. - **/ -s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index) -{ - return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_blink_led_stop - Stop blinking LEDs - * @hw: pointer to hardware structure - * - * Stop blinking LED based on index. - **/ -s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index) -{ - return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_init_eeprom_params - Initialize EEPROM parameters - * @hw: pointer to hardware structure - * - * Initializes the EEPROM parameters ixgbe_eeprom_info within the - * ixgbe_hw struct in order to set up EEPROM access. - **/ -s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw), - IXGBE_NOT_IMPLEMENTED); -} - - -/** - * ixgbe_write_eeprom - Write word to EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be written to - * @data: 16 bit word to be written to the EEPROM - * - * Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not - * called after this function, the EEPROM will most likely contain an - * invalid checksum. - **/ -s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_write_eeprom_buffer - Write word(s) to EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be written to - * @data: 16 bit word(s) to be written to the EEPROM - * @words: number of words - * - * Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not - * called after this function, the EEPROM will most likely contain an - * invalid checksum. 
- **/ -s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words, - u16 *data) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer, - (hw, offset, words, data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_read_eeprom - Read word from EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be read - * @data: read 16 bit value from EEPROM - * - * Reads 16 bit value from EEPROM - **/ -s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_read_eeprom_buffer - Read word(s) from EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be read - * @data: read 16 bit word(s) from EEPROM - * @words: number of words - * - * Reads 16 bit word(s) from EEPROM - **/ -s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer, - (hw, offset, words, data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_validate_eeprom_checksum - Validate EEPROM checksum - * @hw: pointer to hardware structure - * @checksum_val: calculated checksum - * - * Performs checksum calculation and validates the EEPROM checksum - **/ -s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum, - (hw, checksum_val), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_eeprom_update_checksum - Updates the EEPROM checksum - * @hw: pointer to hardware structure - **/ -s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_insert_mac_addr - Find a RAR for this mac address - * @hw: pointer to hardware structure - * @addr: Address to put into receive address register - * @vmdq: VMDq pool to assign - * - * Puts an ethernet address into a receive address register, or - * finds the rar that it is already in; adds to the pool list - **/ -s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) -{ - return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr, - (hw, addr, vmdq), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_rar - Set Rx address register - * @hw: pointer to hardware structure - * @index: Receive address register to write - * @addr: Address to put into receive address register - * @vmdq: VMDq "set" - * @enable_addr: set flag that address is active - * - * Puts an ethernet address into a receive address register. - **/ -s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, - u32 enable_addr) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq, - enable_addr), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_clear_rar - Clear Rx address register - * @hw: pointer to hardware structure - * @index: Receive address register to write - * - * Puts an ethernet address into a receive address register. 
- **/ -s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index) -{ - return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_vmdq - Associate a VMDq index with a receive address - * @hw: pointer to hardware structure - * @rar: receive address register index to associate with VMDq index - * @vmdq: VMDq set or pool index - **/ -s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq), - IXGBE_NOT_IMPLEMENTED); - -} - -/** - * ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address - * @hw: pointer to hardware structure - * @vmdq: VMDq default pool index - **/ -s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac, - (hw, vmdq), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address - * @hw: pointer to hardware structure - * @rar: receive address register index to disassociate with VMDq index - * @vmdq: VMDq set or pool index - **/ -s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_init_rx_addrs - Initializes receive address filters. - * @hw: pointer to hardware structure - * - * Places the MAC address in receive address register 0 and clears the rest - * of the receive address registers. Clears the multicast table. Assumes - * the receiver is in reset when the routine is called. - **/ -s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_num_rx_addrs - Returns the number of RAR entries. - * @hw: pointer to hardware structure - **/ -u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw) -{ - return hw->mac.num_rar_entries; -} - -/** - * ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses - * @hw: pointer to hardware structure - * @addr_list: the list of new multicast addresses - * @addr_count: number of addresses - * @func: iterator function to walk the multicast address list - * - * The given list replaces any existing list. Clears the secondary addrs from - * receive address registers. Uses unused receive address registers for the - * first secondary addresses, and falls back to promiscuous mode as needed. - **/ -s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr func) -{ - return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw, - addr_list, addr_count, func), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses - * @hw: pointer to hardware structure - * @mc_addr_list: the list of new multicast addresses - * @mc_addr_count: number of addresses - * @func: iterator function to walk the multicast address list - * - * The given list replaces any existing list. Clears the MC addrs from receive - * address registers and the multicast table. Uses unused receive address - * registers for the first multicast addresses, and hashes the rest into the - * multicast table. 
- **/ -s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count, ixgbe_mc_addr_itr func, - bool clear) -{ - return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw, - mc_addr_list, mc_addr_count, func, clear), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_enable_mc - Enable multicast address in RAR - * @hw: pointer to hardware structure - * - * Enables multicast address in RAR and the use of the multicast hash table. - **/ -s32 ixgbe_enable_mc(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_disable_mc - Disable multicast address in RAR - * @hw: pointer to hardware structure - * - * Disables multicast address in RAR and the use of the multicast hash table. - **/ -s32 ixgbe_disable_mc(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_clear_vfta - Clear VLAN filter table - * @hw: pointer to hardware structure - * - * Clears the VLAN filer table, and the VMDq index associated with the filter - **/ -s32 ixgbe_clear_vfta(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_vfta - Set VLAN filter table - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * @vind: VMDq output index that maps queue to VLAN id in VFTA - * @vlan_on: boolean flag to turn on/off VLAN in VFTA - * - * Turn on/off specified VLAN in the VLAN filter table. - **/ -s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind, - vlan_on), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_vlvf - Set VLAN Pool Filter - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * @vind: VMDq output index that maps queue to VLAN id in VFVFB - * @vlan_on: boolean flag to turn on/off VLAN in VFVF - * @vfta_changed: pointer to boolean flag which indicates whether VFTA - * should be changed - * - * Turn on/off specified bit in VLVF table. - **/ -s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, - bool *vfta_changed) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind, - vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_fc_enable - Enable flow control - * @hw: pointer to hardware structure - * - * Configures the flow control settings based on SW configuration. 
- **/ -s32 ixgbe_fc_enable(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_set_fw_drv_ver - Try to send the driver version number FW - * @hw: pointer to hardware structure - * @maj: driver major number to be sent to firmware - * @min: driver minor number to be sent to firmware - * @build: driver build number to be sent to firmware - * @ver: driver version number to be sent to firmware - **/ -s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver) -{ - return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min, - build, ver), IXGBE_NOT_IMPLEMENTED); -} - - -/** - * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data - * @hw: pointer to hardware structure - * - * Updates the temperatures in mac.thermal_sensor_data - **/ -s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds - * @hw: pointer to hardware structure - * - * Inits the thermal sensor thresholds according to the NVM map - **/ -s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw), - IXGBE_NOT_IMPLEMENTED); -} -/** - * ixgbe_read_analog_reg8 - Reads 8 bit analog register - * @hw: pointer to hardware structure - * @reg: analog register to read - * @val: read value - * - * Performs write operation to analog register specified. - **/ -s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val) -{ - return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg, - val), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_write_analog_reg8 - Writes 8 bit analog register - * @hw: pointer to hardware structure - * @reg: analog register to write - * @val: value to write - * - * Performs write operation to Atlas analog register specified. - **/ -s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val) -{ - return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg, - val), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_init_uta_tables - Initializes Unicast Table Arrays. - * @hw: pointer to hardware structure - * - * Initializes the Unicast Table Arrays to zero on device load. This - * is part of the Rx init addr execution path. - **/ -s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address - * @hw: pointer to hardware structure - * @byte_offset: byte offset to read - * @data: value read - * - * Performs byte read operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, - u8 *data) -{ - return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset, - dev_addr, data), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_write_i2c_byte - Writes 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @data: value to write - * - * Performs byte write operation to SFP module's EEPROM over I2C interface - * at a specified device address. 
- **/ -s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, - u8 data) -{ - return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset, - dev_addr, data), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface - * @hw: pointer to hardware structure - * @byte_offset: EEPROM byte offset to write - * @eeprom_data: value to write - * - * Performs byte write operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, - u8 byte_offset, u8 eeprom_data) -{ - return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom, - (hw, byte_offset, eeprom_data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface - * @hw: pointer to hardware structure - * @byte_offset: EEPROM byte offset to read - * @eeprom_data: value read - * - * Performs byte read operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data) -{ - return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom, - (hw, byte_offset, eeprom_data), - IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_get_supported_physical_layer - Returns physical layer type - * @hw: pointer to hardware structure - * - * Determines physical layer capabilities of the current configuration. - **/ -u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer, - (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN); -} - -/** - * ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics - * @hw: pointer to hardware structure - * @regval: bitfield to write to the Rx DMA register - * - * Enables the Rx DMA unit of the device. - **/ -s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval) -{ - return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma, - (hw, regval), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_disable_sec_rx_path - Stops the receive data path - * @hw: pointer to hardware structure - * - * Stops the receive data path. - **/ -s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path, - (hw), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_enable_sec_rx_path - Enables the receive data path - * @hw: pointer to hardware structure - * - * Enables the receive data path. 
- **/ -s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw) -{ - return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path, - (hw), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to acquire - * - * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified - * function (CSR, PHY0, PHY1, EEPROM, Flash) - **/ -s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) -{ - return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync, - (hw, mask), IXGBE_NOT_IMPLEMENTED); -} - -/** - * ixgbe_release_swfw_semaphore - Release SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to release - * - * Releases the SWFW semaphore through SW_FW_SYNC register for the specified - * function (CSR, PHY0, PHY1, EEPROM, Flash) - **/ -void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) -{ - if (hw->mac.ops.release_swfw_sync) - hw->mac.ops.release_swfw_sync(hw, mask); -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h deleted file mode 100644 index 11247a0b..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h +++ /dev/null @@ -1,153 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_API_H_ -#define _IXGBE_API_H_ - -#include "ixgbe_type.h" - -s32 ixgbe_init_shared_code(struct ixgbe_hw *hw); - -extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw); -extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); -extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw); - -s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); -s32 ixgbe_init_hw(struct ixgbe_hw *hw); -s32 ixgbe_reset_hw(struct ixgbe_hw *hw); -s32 ixgbe_start_hw(struct ixgbe_hw *hw); -s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw); -enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw); -s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr); -s32 ixgbe_get_bus_info(struct ixgbe_hw *hw); -u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw); -u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw); -s32 ixgbe_stop_adapter(struct ixgbe_hw *hw); -s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size); - -s32 ixgbe_identify_phy(struct ixgbe_hw *hw); -s32 ixgbe_reset_phy(struct ixgbe_hw *hw); -s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 *phy_data); -s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 phy_data); - -s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw); -s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *link_up); -s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete); -void ixgbe_disable_tx_laser(struct ixgbe_hw *hw); -void ixgbe_enable_tx_laser(struct ixgbe_hw *hw); -void ixgbe_flap_tx_laser(struct ixgbe_hw *hw); -s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg, bool autoneg_wait_to_complete); -s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, 
- bool *link_up, bool link_up_wait_to_complete); -s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *autoneg); -s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index); - -s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw); -s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data); -s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); - -s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val); -s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw); - -s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); -s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, - u32 enable_addr); -s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq); -s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw); -u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw); -s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr func); -s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count, ixgbe_mc_addr_itr func, - bool clear); -void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq); -s32 ixgbe_enable_mc(struct ixgbe_hw *hw); -s32 ixgbe_disable_mc(struct ixgbe_hw *hw); -s32 ixgbe_clear_vfta(struct ixgbe_hw *hw); -s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, - u32 vind, bool vlan_on); -s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on, bool *vfta_changed); -s32 ixgbe_fc_enable(struct ixgbe_hw *hw); -s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver); -s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw); -s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw); -void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr); -s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, - u16 *firmware_version); -s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val); -s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val); -s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw); -s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data); -u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw); -s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval); -s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw); -s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw); -s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw); -s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl); -s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl); -s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_hash_dword input, - union ixgbe_atr_hash_dword common, - u8 queue); -s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input_mask); -s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - u16 soft_id, u8 queue); -s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - u16 soft_id); -s32 
ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, - union ixgbe_atr_input *input, - union ixgbe_atr_input *mask, - u16 soft_id, - u8 queue); -void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, - union ixgbe_atr_input *mask); -u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, - union ixgbe_atr_hash_dword common); -s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, - u8 *data); -s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, - u8 data); -s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data); -s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); -s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); -s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps); -s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); -void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); -s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, - u16 *wwpn_prefix); -s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs); - -#endif /* _IXGBE_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c deleted file mode 100644 index e9b9529a..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c +++ /dev/null @@ -1,4067 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_common.h" -#include "ixgbe_phy.h" -#include "ixgbe_api.h" - -static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw); -static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw); -static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw); -static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw); -static void ixgbe_standby_eeprom(struct ixgbe_hw *hw); -static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, - u16 count); -static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count); -static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); -static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); -static void ixgbe_release_eeprom(struct ixgbe_hw *hw); - -static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr); -static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, - u16 *san_mac_offset); -static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, - u16 offset); - -/** - * ixgbe_init_ops_generic - Inits function ptrs - * @hw: pointer to the hardware structure - * - * Initialize the function pointers. 
- **/ -s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - struct ixgbe_mac_info *mac = &hw->mac; - u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - /* EEPROM */ - eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic; - /* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */ - if (eec & IXGBE_EEC_PRES) { - eeprom->ops.read = &ixgbe_read_eerd_generic; - eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic; - } else { - eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic; - eeprom->ops.read_buffer = - &ixgbe_read_eeprom_buffer_bit_bang_generic; - } - eeprom->ops.write = &ixgbe_write_eeprom_generic; - eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic; - eeprom->ops.validate_checksum = - &ixgbe_validate_eeprom_checksum_generic; - eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic; - eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic; - - /* MAC */ - mac->ops.init_hw = &ixgbe_init_hw_generic; - mac->ops.reset_hw = NULL; - mac->ops.start_hw = &ixgbe_start_hw_generic; - mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic; - mac->ops.get_media_type = NULL; - mac->ops.get_supported_physical_layer = NULL; - mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic; - mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic; - mac->ops.stop_adapter = &ixgbe_stop_adapter_generic; - mac->ops.get_bus_info = &ixgbe_get_bus_info_generic; - mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie; - mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync; - mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync; - - /* LEDs */ - mac->ops.led_on = &ixgbe_led_on_generic; - mac->ops.led_off = &ixgbe_led_off_generic; - mac->ops.blink_led_start = &ixgbe_blink_led_start_generic; - mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic; - - /* RAR, Multicast, VLAN */ - mac->ops.set_rar = &ixgbe_set_rar_generic; - mac->ops.clear_rar = &ixgbe_clear_rar_generic; - mac->ops.insert_mac_addr = NULL; - mac->ops.set_vmdq = NULL; - mac->ops.clear_vmdq = NULL; - mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic; - mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic; - mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic; - mac->ops.enable_mc = &ixgbe_enable_mc_generic; - mac->ops.disable_mc = &ixgbe_disable_mc_generic; - mac->ops.clear_vfta = NULL; - mac->ops.set_vfta = NULL; - mac->ops.set_vlvf = NULL; - mac->ops.init_uta_tables = NULL; - - /* Flow Control */ - mac->ops.fc_enable = &ixgbe_fc_enable_generic; - - /* Link */ - mac->ops.get_link_capabilities = NULL; - mac->ops.setup_link = NULL; - mac->ops.check_link = NULL; - - return 0; -} - -/** - * ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow - * control - * @hw: pointer to hardware structure - * - * There are several phys that do not support autoneg flow control. This - * function check the device id to see if the associated phy supports - * autoneg flow control. - **/ -static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) -{ - - switch (hw->device_id) { - case IXGBE_DEV_ID_X540T: - return 0; - case IXGBE_DEV_ID_82599_T3_LOM: - return 0; - default: - return IXGBE_ERR_FC_NOT_SUPPORTED; - } -} - -/** - * ixgbe_setup_fc - Set up flow control - * @hw: pointer to hardware structure - * - * Called at init time to set up flow control. - **/ -static s32 ixgbe_setup_fc(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - u32 reg = 0, reg_bp = 0; - u16 reg_cu = 0; - - /* - * Validate the requested mode. 
Strict IEEE mode does not allow - * ixgbe_fc_rx_pause because it will cause us to fail at UNH. - */ - if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { - hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; - goto out; - } - - /* - * 10gig parts do not have a word in the EEPROM to determine the - * default flow control setting, so we explicitly set it to full. - */ - if (hw->fc.requested_mode == ixgbe_fc_default) - hw->fc.requested_mode = ixgbe_fc_full; - - /* - * Set up the 1G and 10G flow control advertisement registers so the - * HW will be able to do fc autoneg once the cable is plugged in. If - * we link at 10G, the 1G advertisement is harmless and vice versa. - */ - switch (hw->phy.media_type) { - case ixgbe_media_type_fiber: - case ixgbe_media_type_backplane: - reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); - reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC); - break; - case ixgbe_media_type_copper: - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®_cu); - break; - default: - break; - } - - /* - * The possible values of fc.requested_mode are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause frames, - * but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames but - * we do not support receiving pause frames). - * 3: Both Rx and Tx flow control (symmetric) are enabled. - * other: Invalid. - */ - switch (hw->fc.requested_mode) { - case ixgbe_fc_none: - /* Flow control completely disabled by software override. */ - reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE); - if (hw->phy.media_type == ixgbe_media_type_backplane) - reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE | - IXGBE_AUTOC_ASM_PAUSE); - else if (hw->phy.media_type == ixgbe_media_type_copper) - reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE); - break; - case ixgbe_fc_tx_pause: - /* - * Tx Flow control is enabled, and Rx Flow control is - * disabled by software override. - */ - reg |= IXGBE_PCS1GANA_ASM_PAUSE; - reg &= ~IXGBE_PCS1GANA_SYM_PAUSE; - if (hw->phy.media_type == ixgbe_media_type_backplane) { - reg_bp |= IXGBE_AUTOC_ASM_PAUSE; - reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE; - } else if (hw->phy.media_type == ixgbe_media_type_copper) { - reg_cu |= IXGBE_TAF_ASM_PAUSE; - reg_cu &= ~IXGBE_TAF_SYM_PAUSE; - } - break; - case ixgbe_fc_rx_pause: - /* - * Rx Flow control is enabled and Tx Flow control is - * disabled by software override. Since there really - * isn't a way to advertise that we are capable of RX - * Pause ONLY, we will advertise that we support both - * symmetric and asymmetric Rx PAUSE, as such we fall - * through to the fc_full statement. Later, we will - * disable the adapter's ability to send PAUSE frames. - */ - case ixgbe_fc_full: - /* Flow control (both Rx and Tx) is enabled by SW override. */ - reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE; - if (hw->phy.media_type == ixgbe_media_type_backplane) - reg_bp |= IXGBE_AUTOC_SYM_PAUSE | - IXGBE_AUTOC_ASM_PAUSE; - else if (hw->phy.media_type == ixgbe_media_type_copper) - reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE; - break; - default: - hw_dbg(hw, "Flow control param set incorrectly\n"); - ret_val = IXGBE_ERR_CONFIG; - goto out; - break; - } - - if (hw->mac.type != ixgbe_mac_X540) { - /* - * Enable auto-negotiation between the MAC & PHY; - * the MAC will advertise clause 37 flow control. 
- */ - IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg); - reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); - - /* Disable AN timeout */ - if (hw->fc.strict_ieee) - reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN; - - IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg); - hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg); - } - - /* - * AUTOC restart handles negotiation of 1G and 10G on backplane - * and copper. There is no need to set the PCS1GCTL register. - * - */ - if (hw->phy.media_type == ixgbe_media_type_backplane) { - reg_bp |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp); - } else if ((hw->phy.media_type == ixgbe_media_type_copper) && - (ixgbe_device_supports_autoneg_fc(hw) == 0)) { - hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu); - } - - hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg); -out: - return ret_val; -} - -/** - * ixgbe_start_hw_generic - Prepare hardware for Tx/Rx - * @hw: pointer to hardware structure - * - * Starts the hardware by filling the bus info structure and media type, clears - * all on chip counters, initializes receive address registers, multicast - * table, VLAN filter table, calls routine to set up link and flow control - * settings, and leaves transmit and receive units disabled and uninitialized - **/ -s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) -{ - s32 ret_val; - u32 ctrl_ext; - - /* Set the media type */ - hw->phy.media_type = hw->mac.ops.get_media_type(hw); - - /* PHY ops initialization must be done in reset_hw() */ - - /* Clear the VLAN filter table */ - hw->mac.ops.clear_vfta(hw); - - /* Clear statistics registers */ - hw->mac.ops.clear_hw_cntrs(hw); - - /* Set No Snoop Disable */ - ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); - ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS; - IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); - IXGBE_WRITE_FLUSH(hw); - - /* Setup flow control */ - ret_val = ixgbe_setup_fc(hw); - if (ret_val != 0) - goto out; - - /* Clear adapter stopped flag */ - hw->adapter_stopped = false; - -out: - return ret_val; -} - -/** - * ixgbe_start_hw_gen2 - Init sequence for common device family - * @hw: pointer to hw structure - * - * Performs the init sequence common to the second generation - * of 10 GbE devices. 
- * Devices in the second generation: - * 82599 - * X540 - **/ -s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) -{ - u32 i; - u32 regval; - - /* Clear the rate limiters */ - for (i = 0; i < hw->mac.max_tx_queues; i++) { - IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i); - IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0); - } - IXGBE_WRITE_FLUSH(hw); - - /* Disable relaxed ordering */ - for (i = 0; i < hw->mac.max_tx_queues; i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); - } - - for (i = 0; i < hw->mac.max_rx_queues; i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } - - return 0; -} - -/** - * ixgbe_init_hw_generic - Generic hardware initialization - * @hw: pointer to hardware structure - * - * Initialize the hardware by resetting the hardware, filling the bus info - * structure and media type, clears all on chip counters, initializes receive - * address registers, multicast table, VLAN filter table, calls routine to set - * up link and flow control settings, and leaves transmit and receive units - * disabled and uninitialized - **/ -s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) -{ - s32 status; - - /* Reset the hardware */ - status = hw->mac.ops.reset_hw(hw); - - if (status == 0) { - /* Start the HW */ - status = hw->mac.ops.start_hw(hw); - } - - return status; -} - -/** - * ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters - * @hw: pointer to hardware structure - * - * Clears all hardware statistics counters by reading them from the hardware - * Statistics counters are clear on read. - **/ -s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw) -{ - u16 i = 0; - - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - IXGBE_READ_REG(hw, IXGBE_ERRBC); - IXGBE_READ_REG(hw, IXGBE_MSPDC); - for (i = 0; i < 8; i++) - IXGBE_READ_REG(hw, IXGBE_MPC(i)); - - IXGBE_READ_REG(hw, IXGBE_MLFC); - IXGBE_READ_REG(hw, IXGBE_MRFC); - IXGBE_READ_REG(hw, IXGBE_RLEC); - IXGBE_READ_REG(hw, IXGBE_LXONTXC); - IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); - if (hw->mac.type >= ixgbe_mac_82599EB) { - IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); - IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); - } else { - IXGBE_READ_REG(hw, IXGBE_LXONRXC); - IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); - } - - for (i = 0; i < 8; i++) { - IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); - IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); - if (hw->mac.type >= ixgbe_mac_82599EB) { - IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); - IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); - } else { - IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); - IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); - } - } - if (hw->mac.type >= ixgbe_mac_82599EB) - for (i = 0; i < 8; i++) - IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); - IXGBE_READ_REG(hw, IXGBE_PRC64); - IXGBE_READ_REG(hw, IXGBE_PRC127); - IXGBE_READ_REG(hw, IXGBE_PRC255); - IXGBE_READ_REG(hw, IXGBE_PRC511); - IXGBE_READ_REG(hw, IXGBE_PRC1023); - IXGBE_READ_REG(hw, IXGBE_PRC1522); - IXGBE_READ_REG(hw, IXGBE_GPRC); - IXGBE_READ_REG(hw, IXGBE_BPRC); - IXGBE_READ_REG(hw, IXGBE_MPRC); - IXGBE_READ_REG(hw, IXGBE_GPTC); - IXGBE_READ_REG(hw, IXGBE_GORCL); - IXGBE_READ_REG(hw, IXGBE_GORCH); - IXGBE_READ_REG(hw, IXGBE_GOTCL); - IXGBE_READ_REG(hw, IXGBE_GOTCH); - if (hw->mac.type == ixgbe_mac_82598EB) - for (i = 0; i < 8; i++) - IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - IXGBE_READ_REG(hw, IXGBE_RUC); - IXGBE_READ_REG(hw, IXGBE_RFC); - 
IXGBE_READ_REG(hw, IXGBE_ROC); - IXGBE_READ_REG(hw, IXGBE_RJC); - IXGBE_READ_REG(hw, IXGBE_MNGPRC); - IXGBE_READ_REG(hw, IXGBE_MNGPDC); - IXGBE_READ_REG(hw, IXGBE_MNGPTC); - IXGBE_READ_REG(hw, IXGBE_TORL); - IXGBE_READ_REG(hw, IXGBE_TORH); - IXGBE_READ_REG(hw, IXGBE_TPR); - IXGBE_READ_REG(hw, IXGBE_TPT); - IXGBE_READ_REG(hw, IXGBE_PTC64); - IXGBE_READ_REG(hw, IXGBE_PTC127); - IXGBE_READ_REG(hw, IXGBE_PTC255); - IXGBE_READ_REG(hw, IXGBE_PTC511); - IXGBE_READ_REG(hw, IXGBE_PTC1023); - IXGBE_READ_REG(hw, IXGBE_PTC1522); - IXGBE_READ_REG(hw, IXGBE_MPTC); - IXGBE_READ_REG(hw, IXGBE_BPTC); - for (i = 0; i < 16; i++) { - IXGBE_READ_REG(hw, IXGBE_QPRC(i)); - IXGBE_READ_REG(hw, IXGBE_QPTC(i)); - if (hw->mac.type >= ixgbe_mac_82599EB) { - IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); - IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); - IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); - IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); - IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); - } else { - IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - } - } - - if (hw->mac.type == ixgbe_mac_X540) { - if (hw->phy.id == 0) - ixgbe_identify_phy(hw); - hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, - IXGBE_MDIO_PCS_DEV_TYPE, &i); - hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH, - IXGBE_MDIO_PCS_DEV_TYPE, &i); - hw->phy.ops.read_reg(hw, IXGBE_LDPCECL, - IXGBE_MDIO_PCS_DEV_TYPE, &i); - hw->phy.ops.read_reg(hw, IXGBE_LDPCECH, - IXGBE_MDIO_PCS_DEV_TYPE, &i); - } - - return 0; -} - -/** - * ixgbe_read_pba_string_generic - Reads part number string from EEPROM - * @hw: pointer to hardware structure - * @pba_num: stores the part number string from the EEPROM - * @pba_num_size: part number string buffer length - * - * Reads the part number string from the EEPROM. - **/ -s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, - u32 pba_num_size) -{ - s32 ret_val; - u16 data; - u16 pba_ptr; - u16 offset; - u16 length; - - if (pba_num == NULL) { - hw_dbg(hw, "PBA string buffer was null\n"); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); - if (ret_val) { - hw_dbg(hw, "NVM Read Error\n"); - return ret_val; - } - - ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr); - if (ret_val) { - hw_dbg(hw, "NVM Read Error\n"); - return ret_val; - } - - /* - * if data is not ptr guard the PBA must be in legacy format which - * means pba_ptr is actually our second data word for the PBA number - * and we can decode it into an ascii string - */ - if (data != IXGBE_PBANUM_PTR_GUARD) { - hw_dbg(hw, "NVM PBA number is not stored as string\n"); - - /* we will need 11 characters to store the PBA */ - if (pba_num_size < 11) { - hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; - } - - /* extract hex string from data and pba_ptr */ - pba_num[0] = (data >> 12) & 0xF; - pba_num[1] = (data >> 8) & 0xF; - pba_num[2] = (data >> 4) & 0xF; - pba_num[3] = data & 0xF; - pba_num[4] = (pba_ptr >> 12) & 0xF; - pba_num[5] = (pba_ptr >> 8) & 0xF; - pba_num[6] = '-'; - pba_num[7] = 0; - pba_num[8] = (pba_ptr >> 4) & 0xF; - pba_num[9] = pba_ptr & 0xF; - - /* put a null character on the end of our string */ - pba_num[10] = '\0'; - - /* switch all the data but the '-' to hex char */ - for (offset = 0; offset < 10; offset++) { - if (pba_num[offset] < 0xA) - pba_num[offset] += '0'; - else if (pba_num[offset] < 0x10) - pba_num[offset] += 'A' - 0xA; - } - - return 0; - } - - ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length); - if (ret_val) { - hw_dbg(hw, "NVM Read Error\n"); - return ret_val; - } - - if 
(length == 0xFFFF || length == 0) { - hw_dbg(hw, "NVM PBA number section invalid length\n"); - return IXGBE_ERR_PBA_SECTION; - } - - /* check if pba_num buffer is big enough */ - if (pba_num_size < (((u32)length * 2) - 1)) { - hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; - } - - /* trim pba length from start of string */ - pba_ptr++; - length--; - - for (offset = 0; offset < length; offset++) { - ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data); - if (ret_val) { - hw_dbg(hw, "NVM Read Error\n"); - return ret_val; - } - pba_num[offset * 2] = (u8)(data >> 8); - pba_num[(offset * 2) + 1] = (u8)(data & 0xFF); - } - pba_num[offset * 2] = '\0'; - - return 0; -} - -/** - * ixgbe_get_mac_addr_generic - Generic get MAC address - * @hw: pointer to hardware structure - * @mac_addr: Adapter MAC address - * - * Reads the adapter's MAC address from first Receive Address Register (RAR0) - * A reset of the adapter must be performed prior to calling this function - * in order for the MAC address to have been loaded from the EEPROM into RAR0 - **/ -s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr) -{ - u32 rar_high; - u32 rar_low; - u16 i; - - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0)); - rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0)); - - for (i = 0; i < 4; i++) - mac_addr[i] = (u8)(rar_low >> (i*8)); - - for (i = 0; i < 2; i++) - mac_addr[i+4] = (u8)(rar_high >> (i*8)); - - return 0; -} - -/** - * ixgbe_get_bus_info_generic - Generic set PCI bus info - * @hw: pointer to hardware structure - * - * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure - **/ -s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - u16 link_status; - - hw->bus.type = ixgbe_bus_type_pci_express; - - /* Get the negotiated link width and speed from PCI config space */ - link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); - - switch (link_status & IXGBE_PCI_LINK_WIDTH) { - case IXGBE_PCI_LINK_WIDTH_1: - hw->bus.width = ixgbe_bus_width_pcie_x1; - break; - case IXGBE_PCI_LINK_WIDTH_2: - hw->bus.width = ixgbe_bus_width_pcie_x2; - break; - case IXGBE_PCI_LINK_WIDTH_4: - hw->bus.width = ixgbe_bus_width_pcie_x4; - break; - case IXGBE_PCI_LINK_WIDTH_8: - hw->bus.width = ixgbe_bus_width_pcie_x8; - break; - default: - hw->bus.width = ixgbe_bus_width_unknown; - break; - } - - switch (link_status & IXGBE_PCI_LINK_SPEED) { - case IXGBE_PCI_LINK_SPEED_2500: - hw->bus.speed = ixgbe_bus_speed_2500; - break; - case IXGBE_PCI_LINK_SPEED_5000: - hw->bus.speed = ixgbe_bus_speed_5000; - break; - case IXGBE_PCI_LINK_SPEED_8000: - hw->bus.speed = ixgbe_bus_speed_8000; - break; - default: - hw->bus.speed = ixgbe_bus_speed_unknown; - break; - } - - mac->ops.set_lan_id(hw); - - return 0; -} - -/** - * ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices - * @hw: pointer to the HW structure - * - * Determines the LAN function id by reading memory-mapped registers - * and swaps the port value if requested. 
- **/ -void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) -{ - struct ixgbe_bus_info *bus = &hw->bus; - u32 reg; - - reg = IXGBE_READ_REG(hw, IXGBE_STATUS); - bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; - bus->lan_id = bus->func; - - /* check for a port swap */ - reg = IXGBE_READ_REG(hw, IXGBE_FACTPS); - if (reg & IXGBE_FACTPS_LFS) - bus->func ^= 0x1; -} - -/** - * ixgbe_stop_adapter_generic - Generic stop Tx/Rx units - * @hw: pointer to hardware structure - * - * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, - * disables transmit and receive units. The adapter_stopped flag is used by - * the shared code and drivers to determine if the adapter is in a stopped - * state and should not touch the hardware. - **/ -s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) -{ - u32 reg_val; - u16 i; - - /* - * Set the adapter_stopped flag so other driver functions stop touching - * the hardware - */ - hw->adapter_stopped = true; - - /* Disable the receive unit */ - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0); - - /* Clear interrupt mask to stop interrupts from being generated */ - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); - - /* Clear any pending interrupts, flush previous writes */ - IXGBE_READ_REG(hw, IXGBE_EICR); - - /* Disable the transmit unit. Each queue must be disabled. */ - for (i = 0; i < hw->mac.max_tx_queues; i++) - IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH); - - /* Disable the receive unit by stopping each queue */ - for (i = 0; i < hw->mac.max_rx_queues; i++) { - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - reg_val &= ~IXGBE_RXDCTL_ENABLE; - reg_val |= IXGBE_RXDCTL_SWFLSH; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val); - } - - /* flush all queues disables */ - IXGBE_WRITE_FLUSH(hw); - msleep(2); - - /* - * Prevent the PCI-E bus from from hanging by disabling PCI-E master - * access and verify no pending requests - */ - return ixgbe_disable_pcie_master(hw); -} - -/** - * ixgbe_led_on_generic - Turns on the software controllable LEDs. - * @hw: pointer to hardware structure - * @index: led number to turn on - **/ -s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) -{ - u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - - /* To turn on the LED, set mode to ON. */ - led_reg &= ~IXGBE_LED_MODE_MASK(index); - led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_led_off_generic - Turns off the software controllable LEDs. - * @hw: pointer to hardware structure - * @index: led number to turn off - **/ -s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) -{ - u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - - /* To turn off the LED, set mode to OFF. */ - led_reg &= ~IXGBE_LED_MODE_MASK(index); - led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_init_eeprom_params_generic - Initialize EEPROM params - * @hw: pointer to hardware structure - * - * Initializes the EEPROM parameters ixgbe_eeprom_info within the - * ixgbe_hw struct in order to set up EEPROM access. 
- **/ -s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - u32 eec; - u16 eeprom_size; - - if (eeprom->type == ixgbe_eeprom_uninitialized) { - eeprom->type = ixgbe_eeprom_none; - /* Set default semaphore delay to 10ms which is a well - * tested value */ - eeprom->semaphore_delay = 10; - /* Clear EEPROM page size, it will be initialized as needed */ - eeprom->word_page_size = 0; - - /* - * Check for EEPROM present first. - * If not present leave as none - */ - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - if (eec & IXGBE_EEC_PRES) { - eeprom->type = ixgbe_eeprom_spi; - - /* - * SPI EEPROM is assumed here. This code would need to - * change if a future EEPROM is not SPI. - */ - eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> - IXGBE_EEC_SIZE_SHIFT); - eeprom->word_size = 1 << (eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); - } - - if (eec & IXGBE_EEC_ADDR_SIZE) - eeprom->address_bits = 16; - else - eeprom->address_bits = 8; - hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: " - "%d\n", eeprom->type, eeprom->word_size, - eeprom->address_bits); - } - - return 0; -} - -/** - * ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to write - * @words: number of word(s) - * @data: 16 bit word(s) to write to EEPROM - * - * Reads 16 bit word(s) from EEPROM through bit-bang method - **/ -s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - s32 status = 0; - u16 i, count; - - hw->eeprom.ops.init_params(hw); - - if (words == 0) { - status = IXGBE_ERR_INVALID_ARGUMENT; - goto out; - } - - if (offset + words > hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - /* - * The EEPROM page size cannot be queried from the chip. We do lazy - * initialization. It is worth to do that when we write large buffer. - */ - if ((hw->eeprom.word_page_size == 0) && - (words > IXGBE_EEPROM_PAGE_SIZE_MAX)) - ixgbe_detect_eeprom_page_size_generic(hw, offset); - - /* - * We cannot hold synchronization semaphores for too long - * to avoid other entity starvation. However it is more efficient - * to read in bursts than synchronizing access for each word. - */ - for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { - count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? - IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); - status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i, - count, &data[i]); - - if (status != 0) - break; - } - -out: - return status; -} - -/** - * ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be written to - * @words: number of word(s) - * @data: 16 bit word(s) to be written to the EEPROM - * - * If ixgbe_eeprom_update_checksum is not called after this function, the - * EEPROM will most likely contain an invalid checksum. 
- **/ -static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - s32 status; - u16 word; - u16 page_size; - u16 i; - u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI; - - /* Prepare the EEPROM for writing */ - status = ixgbe_acquire_eeprom(hw); - - if (status == 0) { - if (ixgbe_ready_eeprom(hw) != 0) { - ixgbe_release_eeprom(hw); - status = IXGBE_ERR_EEPROM; - } - } - - if (status == 0) { - for (i = 0; i < words; i++) { - ixgbe_standby_eeprom(hw); - - /* Send the WRITE ENABLE command (8 bit opcode ) */ - ixgbe_shift_out_eeprom_bits(hw, - IXGBE_EEPROM_WREN_OPCODE_SPI, - IXGBE_EEPROM_OPCODE_BITS); - - ixgbe_standby_eeprom(hw); - - /* - * Some SPI eeproms use the 8th address bit embedded - * in the opcode - */ - if ((hw->eeprom.address_bits == 8) && - ((offset + i) >= 128)) - write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; - - /* Send the Write command (8-bit opcode + addr) */ - ixgbe_shift_out_eeprom_bits(hw, write_opcode, - IXGBE_EEPROM_OPCODE_BITS); - ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), - hw->eeprom.address_bits); - - page_size = hw->eeprom.word_page_size; - - /* Send the data in burst via SPI*/ - do { - word = data[i]; - word = (word >> 8) | (word << 8); - ixgbe_shift_out_eeprom_bits(hw, word, 16); - - if (page_size == 0) - break; - - /* do not wrap around page */ - if (((offset + i) & (page_size - 1)) == - (page_size - 1)) - break; - } while (++i < words); - - ixgbe_standby_eeprom(hw); - msleep(10); - } - /* Done with writing - release the EEPROM */ - ixgbe_release_eeprom(hw); - } - - return status; -} - -/** - * ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be written to - * @data: 16 bit word to be written to the EEPROM - * - * If ixgbe_eeprom_update_checksum is not called after this function, the - * EEPROM will most likely contain an invalid checksum. - **/ -s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) -{ - s32 status; - - hw->eeprom.ops.init_params(hw); - - if (offset >= hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); - -out: - return status; -} - -/** - * ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be read - * @data: read 16 bit words(s) from EEPROM - * @words: number of word(s) - * - * Reads 16 bit word(s) from EEPROM through bit-bang method - **/ -s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - s32 status = 0; - u16 i, count; - - hw->eeprom.ops.init_params(hw); - - if (words == 0) { - status = IXGBE_ERR_INVALID_ARGUMENT; - goto out; - } - - if (offset + words > hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - /* - * We cannot hold synchronization semaphores for too long - * to avoid other entity starvation. However it is more efficient - * to read in bursts than synchronizing access for each word. - */ - for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { - count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? 
- IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); - - status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i, - count, &data[i]); - - if (status != 0) - break; - } - -out: - return status; -} - -/** - * ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be read - * @words: number of word(s) - * @data: read 16 bit word(s) from EEPROM - * - * Reads 16 bit word(s) from EEPROM through bit-bang method - **/ -static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - s32 status; - u16 word_in; - u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI; - u16 i; - - /* Prepare the EEPROM for reading */ - status = ixgbe_acquire_eeprom(hw); - - if (status == 0) { - if (ixgbe_ready_eeprom(hw) != 0) { - ixgbe_release_eeprom(hw); - status = IXGBE_ERR_EEPROM; - } - } - - if (status == 0) { - for (i = 0; i < words; i++) { - ixgbe_standby_eeprom(hw); - /* - * Some SPI eeproms use the 8th address bit embedded - * in the opcode - */ - if ((hw->eeprom.address_bits == 8) && - ((offset + i) >= 128)) - read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; - - /* Send the READ command (opcode + addr) */ - ixgbe_shift_out_eeprom_bits(hw, read_opcode, - IXGBE_EEPROM_OPCODE_BITS); - ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), - hw->eeprom.address_bits); - - /* Read the data. */ - word_in = ixgbe_shift_in_eeprom_bits(hw, 16); - data[i] = (word_in >> 8) | (word_in << 8); - } - - /* End this read operation */ - ixgbe_release_eeprom(hw); - } - - return status; -} - -/** - * ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be read - * @data: read 16 bit value from EEPROM - * - * Reads 16 bit value from EEPROM through bit-bang method - **/ -s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 *data) -{ - s32 status; - - hw->eeprom.ops.init_params(hw); - - if (offset >= hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); - -out: - return status; -} - -/** - * ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @words: number of word(s) - * @data: 16 bit word(s) from the EEPROM - * - * Reads a 16 bit word(s) from the EEPROM using the EERD register. - **/ -s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - u32 eerd; - s32 status = 0; - u32 i; - - hw->eeprom.ops.init_params(hw); - - if (words == 0) { - status = IXGBE_ERR_INVALID_ARGUMENT; - goto out; - } - - if (offset >= hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - for (i = 0; i < words; i++) { - eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) + - IXGBE_EEPROM_RW_REG_START; - - IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd); - status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ); - - if (status == 0) { - data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >> - IXGBE_EEPROM_RW_REG_DATA); - } else { - hw_dbg(hw, "Eeprom read timed out\n"); - goto out; - } - } -out: - return status; -} - -/** - * ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size - * @hw: pointer to hardware structure - * @offset: offset within the EEPROM to be used as a scratch pad - * - * Discover EEPROM page size by writing marching data at given offset. 
- * This function is called only when we are writing a new large buffer - * at given offset so the data would be overwritten anyway. - **/ -static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, - u16 offset) -{ - u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX]; - s32 status = 0; - u16 i; - - for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++) - data[i] = i; - - hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX; - status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, - IXGBE_EEPROM_PAGE_SIZE_MAX, data); - hw->eeprom.word_page_size = 0; - if (status != 0) - goto out; - - status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); - if (status != 0) - goto out; - - /* - * When writing in burst more than the actual page size - * EEPROM address wraps around current page. - */ - hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0]; - - hw_dbg(hw, "Detected EEPROM page size = %d words.", - hw->eeprom.word_page_size); -out: - return status; -} - -/** - * ixgbe_read_eerd_generic - Read EEPROM word using EERD - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM using the EERD register. - **/ -s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data) -{ - return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data); -} - -/** - * ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to write - * @words: number of word(s) - * @data: word(s) write to the EEPROM - * - * Write a 16 bit word(s) to the EEPROM using the EEWR register. - **/ -s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data) -{ - u32 eewr; - s32 status = 0; - u16 i; - - hw->eeprom.ops.init_params(hw); - - if (words == 0) { - status = IXGBE_ERR_INVALID_ARGUMENT; - goto out; - } - - if (offset >= hw->eeprom.word_size) { - status = IXGBE_ERR_EEPROM; - goto out; - } - - for (i = 0; i < words; i++) { - eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | - (data[i] << IXGBE_EEPROM_RW_REG_DATA) | - IXGBE_EEPROM_RW_REG_START; - - status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); - if (status != 0) { - hw_dbg(hw, "Eeprom write EEWR timed out\n"); - goto out; - } - - IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr); - - status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); - if (status != 0) { - hw_dbg(hw, "Eeprom write EEWR timed out\n"); - goto out; - } - } - -out: - return status; -} - -/** - * ixgbe_write_eewr_generic - Write EEPROM word using EEWR - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to write - * @data: word write to the EEPROM - * - * Write a 16 bit word to the EEPROM using the EEWR register. - **/ -s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data) -{ - return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data); -} - -/** - * ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status - * @hw: pointer to hardware structure - * @ee_reg: EEPROM flag for polling - * - * Polls the status bit (bit 1) of the EERD or EEWR to determine when the - * read or write is done respectively. 
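The EERD/EEWR helpers above all follow the same handshake: put the word address in the register's address field, set the START bit, poll ixgbe_poll_eerd_eewr_done() for DONE, and (for reads) pull the word out of the data field. A minimal standalone sketch of the read side, with the MMIO helpers and the field layout stubbed in (reg_read32/reg_write32, the register offset and the bit positions here are illustrative; the driver takes the real values from ixgbe_type.h):

#include <stdint.h>

#define EERD_START       0x00000001u   /* assumed layout: start, done,   */
#define EERD_DONE        0x00000002u   /* address field, 16-bit data     */
#define EERD_ADDR_SHIFT  2
#define EERD_DATA_SHIFT  16
#define EERD_POLL_LIMIT  100000        /* illustrative poll bound        */

extern uint32_t reg_read32(uint32_t offset);                /* hypothetical */
extern void     reg_write32(uint32_t offset, uint32_t val); /* MMIO helpers */
#define REG_EERD 0x10014u                                   /* illustrative */

/* Read one 16-bit EEPROM word through EERD; 0 on success, -1 on timeout. */
int eerd_read_word(uint16_t word_offset, uint16_t *data)
{
    reg_write32(REG_EERD,
                ((uint32_t)word_offset << EERD_ADDR_SHIFT) | EERD_START);

    for (int i = 0; i < EERD_POLL_LIMIT; i++) {
        uint32_t v = reg_read32(REG_EERD);

        if (v & EERD_DONE) {
            *data = (uint16_t)(v >> EERD_DATA_SHIFT);
            return 0;
        }
    }
    return -1;   /* the driver returns IXGBE_ERR_EEPROM here */
}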
- **/ -s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) -{ - u32 i; - u32 reg; - s32 status = IXGBE_ERR_EEPROM; - - for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) { - if (ee_reg == IXGBE_NVM_POLL_READ) - reg = IXGBE_READ_REG(hw, IXGBE_EERD); - else - reg = IXGBE_READ_REG(hw, IXGBE_EEWR); - - if (reg & IXGBE_EEPROM_RW_REG_DONE) { - status = 0; - break; - } - udelay(5); - } - return status; -} - -/** - * ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang - * @hw: pointer to hardware structure - * - * Prepares EEPROM for access using bit-bang method. This function should - * be called before issuing a command to the EEPROM. - **/ -static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) -{ - s32 status = 0; - u32 eec; - u32 i; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) - != 0) - status = IXGBE_ERR_SWFW_SYNC; - - if (status == 0) { - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - /* Request EEPROM Access */ - eec |= IXGBE_EEC_REQ; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - - for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) { - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - if (eec & IXGBE_EEC_GNT) - break; - udelay(5); - } - - /* Release if grant not acquired */ - if (!(eec & IXGBE_EEC_GNT)) { - eec &= ~IXGBE_EEC_REQ; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - hw_dbg(hw, "Could not acquire EEPROM grant\n"); - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - status = IXGBE_ERR_EEPROM; - } - - /* Setup EEPROM for Read/Write */ - if (status == 0) { - /* Clear CS and SK */ - eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK); - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); - udelay(1); - } - } - return status; -} - -/** - * ixgbe_get_eeprom_semaphore - Get hardware semaphore - * @hw: pointer to hardware structure - * - * Sets the hardware semaphores so EEPROM access can occur for bit-bang method - **/ -static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_EEPROM; - u32 timeout = 2000; - u32 i; - u32 swsm; - - /* Get SMBI software semaphore between device drivers first */ - for (i = 0; i < timeout; i++) { - /* - * If the SMBI bit is 0 when we read it, then the bit will be - * set and we have the semaphore - */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - if (!(swsm & IXGBE_SWSM_SMBI)) { - status = 0; - break; - } - udelay(50); - } - - if (i == timeout) { - hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore " - "not granted.\n"); - /* - * this release is particularly important because our attempts - * above to get the semaphore may have succeeded, and if there - * was a timeout, we should unconditionally clear the semaphore - * bits to free the driver to make progress - */ - ixgbe_release_eeprom_semaphore(hw); - - udelay(50); - /* - * one last try - * If the SMBI bit is 0 when we read it, then the bit will be - * set and we have the semaphore - */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - if (!(swsm & IXGBE_SWSM_SMBI)) - status = 0; - } - - /* Now get the semaphore between SW/FW through the SWESMBI bit */ - if (status == 0) { - for (i = 0; i < timeout; i++) { - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - - /* Set the SW EEPROM semaphore bit to request access */ - swsm |= IXGBE_SWSM_SWESMBI; - IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); - - /* - * If we set the bit successfully then we got the - * semaphore. 
- */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - if (swsm & IXGBE_SWSM_SWESMBI) - break; - - udelay(50); - } - - /* - * Release semaphores and return error if SW EEPROM semaphore - * was not granted because we don't have access to the EEPROM - */ - if (i >= timeout) { - hw_dbg(hw, "SWESMBI Software EEPROM semaphore " - "not granted.\n"); - ixgbe_release_eeprom_semaphore(hw); - status = IXGBE_ERR_EEPROM; - } - } else { - hw_dbg(hw, "Software semaphore SMBI between device drivers " - "not granted.\n"); - } - - return status; -} - -/** - * ixgbe_release_eeprom_semaphore - Release hardware semaphore - * @hw: pointer to hardware structure - * - * This function clears hardware semaphore bits. - **/ -static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw) -{ - u32 swsm; - - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - - /* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */ - swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI); - IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); - IXGBE_WRITE_FLUSH(hw); -} - -/** - * ixgbe_ready_eeprom - Polls for EEPROM ready - * @hw: pointer to hardware structure - **/ -static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) -{ - s32 status = 0; - u16 i; - u8 spi_stat_reg; - - /* - * Read "Status Register" repeatedly until the LSB is cleared. The - * EEPROM will signal that the command has been completed by clearing - * bit 0 of the internal status register. If it's not cleared within - * 5 milliseconds, then error out. - */ - for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) { - ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI, - IXGBE_EEPROM_OPCODE_BITS); - spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8); - if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI)) - break; - - udelay(5); - ixgbe_standby_eeprom(hw); - }; - - /* - * On some parts, SPI write time could vary from 0-20mSec on 3.3V - * devices (and only 0-5mSec on 5V devices) - */ - if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { - hw_dbg(hw, "SPI EEPROM Status error\n"); - status = IXGBE_ERR_EEPROM; - } - - return status; -} - -/** - * ixgbe_standby_eeprom - Returns EEPROM to a "standby" state - * @hw: pointer to hardware structure - **/ -static void ixgbe_standby_eeprom(struct ixgbe_hw *hw) -{ - u32 eec; - - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - /* Toggle CS to flush commands */ - eec |= IXGBE_EEC_CS; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); - udelay(1); - eec &= ~IXGBE_EEC_CS; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); - udelay(1); -} - -/** - * ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM. - * @hw: pointer to hardware structure - * @data: data to send to the EEPROM - * @count: number of bits to shift out - **/ -static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, - u16 count) -{ - u32 eec; - u32 mask; - u32 i; - - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - /* - * Mask is used to shift "count" bits of "data" out to the EEPROM - * one bit at a time. Determine the starting bit based on count - */ - mask = 0x01 << (count - 1); - - for (i = 0; i < count; i++) { - /* - * A "1" is shifted out to the EEPROM by setting bit "DI" to a - * "1", and then raising and then lowering the clock (the SK - * bit controls the clock input to the EEPROM). A "0" is - * shifted out to the EEPROM by setting "DI" to "0" and then - * raising and then lowering the clock. 
- */ - if (data & mask) - eec |= IXGBE_EEC_DI; - else - eec &= ~IXGBE_EEC_DI; - - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); - - udelay(1); - - ixgbe_raise_eeprom_clk(hw, &eec); - ixgbe_lower_eeprom_clk(hw, &eec); - - /* - * Shift mask to signify next bit of data to shift in to the - * EEPROM - */ - mask = mask >> 1; - }; - - /* We leave the "DI" bit set to "0" when we leave this routine. */ - eec &= ~IXGBE_EEC_DI; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); -} - -/** - * ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM - * @hw: pointer to hardware structure - **/ -static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count) -{ - u32 eec; - u32 i; - u16 data = 0; - - /* - * In order to read a register from the EEPROM, we need to shift - * 'count' bits in from the EEPROM. Bits are "shifted in" by raising - * the clock input to the EEPROM (setting the SK bit), and then reading - * the value of the "DO" bit. During this "shifting in" process the - * "DI" bit should always be clear. - */ - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI); - - for (i = 0; i < count; i++) { - data = data << 1; - ixgbe_raise_eeprom_clk(hw, &eec); - - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - eec &= ~(IXGBE_EEC_DI); - if (eec & IXGBE_EEC_DO) - data |= 1; - - ixgbe_lower_eeprom_clk(hw, &eec); - } - - return data; -} - -/** - * ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input. - * @hw: pointer to hardware structure - * @eec: EEC register's current value - **/ -static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) -{ - /* - * Raise the clock input to the EEPROM - * (setting the SK bit), then delay - */ - *eec = *eec | IXGBE_EEC_SK; - IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); - IXGBE_WRITE_FLUSH(hw); - udelay(1); -} - -/** - * ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input. 
- * @hw: pointer to hardware structure - * @eecd: EECD's current value - **/ -static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) -{ - /* - * Lower the clock input to the EEPROM (clearing the SK bit), then - * delay - */ - *eec = *eec & ~IXGBE_EEC_SK; - IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); - IXGBE_WRITE_FLUSH(hw); - udelay(1); -} - -/** - * ixgbe_release_eeprom - Release EEPROM, release semaphores - * @hw: pointer to hardware structure - **/ -static void ixgbe_release_eeprom(struct ixgbe_hw *hw) -{ - u32 eec; - - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - - eec |= IXGBE_EEC_CS; /* Pull CS high */ - eec &= ~IXGBE_EEC_SK; /* Lower SCK */ - - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - IXGBE_WRITE_FLUSH(hw); - - udelay(1); - - /* Stop requesting EEPROM access */ - eec &= ~IXGBE_EEC_REQ; - IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - - /* Delay before attempt to obtain semaphore again to allow FW access */ - msleep(hw->eeprom.semaphore_delay); -} - -/** - * ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum - * @hw: pointer to hardware structure - **/ -u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) -{ - u16 i; - u16 j; - u16 checksum = 0; - u16 length = 0; - u16 pointer = 0; - u16 word = 0; - - /* Include 0x0-0x3F in the checksum */ - for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { - if (hw->eeprom.ops.read(hw, i, &word) != 0) { - hw_dbg(hw, "EEPROM read failed\n"); - break; - } - checksum += word; - } - - /* Include all data from pointers except for the fw pointer */ - for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { - hw->eeprom.ops.read(hw, i, &pointer); - - /* Make sure the pointer seems valid */ - if (pointer != 0xFFFF && pointer != 0) { - hw->eeprom.ops.read(hw, pointer, &length); - - if (length != 0xFFFF && length != 0) { - for (j = pointer+1; j <= pointer+length; j++) { - hw->eeprom.ops.read(hw, j, &word); - checksum += word; - } - } - } - } - - checksum = (u16)IXGBE_EEPROM_SUM - checksum; - - return checksum; -} - -/** - * ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum - * @hw: pointer to hardware structure - * @checksum_val: calculated checksum - * - * Performs checksum calculation and validates the EEPROM checksum. If the - * caller does not need checksum_val, the value can be NULL. - **/ -s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, - u16 *checksum_val) -{ - s32 status; - u16 checksum; - u16 read_checksum = 0; - - /* - * Read the first word from the EEPROM. If this times out or fails, do - * not continue or we could be in for a very long wait while every - * EEPROM read fails - */ - status = hw->eeprom.ops.read(hw, 0, &checksum); - - if (status == 0) { - checksum = hw->eeprom.ops.calc_checksum(hw); - - hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum); - - /* - * Verify read checksum from EEPROM is the same as - * calculated checksum - */ - if (read_checksum != checksum) - status = IXGBE_ERR_EEPROM_CHECKSUM; - - /* If the user cares, return the calculated checksum */ - if (checksum_val) - *checksum_val = checksum; - } else { - hw_dbg(hw, "EEPROM read failed\n"); - } - - return status; -} - -/** - * ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum - * @hw: pointer to hardware structure - **/ -s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw) -{ - s32 status; - u16 checksum; - - /* - * Read the first word from the EEPROM. 
If this times out or fails, do - * not continue or we could be in for a very long wait while every - * EEPROM read fails - */ - status = hw->eeprom.ops.read(hw, 0, &checksum); - - if (status == 0) { - checksum = hw->eeprom.ops.calc_checksum(hw); - status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM, - checksum); - } else { - hw_dbg(hw, "EEPROM read failed\n"); - } - - return status; -} - -/** - * ixgbe_validate_mac_addr - Validate MAC address - * @mac_addr: pointer to MAC address. - * - * Tests a MAC address to ensure it is a valid Individual Address - **/ -s32 ixgbe_validate_mac_addr(u8 *mac_addr) -{ - s32 status = 0; - - /* Make sure it is not a multicast address */ - if (IXGBE_IS_MULTICAST(mac_addr)) { - hw_dbg(hw, "MAC address is multicast\n"); - status = IXGBE_ERR_INVALID_MAC_ADDR; - /* Not a broadcast address */ - } else if (IXGBE_IS_BROADCAST(mac_addr)) { - hw_dbg(hw, "MAC address is broadcast\n"); - status = IXGBE_ERR_INVALID_MAC_ADDR; - /* Reject the zero address */ - } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 && - mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) { - hw_dbg(hw, "MAC address is all zeros\n"); - status = IXGBE_ERR_INVALID_MAC_ADDR; - } - return status; -} - -/** - * ixgbe_set_rar_generic - Set Rx address register - * @hw: pointer to hardware structure - * @index: Receive address register to write - * @addr: Address to put into receive address register - * @vmdq: VMDq "set" or "pool" index - * @enable_addr: set flag that address is active - * - * Puts an ethernet address into a receive address register. - **/ -s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, - u32 enable_addr) -{ - u32 rar_low, rar_high; - u32 rar_entries = hw->mac.num_rar_entries; - - /* Make sure we are using a valid rar index range */ - if (index >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - /* setup VMDq pool selection before this RAR gets enabled */ - hw->mac.ops.set_vmdq(hw, index, vmdq); - - /* - * HW expects these in little endian so we reverse the byte - * order from network order (big endian) to little endian - */ - rar_low = ((u32)addr[0] | - ((u32)addr[1] << 8) | - ((u32)addr[2] << 16) | - ((u32)addr[3] << 24)); - /* - * Some parts put the VMDq setting in the extra RAH bits, - * so save everything except the lower 16 bits that hold part - * of the address and the address valid bit. - */ - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); - rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); - rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8)); - - if (enable_addr != 0) - rar_high |= IXGBE_RAH_AV; - - IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low); - IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); - - return 0; -} - -/** - * ixgbe_clear_rar_generic - Remove Rx address register - * @hw: pointer to hardware structure - * @index: Receive address register to write - * - * Clears an ethernet address from a receive address register. - **/ -s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) -{ - u32 rar_high; - u32 rar_entries = hw->mac.num_rar_entries; - - /* Make sure we are using a valid rar index range */ - if (index >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - /* - * Some parts put the VMDq setting in the extra RAH bits, - * so save everything except the lower 16 bits that hold part - * of the address and the address valid bit. 
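ixgbe_set_rar_generic() above reverses the network-order MAC bytes into the little-endian layout the RAL/RAH pair expects: bytes 0-3 go into RAL least-significant-byte first, bytes 4-5 into the low half of RAH, and the upper RAH bits are preserved for the VMDq and address-valid flags. A small standalone check of that packing, using an arbitrary example address:

#include <assert.h>
#include <stdint.h>

/* Pack a MAC address the way ixgbe_set_rar_generic() does. */
static void pack_rar(const uint8_t mac[6], uint32_t *ral, uint16_t *rah_low)
{
    *ral = (uint32_t)mac[0] | ((uint32_t)mac[1] << 8) |
           ((uint32_t)mac[2] << 16) | ((uint32_t)mac[3] << 24);
    *rah_low = (uint16_t)((uint16_t)mac[4] | ((uint16_t)mac[5] << 8));
}

int main(void)
{
    /* 00:1b:21:aa:bb:cc is only an example value */
    const uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };
    uint32_t ral;
    uint16_t rah_low;

    pack_rar(mac, &ral, &rah_low);
    assert(ral == 0xaa211b00u);   /* network order reversed into LE */
    assert(rah_low == 0xccbb);
    return 0;
}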
- */ - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); - rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); - - IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); - IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); - - /* clear VMDq pool/queue selection for this RAR */ - hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL); - - return 0; -} - -/** - * ixgbe_init_rx_addrs_generic - Initializes receive address filters. - * @hw: pointer to hardware structure - * - * Places the MAC address in receive address register 0 and clears the rest - * of the receive address registers. Clears the multicast table. Assumes - * the receiver is in reset when the routine is called. - **/ -s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw) -{ - u32 i; - u32 rar_entries = hw->mac.num_rar_entries; - - /* - * If the current mac address is valid, assume it is a software override - * to the permanent address. - * Otherwise, use the permanent address from the eeprom. - */ - if (ixgbe_validate_mac_addr(hw->mac.addr) == - IXGBE_ERR_INVALID_MAC_ADDR) { - /* Get the MAC address from the RAR0 for later reference */ - hw->mac.ops.get_mac_addr(hw, hw->mac.addr); - - hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ", - hw->mac.addr[0], hw->mac.addr[1], - hw->mac.addr[2]); - hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], - hw->mac.addr[4], hw->mac.addr[5]); - } else { - /* Setup the receive address. */ - hw_dbg(hw, "Overriding MAC Address in RAR[0]\n"); - hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ", - hw->mac.addr[0], hw->mac.addr[1], - hw->mac.addr[2]); - hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], - hw->mac.addr[4], hw->mac.addr[5]); - - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); - - /* clear VMDq pool/queue selection for RAR 0 */ - hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); - } - hw->addr_ctrl.overflow_promisc = 0; - - hw->addr_ctrl.rar_used_count = 1; - - /* Zero out the other receive addresses. */ - hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1); - for (i = 1; i < rar_entries; i++) { - IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0); - } - - /* Clear the MTA */ - hw->addr_ctrl.mta_in_use = 0; - IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); - - hw_dbg(hw, " Clearing MTA\n"); - for (i = 0; i < hw->mac.mcft_size; i++) - IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0); - - ixgbe_init_uta_tables(hw); - - return 0; -} - -/** - * ixgbe_add_uc_addr - Adds a secondary unicast address. - * @hw: pointer to hardware structure - * @addr: new address - * - * Adds it to unused receive address register or goes into promiscuous mode. 
- **/ -void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) -{ - u32 rar_entries = hw->mac.num_rar_entries; - u32 rar; - - hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); - - /* - * Place this address in the RAR if there is room, - * else put the controller into promiscuous mode - */ - if (hw->addr_ctrl.rar_used_count < rar_entries) { - rar = hw->addr_ctrl.rar_used_count; - hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); - hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar); - hw->addr_ctrl.rar_used_count++; - } else { - hw->addr_ctrl.overflow_promisc++; - } - - hw_dbg(hw, "ixgbe_add_uc_addr Complete\n"); -} - -/** - * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses - * @hw: pointer to hardware structure - * @addr_list: the list of new addresses - * @addr_count: number of addresses - * @next: iterator function to walk the address list - * - * The given list replaces any existing list. Clears the secondary addrs from - * receive address registers. Uses unused receive address registers for the - * first secondary addresses, and falls back to promiscuous mode as needed. - * - * Drivers using secondary unicast addresses must set user_set_promisc when - * manually putting the device into promiscuous mode. - **/ -s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr next) -{ - u8 *addr; - u32 i; - u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; - u32 uc_addr_in_use; - u32 fctrl; - u32 vmdq; - - /* - * Clear accounting of old secondary address list, - * don't count RAR[0] - */ - uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1; - hw->addr_ctrl.rar_used_count -= uc_addr_in_use; - hw->addr_ctrl.overflow_promisc = 0; - - /* Zero out the other receive addresses */ - hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1); - for (i = 0; i < uc_addr_in_use; i++) { - IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0); - IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0); - } - - /* Add the new addresses */ - for (i = 0; i < addr_count; i++) { - hw_dbg(hw, " Adding the secondary addresses:\n"); - addr = next(hw, &addr_list, &vmdq); - ixgbe_add_uc_addr(hw, addr, vmdq); - } - - if (hw->addr_ctrl.overflow_promisc) { - /* enable promisc if not already in overflow or set by user */ - if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { - hw_dbg(hw, " Entering address overflow promisc mode\n"); - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl |= IXGBE_FCTRL_UPE; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - } - } else { - /* only disable if set by overflow, not by user */ - if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { - hw_dbg(hw, " Leaving address overflow promisc mode\n"); - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl &= ~IXGBE_FCTRL_UPE; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - } - } - - hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n"); - return 0; -} - -/** - * ixgbe_mta_vector - Determines bit-vector in multicast table to set - * @hw: pointer to hardware structure - * @mc_addr: the multicast address - * - * Extracts the 12 bits, from a multicast address, to determine which - * bit-vector to set in the multicast table. The hardware uses 12 bits, from - * incoming rx multicast addresses, to determine the bit-vector to check in - * the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set - * by the MO field of the MCSTCTRL. The MO field is set during initialization - * to mc_filter_type. 
- **/ -static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr) -{ - u32 vector = 0; - - switch (hw->mac.mc_filter_type) { - case 0: /* use bits [47:36] of the address */ - vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); - break; - case 1: /* use bits [46:35] of the address */ - vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5)); - break; - case 2: /* use bits [45:34] of the address */ - vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6)); - break; - case 3: /* use bits [43:32] of the address */ - vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); - break; - default: /* Invalid mc_filter_type */ - hw_dbg(hw, "MC filter type param set incorrectly\n"); - break; - } - - /* vector can only be 12-bits or boundary will be exceeded */ - vector &= 0xFFF; - return vector; -} - -/** - * ixgbe_set_mta - Set bit-vector in multicast table - * @hw: pointer to hardware structure - * @hash_value: Multicast address hash value - * - * Sets the bit-vector in the multicast table. - **/ -void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr) -{ - u32 vector; - u32 vector_bit; - u32 vector_reg; - - hw->addr_ctrl.mta_in_use++; - - vector = ixgbe_mta_vector(hw, mc_addr); - hw_dbg(hw, " bit-vector = 0x%03X\n", vector); - - /* - * The MTA is a register array of 128 32-bit registers. It is treated - * like an array of 4096 bits. We want to set bit - * BitArray[vector_value]. So we figure out what register the bit is - * in, read it, OR in the new bit, then write back the new value. The - * register is determined by the upper 7 bits of the vector value and - * the bit within that register are determined by the lower 5 bits of - * the value. - */ - vector_reg = (vector >> 5) & 0x7F; - vector_bit = vector & 0x1F; - hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit); -} - -/** - * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses - * @hw: pointer to hardware structure - * @mc_addr_list: the list of new multicast addresses - * @mc_addr_count: number of addresses - * @next: iterator function to walk the multicast address list - * @clear: flag, when set clears the table beforehand - * - * When the clear flag is set, the given list replaces any existing list. - * Hashes the given addresses into the multicast table. - **/ -s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count, ixgbe_mc_addr_itr next, - bool clear) -{ - u32 i; - u32 vmdq; - - /* - * Set the new number of MC addresses that we are being requested to - * use. - */ - hw->addr_ctrl.num_mc_addrs = mc_addr_count; - hw->addr_ctrl.mta_in_use = 0; - - /* Clear mta_shadow */ - if (clear) { - hw_dbg(hw, " Clearing MTA\n"); - memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); - } - - /* Update mta_shadow */ - for (i = 0; i < mc_addr_count; i++) { - hw_dbg(hw, " Adding the multicast addresses:\n"); - ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq)); - } - - /* Enable mta */ - for (i = 0; i < hw->mac.mcft_size; i++) - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i, - hw->mac.mta_shadow[i]); - - if (hw->addr_ctrl.mta_in_use > 0) - IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, - IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type); - - hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n"); - return 0; -} - -/** - * ixgbe_enable_mc_generic - Enable multicast address in RAR - * @hw: pointer to hardware structure - * - * Enables multicast address in RAR and the use of the multicast hash table. 
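With the default mc_filter_type of 0, the hash above simply takes address bits [47:36] and splits them into a register index (upper 7 of the 12 bits) and a bit index (lower 5 bits), exactly as ixgbe_set_mta() then applies to the 128-entry MTA shadow. A standalone recomputation, using an arbitrary multicast address:

#include <assert.h>
#include <stdint.h>

/* Mirror of ixgbe_mta_vector()/ixgbe_set_mta() index math for
 * mc_filter_type 0 (hash on address bits [47:36]). */
static void mta_index(const uint8_t mc[6], uint32_t *reg, uint32_t *bit)
{
    uint32_t vector = ((mc[4] >> 4) | ((uint16_t)mc[5] << 4)) & 0xFFF;

    *reg = (vector >> 5) & 0x7F;   /* which of the 128 MTA registers */
    *bit = vector & 0x1F;          /* which bit inside that register */
}

int main(void)
{
    /* 01:00:5e:00:00:fb (IPv4 mDNS group), picked only as an example */
    const uint8_t mc[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb };
    uint32_t reg, bit;

    mta_index(mc, &reg, &bit);
    assert(reg == 125 && bit == 16);   /* vector 0xfb0 -> MTA[125], bit 16 */
    return 0;
}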
- **/ -s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; - - if (a->mta_in_use > 0) - IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE | - hw->mac.mc_filter_type); - - return 0; -} - -/** - * ixgbe_disable_mc_generic - Disable multicast address in RAR - * @hw: pointer to hardware structure - * - * Disables multicast address in RAR and the use of the multicast hash table. - **/ -s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; - - if (a->mta_in_use > 0) - IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); - - return 0; -} - -/** - * ixgbe_fc_enable_generic - Enable flow control - * @hw: pointer to hardware structure - * - * Enable flow control according to the current settings. - **/ -s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - u32 mflcn_reg, fccfg_reg; - u32 reg; - u32 fcrtl, fcrth; - int i; - - /* Validate the water mark configuration */ - if (!hw->fc.pause_time) { - ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; - goto out; - } - - /* Low water mark of zero causes XOFF floods */ - for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { - if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && - hw->fc.high_water[i]) { - if (!hw->fc.low_water[i] || - hw->fc.low_water[i] >= hw->fc.high_water[i]) { - hw_dbg(hw, "Invalid water mark configuration\n"); - ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; - goto out; - } - } - } - - /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); - - /* Disable any previous flow control settings */ - mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN); - mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE); - - fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG); - fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY); - - /* - * The possible values of fc.current_mode are: - * 0: Flow control is completely disabled - * 1: Rx flow control is enabled (we can receive pause frames, - * but not send pause frames). - * 2: Tx flow control is enabled (we can send pause frames but - * we do not support receiving pause frames). - * 3: Both Rx and Tx flow control (symmetric) are enabled. - * other: Invalid. - */ - switch (hw->fc.current_mode) { - case ixgbe_fc_none: - /* - * Flow control is disabled by software override or autoneg. - * The code below will actually disable it in the HW. - */ - break; - case ixgbe_fc_rx_pause: - /* - * Rx Flow control is enabled and Tx Flow control is - * disabled by software override. Since there really - * isn't a way to advertise that we are capable of RX - * Pause ONLY, we will advertise that we support both - * symmetric and asymmetric Rx PAUSE. Later, we will - * disable the adapter's ability to send PAUSE frames. - */ - mflcn_reg |= IXGBE_MFLCN_RFCE; - break; - case ixgbe_fc_tx_pause: - /* - * Tx Flow control is enabled, and Rx Flow control is - * disabled by software override. - */ - fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; - break; - case ixgbe_fc_full: - /* Flow control (both Rx and Tx) is enabled by SW override. */ - mflcn_reg |= IXGBE_MFLCN_RFCE; - fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; - break; - default: - hw_dbg(hw, "Flow control param set incorrectly\n"); - ret_val = IXGBE_ERR_CONFIG; - goto out; - break; - } - - /* Set 802.3x based flow control settings. */ - mflcn_reg |= IXGBE_MFLCN_DPF; - IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); - IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); - - - /* Set up and enable Rx high/low water mark thresholds, enable XON. 
*/ - for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { - if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && - hw->fc.high_water[i]) { - fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; - IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl); - fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; - } else { - IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0); - /* - * In order to prevent Tx hangs when the internal Tx - * switch is enabled we must set the high water mark - * to the maximum FCRTH value. This allows the Tx - * switch to function even under heavy Rx workloads. - */ - fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32; - } - - IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth); - } - - /* Configure pause time (2 TCs per register) */ - reg = hw->fc.pause_time * 0x00010001; - for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) - IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); - - /* Configure flow control refresh threshold value */ - IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); - -out: - return ret_val; -} - -/** - * ixgbe_negotiate_fc - Negotiate flow control - * @hw: pointer to hardware structure - * @adv_reg: flow control advertised settings - * @lp_reg: link partner's flow control settings - * @adv_sym: symmetric pause bit in advertisement - * @adv_asm: asymmetric pause bit in advertisement - * @lp_sym: symmetric pause bit in link partner advertisement - * @lp_asm: asymmetric pause bit in link partner advertisement - * - * Find the intersection between advertised settings and link partner's - * advertised settings - **/ -static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, - u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) -{ - if (!adv_reg || !lp_reg) - return IXGBE_ERR_FC_NOT_NEGOTIATED; - - if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { - /* - * Now we need to check if the user selected Rx ONLY - * of pause frames. In this case, we had to advertise - * FULL flow control because we could not advertise RX - * ONLY. Hence, we must now check to see if we need to - * turn OFF the TRANSMISSION of PAUSE frames. - */ - if (hw->fc.requested_mode == ixgbe_fc_full) { - hw->fc.current_mode = ixgbe_fc_full; - hw_dbg(hw, "Flow Control = FULL.\n"); - } else { - hw->fc.current_mode = ixgbe_fc_rx_pause; - hw_dbg(hw, "Flow Control=RX PAUSE frames only\n"); - } - } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) && - (lp_reg & lp_sym) && (lp_reg & lp_asm)) { - hw->fc.current_mode = ixgbe_fc_tx_pause; - hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n"); - } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) && - !(lp_reg & lp_sym) && (lp_reg & lp_asm)) { - hw->fc.current_mode = ixgbe_fc_rx_pause; - hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n"); - } else { - hw->fc.current_mode = ixgbe_fc_none; - hw_dbg(hw, "Flow Control = NONE.\n"); - } - return 0; -} - -/** - * ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber - * @hw: pointer to hardware structure - * - * Enable flow control according on 1 gig fiber. 
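ixgbe_negotiate_fc() above is the standard IEEE 802.3 Annex 28B resolution of the symmetric/asymmetric PAUSE advertisement bits. The same truth table as a standalone sketch, where want_full stands in for fc.requested_mode == ixgbe_fc_full and the driver's early-out when either side advertises nothing is omitted:

#include <assert.h>
#include <stdbool.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

/* Resolve local vs. link-partner PAUSE/ASM_DIR bits into a flow-control
 * mode, following the same ordering of cases as ixgbe_negotiate_fc(). */
static enum fc_mode resolve_fc(bool adv_sym, bool adv_asm,
                               bool lp_sym, bool lp_asm, bool want_full)
{
    if (adv_sym && lp_sym)
        return want_full ? FC_FULL : FC_RX_PAUSE;
    if (!adv_sym && adv_asm && lp_sym && lp_asm)
        return FC_TX_PAUSE;
    if (adv_sym && adv_asm && !lp_sym && lp_asm)
        return FC_RX_PAUSE;
    return FC_NONE;
}

int main(void)
{
    /* we advertise sym+asym, partner advertises asym only -> Rx pause */
    assert(resolve_fc(true, true, false, true, true) == FC_RX_PAUSE);
    /* both sides advertise symmetric pause and full was requested */
    assert(resolve_fc(true, false, true, false, true) == FC_FULL);
    return 0;
}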
- **/ -static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) -{ - u32 pcs_anadv_reg, pcs_lpab_reg, linkstat; - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; - - /* - * On multispeed fiber at 1g, bail out if - * - link is up but AN did not complete, or if - * - link is up and AN completed but timed out - */ - - linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); - if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || - (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) - goto out; - - pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); - pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); - - ret_val = ixgbe_negotiate_fc(hw, pcs_anadv_reg, - pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE, - IXGBE_PCS1GANA_ASM_PAUSE, - IXGBE_PCS1GANA_SYM_PAUSE, - IXGBE_PCS1GANA_ASM_PAUSE); - -out: - return ret_val; -} - -/** - * ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37 - * @hw: pointer to hardware structure - * - * Enable flow control according to IEEE clause 37. - **/ -static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) -{ - u32 links2, anlp1_reg, autoc_reg, links; - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; - - /* - * On backplane, bail out if - * - backplane autoneg was not completed, or if - * - we are 82599 and link partner is not AN enabled - */ - links = IXGBE_READ_REG(hw, IXGBE_LINKS); - if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) - goto out; - - if (hw->mac.type == ixgbe_mac_82599EB) { - links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); - if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) - goto out; - } - /* - * Read the 10g AN autoc and LP ability registers and resolve - * local flow control settings accordingly - */ - autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1); - - ret_val = ixgbe_negotiate_fc(hw, autoc_reg, - anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE, - IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE); - -out: - return ret_val; -} - -/** - * ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37 - * @hw: pointer to hardware structure - * - * Enable flow control according to IEEE clause 37. - **/ -static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) -{ - u16 technology_ability_reg = 0; - u16 lp_technology_ability_reg = 0; - - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &technology_ability_reg); - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &lp_technology_ability_reg); - - return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg, - (u32)lp_technology_ability_reg, - IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE, - IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE); -} - -/** - * ixgbe_fc_autoneg - Configure flow control - * @hw: pointer to hardware structure - * - * Compares our advertised flow control capabilities to those advertised by - * our link partner, and determines the proper flow control mode to use. - **/ -void ixgbe_fc_autoneg(struct ixgbe_hw *hw) -{ - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; - ixgbe_link_speed speed; - bool link_up; - - /* - * AN should have completed when the cable was plugged in. - * Look for reasons to bail out. Bail out if: - * - FC autoneg is disabled, or if - * - link is not up. 
- */ - if (hw->fc.disable_fc_autoneg) - goto out; - - hw->mac.ops.check_link(hw, &speed, &link_up, false); - if (!link_up) - goto out; - - switch (hw->phy.media_type) { - /* Autoneg flow control on fiber adapters */ - case ixgbe_media_type_fiber: - if (speed == IXGBE_LINK_SPEED_1GB_FULL) - ret_val = ixgbe_fc_autoneg_fiber(hw); - break; - - /* Autoneg flow control on backplane adapters */ - case ixgbe_media_type_backplane: - ret_val = ixgbe_fc_autoneg_backplane(hw); - break; - - /* Autoneg flow control on copper adapters */ - case ixgbe_media_type_copper: - if (ixgbe_device_supports_autoneg_fc(hw) == 0) - ret_val = ixgbe_fc_autoneg_copper(hw); - break; - - default: - break; - } - -out: - if (ret_val == 0) { - hw->fc.fc_was_autonegged = true; - } else { - hw->fc.fc_was_autonegged = false; - hw->fc.current_mode = hw->fc.requested_mode; - } -} - -/** - * ixgbe_disable_pcie_master - Disable PCI-express master access - * @hw: pointer to hardware structure - * - * Disables PCI-Express master access and verifies there are no pending - * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable - * bit hasn't caused the master requests to be disabled, else 0 - * is returned signifying master requests disabled. - **/ -s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) -{ - s32 status = 0; - u32 i; - - /* Always set this bit to ensure any future transactions are blocked */ - IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); - - /* Exit if master requets are blocked */ - if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) - goto out; - - /* Poll for master request bit to clear */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { - udelay(100); - if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) - goto out; - } - - /* - * Two consecutive resets are required via CTRL.RST per datasheet - * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine - * of this need. The first reset prevents new master requests from - * being issued by our device. We then must wait 1usec or more for any - * remaining completions from the PCIe bus to trickle in, and then reset - * again to clear out any effects they may have had on our device. - */ - hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); - hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; - - /* - * Before proceeding, make sure that the PCIe block does not have - * transactions pending. 
- */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { - udelay(100); - if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) & - IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING)) - goto out; - } - - hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - status = IXGBE_ERR_MASTER_REQUESTS_PENDING; - -out: - return status; -} - -/** - * ixgbe_acquire_swfw_sync - Acquire SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to acquire - * - * Acquires the SWFW semaphore through the GSSR register for the specified - * function (CSR, PHY0, PHY1, EEPROM, Flash) - **/ -s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask) -{ - u32 gssr; - u32 swmask = mask; - u32 fwmask = mask << 5; - s32 timeout = 200; - - while (timeout) { - /* - * SW EEPROM semaphore bit is used for access to all - * SW_FW_SYNC/GSSR bits (not just EEPROM) - */ - if (ixgbe_get_eeprom_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; - - gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); - if (!(gssr & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) or other software - * thread currently using resource (swmask) - */ - ixgbe_release_eeprom_semaphore(hw); - msleep(5); - timeout--; - } - - if (!timeout) { - hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n"); - return IXGBE_ERR_SWFW_SYNC; - } - - gssr |= swmask; - IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); - - ixgbe_release_eeprom_semaphore(hw); - return 0; -} - -/** - * ixgbe_release_swfw_sync - Release SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to release - * - * Releases the SWFW semaphore through the GSSR register for the specified - * function (CSR, PHY0, PHY1, EEPROM, Flash) - **/ -void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask) -{ - u32 gssr; - u32 swmask = mask; - - ixgbe_get_eeprom_semaphore(hw); - - gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); - gssr &= ~swmask; - IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); - - ixgbe_release_eeprom_semaphore(hw); -} - -/** - * ixgbe_disable_sec_rx_path_generic - Stops the receive data path - * @hw: pointer to hardware structure - * - * Stops the receive data path and waits for the HW to internally empty - * the Rx security block - **/ -s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw) -{ -#define IXGBE_MAX_SECRX_POLL 40 - - int i; - int secrxreg; - - secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); - secrxreg |= IXGBE_SECRXCTRL_RX_DIS; - IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); - for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) { - secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT); - if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY) - break; - else - /* Use interrupt-safe sleep just in case */ - udelay(1000); - } - - /* For informational purposes only */ - if (i >= IXGBE_MAX_SECRX_POLL) - hw_dbg(hw, "Rx unit being enabled before security " - "path fully disabled. Continuing with init.\n"); - - return 0; -} - -/** - * ixgbe_enable_sec_rx_path_generic - Enables the receive data path - * @hw: pointer to hardware structure - * - * Enables the receive data path. 
- **/ -s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw) -{ - int secrxreg; - - secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); - secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS; - IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit - * @hw: pointer to hardware structure - * @regval: register value to write to RXCTRL - * - * Enables the Rx DMA unit - **/ -s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval) -{ - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); - - return 0; -} - -/** - * ixgbe_blink_led_start_generic - Blink LED based on index. - * @hw: pointer to hardware structure - * @index: led number to blink - **/ -s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) -{ - ixgbe_link_speed speed = 0; - bool link_up = 0; - u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - - /* - * Link must be up to auto-blink the LEDs; - * Force it if link is down. - */ - hw->mac.ops.check_link(hw, &speed, &link_up, false); - - if (!link_up) { - autoc_reg |= IXGBE_AUTOC_AN_RESTART; - autoc_reg |= IXGBE_AUTOC_FLU; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); - IXGBE_WRITE_FLUSH(hw); - msleep(10); - } - - led_reg &= ~IXGBE_LED_MODE_MASK(index); - led_reg |= IXGBE_LED_BLINK(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_blink_led_stop_generic - Stop blinking LED based on index. - * @hw: pointer to hardware structure - * @index: led number to stop blinking - **/ -s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) -{ - u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - - autoc_reg &= ~IXGBE_AUTOC_FLU; - autoc_reg |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); - - led_reg &= ~IXGBE_LED_MODE_MASK(index); - led_reg &= ~IXGBE_LED_BLINK(index); - led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM - * @hw: pointer to hardware structure - * @san_mac_offset: SAN MAC address offset - * - * This function will read the EEPROM location for the SAN MAC address - * pointer, and returns the value at that location. This is used in both - * get and set mac_addr routines. - **/ -static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, - u16 *san_mac_offset) -{ - /* - * First read the EEPROM pointer to see if the MAC addresses are - * available. - */ - hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset); - - return 0; -} - -/** - * ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM - * @hw: pointer to hardware structure - * @san_mac_addr: SAN MAC address - * - * Reads the SAN MAC address from the EEPROM, if it's available. This is - * per-port, so set_lan_id() must be called before reading the addresses. - * set_lan_id() is called by identify_sfp(), but this cannot be relied - * upon for non-SFP connections, so we must call it here. - **/ -s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) -{ - u16 san_mac_data, san_mac_offset; - u8 i; - - /* - * First read the EEPROM pointer to see if the MAC addresses are - * available. If they're not, no point in calling set_lan_id() here. 
- */ - ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); - - if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { - /* - * No addresses available in this EEPROM. It's not an - * error though, so just wipe the local address and return. - */ - for (i = 0; i < 6; i++) - san_mac_addr[i] = 0xFF; - - goto san_mac_addr_out; - } - - /* make sure we know which port we need to program */ - hw->mac.ops.set_lan_id(hw); - /* apply the port offset to the address offset */ - (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : - (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); - for (i = 0; i < 3; i++) { - hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data); - san_mac_addr[i * 2] = (u8)(san_mac_data); - san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8); - san_mac_offset++; - } - -san_mac_addr_out: - return 0; -} - -/** - * ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM - * @hw: pointer to hardware structure - * @san_mac_addr: SAN MAC address - * - * Write a SAN MAC address to the EEPROM. - **/ -s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) -{ - s32 status = 0; - u16 san_mac_data, san_mac_offset; - u8 i; - - /* Look for SAN mac address pointer. If not defined, return */ - ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); - - if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { - status = IXGBE_ERR_NO_SAN_ADDR_PTR; - goto san_mac_addr_out; - } - - /* Make sure we know which port we need to write */ - hw->mac.ops.set_lan_id(hw); - /* Apply the port offset to the address offset */ - (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : - (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); - - for (i = 0; i < 3; i++) { - san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8); - san_mac_data |= (u16)(san_mac_addr[i * 2]); - hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data); - san_mac_offset++; - } - -san_mac_addr_out: - return status; -} - -/** - * ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count - * @hw: pointer to hardware structure - * - * Read PCIe configuration space, and get the MSI-X vector count from - * the capabilities table. 
- **/ -u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) -{ - u16 msix_count = 1; - u16 max_msix_count; - u16 pcie_offset; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS; - max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598; - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS; - max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; - break; - default: - return msix_count; - } - - msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset); - msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK; - - /* MSI-X count is zero-based in HW */ - msix_count++; - - if (msix_count > max_msix_count) - msix_count = max_msix_count; - - return msix_count; -} - -/** - * ixgbe_insert_mac_addr_generic - Find a RAR for this mac address - * @hw: pointer to hardware structure - * @addr: Address to put into receive address register - * @vmdq: VMDq pool to assign - * - * Puts an ethernet address into a receive address register, or - * finds the rar that it is already in; adds to the pool list - **/ -s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) -{ - static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF; - u32 first_empty_rar = NO_EMPTY_RAR_FOUND; - u32 rar; - u32 rar_low, rar_high; - u32 addr_low, addr_high; - - /* swap bytes for HW little endian */ - addr_low = addr[0] | (addr[1] << 8) - | (addr[2] << 16) - | (addr[3] << 24); - addr_high = addr[4] | (addr[5] << 8); - - /* - * Either find the mac_id in rar or find the first empty space. - * rar_highwater points to just after the highest currently used - * rar in order to shorten the search. It grows when we add a new - * rar to the top. - */ - for (rar = 0; rar < hw->mac.rar_highwater; rar++) { - rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); - - if (((IXGBE_RAH_AV & rar_high) == 0) - && first_empty_rar == NO_EMPTY_RAR_FOUND) { - first_empty_rar = rar; - } else if ((rar_high & 0xFFFF) == addr_high) { - rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar)); - if (rar_low == addr_low) - break; /* found it already in the rars */ - } - } - - if (rar < hw->mac.rar_highwater) { - /* already there so just add to the pool bits */ - ixgbe_set_vmdq(hw, rar, vmdq); - } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) { - /* stick it into first empty RAR slot we found */ - rar = first_empty_rar; - ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); - } else if (rar == hw->mac.rar_highwater) { - /* add it to the top of the list and inc the highwater mark */ - ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); - hw->mac.rar_highwater++; - } else if (rar >= hw->mac.num_rar_entries) { - return IXGBE_ERR_INVALID_MAC_ADDR; - } - - /* - * If we found rar[0], make sure the default pool bit (we use pool 0) - * remains cleared to be sure default pool packets will get delivered - */ - if (rar == 0) - ixgbe_clear_vmdq(hw, rar, 0); - - return rar; -} - -/** - * ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address - * @hw: pointer to hardware struct - * @rar: receive address register index to disassociate - * @vmdq: VMDq pool index to remove from the rar - **/ -s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - u32 mpsar_lo, mpsar_hi; - u32 rar_entries = hw->mac.num_rar_entries; - - /* Make sure we are using a valid rar index range */ - if (rar >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); - mpsar_hi = IXGBE_READ_REG(hw, 
IXGBE_MPSAR_HI(rar)); - - if (!mpsar_lo && !mpsar_hi) - goto done; - - if (vmdq == IXGBE_CLEAR_VMDQ_ALL) { - if (mpsar_lo) { - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0); - mpsar_lo = 0; - } - if (mpsar_hi) { - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0); - mpsar_hi = 0; - } - } else if (vmdq < 32) { - mpsar_lo &= ~(1 << vmdq); - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo); - } else { - mpsar_hi &= ~(1 << (vmdq - 32)); - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi); - } - - /* was that the last pool using this rar? */ - if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) - hw->mac.ops.clear_rar(hw, rar); -done: - return 0; -} - -/** - * ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address - * @hw: pointer to hardware struct - * @rar: receive address register index to associate with a VMDq index - * @vmdq: VMDq pool index - **/ -s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) -{ - u32 mpsar; - u32 rar_entries = hw->mac.num_rar_entries; - - /* Make sure we are using a valid rar index range */ - if (rar >= rar_entries) { - hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; - } - - if (vmdq < 32) { - mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); - mpsar |= 1 << vmdq; - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); - } else { - mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); - mpsar |= 1 << (vmdq - 32); - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); - } - return 0; -} - -/** - * This function should only be involved in the IOV mode. - * In IOV mode, Default pool is next pool after the number of - * VFs advertized and not 0. - * MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index] - * - * ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address - * @hw: pointer to hardware struct - * @vmdq: VMDq pool index - **/ -s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq) -{ - u32 mpsar; - u32 rar = hw->mac.san_mac_rar_index; - - if (vmdq < 32) { - mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); - mpsar |= 1 << vmdq; - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); - } else { - mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); - mpsar |= 1 << (vmdq - 32); - IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); - } - - return 0; -} - -/** - * ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array - * @hw: pointer to hardware structure - **/ -s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw) -{ - int i; - - hw_dbg(hw, " Clearing UTA\n"); - - for (i = 0; i < 128; i++) - IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0); - - return 0; -} - -/** - * ixgbe_find_vlvf_slot - find the vlanid or the first empty slot - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * - * return the VLVF index where this VLAN id should be placed - * - **/ -s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) -{ - u32 bits = 0; - u32 first_empty_slot = 0; - s32 regindex; - - /* short cut the special case */ - if (vlan == 0) - return 0; - - /* - * Search for the vlan id in the VLVF entries. Save off the first empty - * slot found along the way - */ - for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) { - bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex)); - if (!bits && !(first_empty_slot)) - first_empty_slot = regindex; - else if ((bits & 0x0FFF) == vlan) - break; - } - - /* - * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan - * in the VLVF. Else use the first empty VLVF register for this - * vlan id. 
- */ - if (regindex >= IXGBE_VLVF_ENTRIES) { - if (first_empty_slot) - regindex = first_empty_slot; - else { - hw_dbg(hw, "No space in VLVF.\n"); - regindex = IXGBE_ERR_NO_SPACE; - } - } - - return regindex; -} - -/** - * ixgbe_set_vfta_generic - Set VLAN filter table - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * @vind: VMDq output index that maps queue to VLAN id in VFVFB - * @vlan_on: boolean flag to turn on/off VLAN in VFVF - * - * Turn on/off specified VLAN in the VLAN filter table. - **/ -s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on) -{ - s32 regindex; - u32 bitindex; - u32 vfta; - u32 targetbit; - s32 ret_val = 0; - bool vfta_changed = false; - - if (vlan > 4095) - return IXGBE_ERR_PARAM; - - /* - * this is a 2 part operation - first the VFTA, then the - * VLVF and VLVFB if VT Mode is set - * We don't write the VFTA until we know the VLVF part succeeded. - */ - - /* Part 1 - * The VFTA is a bitstring made up of 128 32-bit registers - * that enable the particular VLAN id, much like the MTA: - * bits[11-5]: which register - * bits[4-0]: which bit in the register - */ - regindex = (vlan >> 5) & 0x7F; - bitindex = vlan & 0x1F; - targetbit = (1 << bitindex); - vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); - - if (vlan_on) { - if (!(vfta & targetbit)) { - vfta |= targetbit; - vfta_changed = true; - } - } else { - if (vfta & targetbit) { - vfta &= ~targetbit; - vfta_changed = true; - } - } - - /* Part 2 - * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF - */ - ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on, - &vfta_changed); - if (ret_val != 0) - return ret_val; - - if (vfta_changed) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta); - - return 0; -} - -/** - * ixgbe_set_vlvf_generic - Set VLAN Pool Filter - * @hw: pointer to hardware structure - * @vlan: VLAN id to write to VLAN filter - * @vind: VMDq output index that maps queue to VLAN id in VFVFB - * @vlan_on: boolean flag to turn on/off VLAN in VFVF - * @vfta_changed: pointer to boolean flag which indicates whether VFTA - * should be changed - * - * Turn on/off specified bit in VLVF table. 
- **/ -s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on, bool *vfta_changed) -{ - u32 vt; - - if (vlan > 4095) - return IXGBE_ERR_PARAM; - - /* If VT Mode is set - * Either vlan_on - * make sure the vlan is in VLVF - * set the vind bit in the matching VLVFB - * Or !vlan_on - * clear the pool bit and possibly the vind - */ - vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL); - if (vt & IXGBE_VT_CTL_VT_ENABLE) { - s32 vlvf_index; - u32 bits; - - vlvf_index = ixgbe_find_vlvf_slot(hw, vlan); - if (vlvf_index < 0) - return vlvf_index; - - if (vlan_on) { - /* set the pool bit */ - if (vind < 32) { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index * 2)); - bits |= (1 << vind); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(vlvf_index * 2), - bits); - } else { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index * 2) + 1)); - bits |= (1 << (vind - 32)); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((vlvf_index * 2) + 1), - bits); - } - } else { - /* clear the pool bit */ - if (vind < 32) { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index * 2)); - bits &= ~(1 << vind); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(vlvf_index * 2), - bits); - bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index * 2) + 1)); - } else { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index * 2) + 1)); - bits &= ~(1 << (vind - 32)); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((vlvf_index * 2) + 1), - bits); - bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index * 2)); - } - } - - /* - * If there are still bits set in the VLVFB registers - * for the VLAN ID indicated we need to see if the - * caller is requesting that we clear the VFTA entry bit. - * If the caller has requested that we clear the VFTA - * entry bit but there are still pools/VFs using this VLAN - * ID entry then ignore the request. We're not worried - * about the case where we're turning the VFTA VLAN ID - * entry bit on, only when requested to turn it off as - * there may be multiple pools and/or VFs using the - * VLAN ID entry. In that case we cannot clear the - * VFTA bit until all pools/VFs using that VLAN ID have also - * been cleared. This will be indicated by "bits" being - * zero. - */ - if (bits) { - IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), - (IXGBE_VLVF_VIEN | vlan)); - if (!vlan_on && (vfta_changed != NULL)) { - /* someone wants to clear the vfta entry - * but some pools/VFs are still using it. - * Ignore it. 
*/ - *vfta_changed = false; - } - } else - IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); - } - - return 0; -} - -/** - * ixgbe_clear_vfta_generic - Clear VLAN filter table - * @hw: pointer to hardware structure - * - * Clears the VLAN filer table, and the VMDq index associated with the filter - **/ -s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) -{ - u32 offset; - - for (offset = 0; offset < hw->mac.vft_size; offset++) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); - - for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) { - IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0); - IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0); - IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0); - } - - return 0; -} - -/** - * ixgbe_check_mac_link_generic - Determine link and speed status - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @link_up: true when link is up - * @link_up_wait_to_complete: bool used to wait for link up or not - * - * Reads the links register to determine if link is up and the current speed - **/ -s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *link_up, bool link_up_wait_to_complete) -{ - u32 links_reg, links_orig; - u32 i; - - /* clear the old state */ - links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); - - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - - if (links_orig != links_reg) { - hw_dbg(hw, "LINKS changed from %08X to %08X\n", - links_orig, links_reg); - } - - if (link_up_wait_to_complete) { - for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { - if (links_reg & IXGBE_LINKS_UP) { - *link_up = true; - break; - } else { - *link_up = false; - } - msleep(100); - links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); - } - } else { - if (links_reg & IXGBE_LINKS_UP) - *link_up = true; - else - *link_up = false; - } - - if ((links_reg & IXGBE_LINKS_SPEED_82599) == - IXGBE_LINKS_SPEED_10G_82599) - *speed = IXGBE_LINK_SPEED_10GB_FULL; - else if ((links_reg & IXGBE_LINKS_SPEED_82599) == - IXGBE_LINKS_SPEED_1G_82599) - *speed = IXGBE_LINK_SPEED_1GB_FULL; - else if ((links_reg & IXGBE_LINKS_SPEED_82599) == - IXGBE_LINKS_SPEED_100_82599) - *speed = IXGBE_LINK_SPEED_100_FULL; - else - *speed = IXGBE_LINK_SPEED_UNKNOWN; - - return 0; -} - -/** - * ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from - * the EEPROM - * @hw: pointer to hardware structure - * @wwnn_prefix: the alternative WWNN prefix - * @wwpn_prefix: the alternative WWPN prefix - * - * This function will read the EEPROM from the alternative SAN MAC address - * block to check the support for the alternative WWNN/WWPN prefix support. 
- **/ -s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, - u16 *wwpn_prefix) -{ - u16 offset, caps; - u16 alt_san_mac_blk_offset; - - /* clear output first */ - *wwnn_prefix = 0xFFFF; - *wwpn_prefix = 0xFFFF; - - /* check if alternative SAN MAC is supported */ - hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR, - &alt_san_mac_blk_offset); - - if ((alt_san_mac_blk_offset == 0) || - (alt_san_mac_blk_offset == 0xFFFF)) - goto wwn_prefix_out; - - /* check capability in alternative san mac address block */ - offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET; - hw->eeprom.ops.read(hw, offset, &caps); - if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN)) - goto wwn_prefix_out; - - /* get the corresponding prefix for WWNN/WWPN */ - offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET; - hw->eeprom.ops.read(hw, offset, wwnn_prefix); - - offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET; - hw->eeprom.ops.read(hw, offset, wwpn_prefix); - -wwn_prefix_out: - return 0; -} - -/** - * ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM - * @hw: pointer to hardware structure - * @bs: the fcoe boot status - * - * This function will read the FCOE boot status from the iSCSI FCOE block - **/ -s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs) -{ - u16 offset, caps, flags; - s32 status; - - /* clear output first */ - *bs = ixgbe_fcoe_bootstatus_unavailable; - - /* check if FCOE IBA block is present */ - offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR; - status = hw->eeprom.ops.read(hw, offset, &caps); - if (status != 0) - goto out; - - if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE)) - goto out; - - /* check if iSCSI FCOE block is populated */ - status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset); - if (status != 0) - goto out; - - if ((offset == 0) || (offset == 0xFFFF)) - goto out; - - /* read fcoe flags in iSCSI FCOE block */ - offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET; - status = hw->eeprom.ops.read(hw, offset, &flags); - if (status != 0) - goto out; - - if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE) - *bs = ixgbe_fcoe_bootstatus_enabled; - else - *bs = ixgbe_fcoe_bootstatus_disabled; - -out: - return status; -} - -/** - * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing - * @hw: pointer to hardware structure - * @enable: enable or disable switch for anti-spoofing - * @pf: Physical Function pool - do not enable anti-spoofing for the PF - * - **/ -void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf) -{ - int j; - int pf_target_reg = pf >> 3; - int pf_target_shift = pf % 8; - u32 pfvfspoof = 0; - - if (hw->mac.type == ixgbe_mac_82598EB) - return; - - if (enable) - pfvfspoof = IXGBE_SPOOF_MACAS_MASK; - - /* - * PFVFSPOOF register array is size 8 with 8 bits assigned to - * MAC anti-spoof enables in each register array element. - */ - for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++) - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof); - - /* If not enabling anti-spoofing then done */ - if (!enable) - return; - - /* - * The PF should be allowed to spoof so that it can support - * emulation mode NICs. 
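The PFVFSPOOF addressing used in this function, and in the VLAN variant further below, is simple index arithmetic: eight pools map onto each 32-bit register, with the MAC anti-spoof enables in the low byte. A quick standalone check with an illustrative pool number (not driver code):

#include <stdio.h>

int main(void)
{
	int pool = 10;                 /* example PF/VF pool number */
	int reg = pool >> 3;           /* eight pools per PFVFSPOOF register */
	int shift = pool % 8;          /* MAC anti-spoof enables: low 8 bits */

	printf("PFVFSPOOF[%d], bit %d\n", reg, shift);  /* PFVFSPOOF[1], bit 2 */
	return 0;
}
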
Reset the bit assigned to the PF - */ - pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg)); - pfvfspoof ^= (1 << pf_target_shift); - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof); -} - -/** - * ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing - * @hw: pointer to hardware structure - * @enable: enable or disable switch for VLAN anti-spoofing - * @pf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing - * - **/ -void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf) -{ - int vf_target_reg = vf >> 3; - int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT; - u32 pfvfspoof; - - if (hw->mac.type == ixgbe_mac_82598EB) - return; - - pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); - if (enable) - pfvfspoof |= (1 << vf_target_shift); - else - pfvfspoof &= ~(1 << vf_target_shift); - IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); -} - -/** - * ixgbe_get_device_caps_generic - Get additional device capabilities - * @hw: pointer to hardware structure - * @device_caps: the EEPROM word with the extra device capabilities - * - * This function will read the EEPROM location for the device capabilities, - * and return the word through device_caps. - **/ -s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps) -{ - hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps); - - return 0; -} - -/** - * ixgbe_calculate_checksum - Calculate checksum for buffer - * @buffer: pointer to EEPROM - * @length: size of EEPROM to calculate a checksum for - * Calculates the checksum for some buffer on a specified length. The - * checksum calculated is returned. - **/ -static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) -{ - u32 i; - u8 sum = 0; - - if (!buffer) - return 0; - for (i = 0; i < length; i++) - sum += buffer[i]; - - return (u8) (0 - sum); -} - -/** - * ixgbe_host_interface_command - Issue command to manageability block - * @hw: pointer to the HW structure - * @buffer: contains the command to write and where the return status will - * be placed - * @length: length of buffer, must be multiple of 4 bytes - * - * Communicates with the manageability block. On success return 0 - * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. - **/ -static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, - u32 length) -{ - u32 hicr, i, bi; - u32 hdr_size = sizeof(struct ixgbe_hic_hdr); - u8 buf_len, dword_len; - - s32 ret_val = 0; - - if (length == 0 || length & 0x3 || - length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { - hw_dbg(hw, "Buffer length failure.\n"); - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto out; - } - - /* Check that the host interface is enabled. */ - hicr = IXGBE_READ_REG(hw, IXGBE_HICR); - if ((hicr & IXGBE_HICR_EN) == 0) { - hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto out; - } - - /* Calculate length in DWORDs */ - dword_len = length >> 2; - - /* - * The device driver writes the relevant command block - * into the ram area. - */ - for (i = 0; i < dword_len; i++) - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG, - i, IXGBE_CPU_TO_LE32(buffer[i])); - - /* Setting this bit tells the ARC that a new command is pending. */ - IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C); - - for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) { - hicr = IXGBE_READ_REG(hw, IXGBE_HICR); - if (!(hicr & IXGBE_HICR_C)) - break; - msleep(1); - } - - /* Check command successful completion. 
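The command framing here depends on the byte checksum computed by ixgbe_calculate_checksum above: every byte of the message, checksum field included, must sum to zero. A minimal standalone sketch of that property, using a hypothetical four-byte command rather than the real header layout:

#include <stdint.h>
#include <assert.h>

/* Two's-complement of the byte sum, same rule as the driver helper above. */
static uint8_t csum8(const uint8_t *buf, uint32_t len)
{
	uint8_t sum = 0;
	uint32_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];
	return (uint8_t)(0 - sum);
}

int main(void)
{
	uint8_t cmd[4] = { 0x11, 0x22, 0x33, 0x00 };
	uint8_t sum = 0;
	int i;

	cmd[3] = csum8(cmd, sizeof(cmd));   /* fill the checksum field last */
	for (i = 0; i < 4; i++)
		sum += cmd[i];
	assert(sum == 0);                   /* whole message now sums to zero */
	return 0;
}
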
*/ - if (i == IXGBE_HI_COMMAND_TIMEOUT || - (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) { - hw_dbg(hw, "Command has failed with no status valid.\n"); - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto out; - } - - /* Calculate length in DWORDs */ - dword_len = hdr_size >> 2; - - /* first pull in the header so we know the buffer length */ - for (bi = 0; bi < dword_len; bi++) { - buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); - IXGBE_LE32_TO_CPUS(&buffer[bi]); - } - - /* If there is any thing in data position pull it in */ - buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len; - if (buf_len == 0) - goto out; - - if (length < (buf_len + hdr_size)) { - hw_dbg(hw, "Buffer not large enough for reply message.\n"); - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto out; - } - - /* Calculate length in DWORDs, add 3 for odd lengths */ - dword_len = (buf_len + 3) >> 2; - - /* Pull in the rest of the buffer (bi is where we left off)*/ - for (; bi <= dword_len; bi++) { - buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); - IXGBE_LE32_TO_CPUS(&buffer[bi]); - } - -out: - return ret_val; -} - -/** - * ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware - * @hw: pointer to the HW structure - * @maj: driver version major number - * @min: driver version minor number - * @build: driver version build number - * @sub: driver version sub build number - * - * Sends driver version number to firmware through the manageability - * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. - **/ -s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 sub) -{ - struct ixgbe_hic_drv_info fw_cmd; - int i; - s32 ret_val = 0; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM) - != 0) { - ret_val = IXGBE_ERR_SWFW_SYNC; - goto out; - } - - fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; - fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN; - fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; - fw_cmd.port_num = (u8)hw->bus.func; - fw_cmd.ver_maj = maj; - fw_cmd.ver_min = min; - fw_cmd.ver_build = build; - fw_cmd.ver_sub = sub; - fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); - fw_cmd.pad = 0; - fw_cmd.pad2 = 0; - - for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { - ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd, - sizeof(fw_cmd)); - if (ret_val != 0) - continue; - - if (fw_cmd.hdr.cmd_or_resp.ret_status == - FW_CEM_RESP_STATUS_SUCCESS) - ret_val = 0; - else - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; - - break; - } - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); -out: - return ret_val; -} - -/** - * ixgbe_set_rxpba_generic - Initialize Rx packet buffer - * @hw: pointer to hardware structure - * @num_pb: number of packet buffers to allocate - * @headroom: reserve n KB of headroom - * @strategy: packet buffer allocation strategy - **/ -void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, - int strategy) -{ - u32 pbsize = hw->mac.rx_pb_size; - int i = 0; - u32 rxpktsize, txpktsize, txpbthresh; - - /* Reserve headroom */ - pbsize -= headroom; - - if (!num_pb) - num_pb = 1; - - /* Divide remaining packet buffer space amongst the number of packet - * buffers requested using supplied strategy. 
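For the weighted strategy handled just below, the arithmetic gives the first half of the packet buffers 5/8 of the space and the second half the remainder, which is where the 80 KB / 48 KB split comes from. A quick standalone check with example numbers (512 KB buffer, 8 packet buffers; values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int pbsize = 512;     /* KB of Rx packet buffer, example value */
	unsigned int num_pb = 8;

	/* first num_pb/2 buffers each get pbsize*5 / (num_pb*4) */
	unsigned int big = (pbsize * 5) / (num_pb * 4);     /* 80 KB each  */
	unsigned int rest = pbsize - big * (num_pb / 2);    /* 192 KB left */
	unsigned int small = rest / (num_pb - num_pb / 2);  /* 48 KB each  */

	printf("%u x %u KB + %u x %u KB\n", num_pb / 2, big, num_pb / 2, small);
	return 0;
}
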
- */ - switch (strategy) { - case PBA_STRATEGY_WEIGHTED: - /* ixgbe_dcb_pba_80_48 strategy weight first half of packet - * buffer with 5/8 of the packet buffer space. - */ - rxpktsize = (pbsize * 5) / (num_pb * 4); - pbsize -= rxpktsize * (num_pb / 2); - rxpktsize <<= IXGBE_RXPBSIZE_SHIFT; - for (; i < (num_pb / 2); i++) - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); - /* Fall through to configure remaining packet buffers */ - case PBA_STRATEGY_EQUAL: - rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT; - for (; i < num_pb; i++) - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); - break; - default: - break; - } - - /* Only support an equally distributed Tx packet buffer strategy. */ - txpktsize = IXGBE_TXPBSIZE_MAX / num_pb; - txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX; - for (i = 0; i < num_pb; i++) { - IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize); - IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh); - } - - /* Clear unused TCs, if any, to zero buffer size*/ - for (; i < IXGBE_MAX_PB; i++) { - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0); - } -} - -/** - * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo - * @hw: pointer to the hardware structure - * - * The 82599 and x540 MACs can experience issues if TX work is still pending - * when a reset occurs. This function prevents this by flushing the PCIe - * buffers on the system. - **/ -void ixgbe_clear_tx_pending(struct ixgbe_hw *hw) -{ - u32 gcr_ext, hlreg0; - - /* - * If double reset is not requested then all transactions should - * already be clear and as such there is no work to do - */ - if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED)) - return; - - /* - * Set loopback enable to prevent any transmits from being sent - * should the link come up. This assumes that the RXCTRL.RXEN bit - * has already been cleared. 
- */ - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK); - - /* initiate cleaning flow for buffers in the PCIe transaction layer */ - gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); - IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, - gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR); - - /* Flush all writes and allow 20usec for all transactions to clear */ - IXGBE_WRITE_FLUSH(hw); - udelay(20); - - /* restore previous register values */ - IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); -} - -static const u8 ixgbe_emc_temp_data[4] = { - IXGBE_EMC_INTERNAL_DATA, - IXGBE_EMC_DIODE1_DATA, - IXGBE_EMC_DIODE2_DATA, - IXGBE_EMC_DIODE3_DATA -}; -static const u8 ixgbe_emc_therm_limit[4] = { - IXGBE_EMC_INTERNAL_THERM_LIMIT, - IXGBE_EMC_DIODE1_THERM_LIMIT, - IXGBE_EMC_DIODE2_THERM_LIMIT, - IXGBE_EMC_DIODE3_THERM_LIMIT -}; - -/** - * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data - * @hw: pointer to hardware structure - * @data: pointer to the thermal sensor data structure - * - * Returns the thermal sensor data structure - **/ -s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) -{ - s32 status = 0; - u16 ets_offset; - u16 ets_cfg; - u16 ets_sensor; - u8 num_sensors; - u8 sensor_index; - u8 sensor_location; - u8 i; - struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; - - /* Only support thermal sensors attached to 82599 physical port 0 */ - if ((hw->mac.type != ixgbe_mac_82599EB) || - (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) { - status = IXGBE_NOT_IMPLEMENTED; - goto out; - } - - status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); - if (status) - goto out; - - if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) { - status = IXGBE_NOT_IMPLEMENTED; - goto out; - } - - status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); - if (status) - goto out; - - if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) - != IXGBE_ETS_TYPE_EMC) { - status = IXGBE_NOT_IMPLEMENTED; - goto out; - } - - num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); - if (num_sensors > IXGBE_MAX_SENSORS) - num_sensors = IXGBE_MAX_SENSORS; - - for (i = 0; i < num_sensors; i++) { - status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i), - &ets_sensor); - if (status) - goto out; - - sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> - IXGBE_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> - IXGBE_ETS_DATA_LOC_SHIFT); - - if (sensor_location != 0) { - status = hw->phy.ops.read_i2c_byte(hw, - ixgbe_emc_temp_data[sensor_index], - IXGBE_I2C_THERMAL_SENSOR_ADDR, - &data->sensor[i].temp); - if (status) - goto out; - } - } -out: - return status; -} - -/** - * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds - * @hw: pointer to hardware structure - * - * Inits the thermal sensor thresholds according to the NVM map - * and save off the threshold and location values into mac.thermal_sensor_data - **/ -s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) -{ - s32 status = 0; - u16 ets_offset; - u16 ets_cfg; - u16 ets_sensor; - u8 low_thresh_delta; - u8 num_sensors; - u8 sensor_index; - u8 sensor_location; - u8 therm_limit; - u8 i; - struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; - - memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data)); - - /* Only support thermal sensors attached to 82599 physical port 0 */ - if ((hw->mac.type != ixgbe_mac_82599EB) || - (IXGBE_READ_REG(hw, 
IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; - - hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); - if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) - return IXGBE_NOT_IMPLEMENTED; - - hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); - if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) - != IXGBE_ETS_TYPE_EMC) - return IXGBE_NOT_IMPLEMENTED; - - low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> - IXGBE_ETS_LTHRES_DELTA_SHIFT); - num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); - - for (i = 0; i < num_sensors; i++) { - hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor); - sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> - IXGBE_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> - IXGBE_ETS_DATA_LOC_SHIFT); - therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; - - hw->phy.ops.write_i2c_byte(hw, - ixgbe_emc_therm_limit[sensor_index], - IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit); - - if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) { - data->sensor[i].location = sensor_location; - data->sensor[i].caution_thresh = therm_limit; - data->sensor[i].max_op_thresh = therm_limit - - low_thresh_delta; - } - } - return status; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h deleted file mode 100644 index 2989a80b..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_COMMON_H_ -#define _IXGBE_COMMON_H_ - -#include "ixgbe_type.h" - -u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw); - -s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw); -s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw); -s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw); -s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw); -s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw); -s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, - u32 pba_num_size); -s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr); -s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw); -void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw); -s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); - -s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); - -s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); -s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data); -s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 *data); -s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, - u16 words, u16 *data); -u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw); -s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, - u16 *checksum_val); -s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw); -s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg); - -s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, - u32 enable_addr); -s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw); -s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count, - ixgbe_mc_addr_itr func, bool clear); -s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr func); -s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); -s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); -s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); -s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw); -s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw); - -s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw); -void ixgbe_fc_autoneg(struct ixgbe_hw *hw); - -s32 ixgbe_validate_mac_addr(u8 *mac_addr); -s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask); -void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask); -s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw); - -s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index); - -s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); -s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); - -s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq); -s32 
ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); -s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); -s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); -s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, - u32 vind, bool vlan_on); -s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on, bool *vfta_changed); -s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw); -s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan); - -s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *link_up, bool link_up_wait_to_complete); - -s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, - u16 *wwpn_prefix); - -s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs); -void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf); -void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf); -s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps); -void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, - int strategy); -s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 ver); -void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); - -#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 -#define IXGBE_EMC_INTERNAL_DATA 0x00 -#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 -#define IXGBE_EMC_DIODE1_DATA 0x01 -#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 -#define IXGBE_EMC_DIODE2_DATA 0x23 -#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A -#define IXGBE_EMC_DIODE3_DATA 0x2A -#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30 - -s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw); -s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw); -#endif /* IXGBE_COMMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h deleted file mode 100644 index e9a099d5..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h +++ /dev/null @@ -1,153 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_DCB_H_ -#define _IXGBE_DCB_H_ - - -#include "ixgbe_type.h" - -/* DCB defines */ -/* DCB credit calculation defines */ -#define IXGBE_DCB_CREDIT_QUANTUM 64 -#define IXGBE_DCB_MAX_CREDIT_REFILL 200 /* 200 * 64B = 12800B */ -#define IXGBE_DCB_MAX_TSO_SIZE (32 * 1024) /* Max TSO pkt size in DCB*/ -#define IXGBE_DCB_MAX_CREDIT (2 * IXGBE_DCB_MAX_CREDIT_REFILL) - -/* 513 for 32KB TSO packet */ -#define IXGBE_DCB_MIN_TSO_CREDIT \ - ((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1) - -/* DCB configuration defines */ -#define IXGBE_DCB_MAX_USER_PRIORITY 8 -#define IXGBE_DCB_MAX_BW_GROUP 8 -#define IXGBE_DCB_BW_PERCENT 100 - -#define IXGBE_DCB_TX_CONFIG 0 -#define IXGBE_DCB_RX_CONFIG 1 - -/* DCB capability defines */ -#define IXGBE_DCB_PG_SUPPORT 0x00000001 -#define IXGBE_DCB_PFC_SUPPORT 0x00000002 -#define IXGBE_DCB_BCN_SUPPORT 0x00000004 -#define IXGBE_DCB_UP2TC_SUPPORT 0x00000008 -#define IXGBE_DCB_GSP_SUPPORT 0x00000010 - -struct ixgbe_dcb_support { - u32 capabilities; /* DCB capabilities */ - - /* Each bit represents a number of TCs configurable in the hw. - * If 8 traffic classes can be configured, the value is 0x80. */ - u8 traffic_classes; - u8 pfc_traffic_classes; -}; - -enum ixgbe_dcb_tsa { - ixgbe_dcb_tsa_ets = 0, - ixgbe_dcb_tsa_group_strict_cee, - ixgbe_dcb_tsa_strict -}; - -/* Traffic class bandwidth allocation per direction */ -struct ixgbe_dcb_tc_path { - u8 bwg_id; /* Bandwidth Group (BWG) ID */ - u8 bwg_percent; /* % of BWG's bandwidth */ - u8 link_percent; /* % of link bandwidth */ - u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */ - u16 data_credits_refill; /* Credit refill amount in 64B granularity */ - u16 data_credits_max; /* Max credits for a configured packet buffer - * in 64B granularity.*/ - enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */ -}; - -enum ixgbe_dcb_pfc { - ixgbe_dcb_pfc_disabled = 0, - ixgbe_dcb_pfc_enabled, - ixgbe_dcb_pfc_enabled_txonly, - ixgbe_dcb_pfc_enabled_rxonly -}; - -/* Traffic class configuration */ -struct ixgbe_dcb_tc_config { - struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */ - enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */ - - u16 desc_credits_max; /* For Tx Descriptor arbitration */ - u8 tc; /* Traffic class (TC) */ -}; - -enum ixgbe_dcb_pba { - /* PBA[0-7] each use 64KB FIFO */ - ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL, - /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ - ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED -}; - -struct ixgbe_dcb_num_tcs { - u8 pg_tcs; - u8 pfc_tcs; -}; - -struct ixgbe_dcb_config { - struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS]; - struct ixgbe_dcb_support support; - struct ixgbe_dcb_num_tcs num_tcs; - u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */ - bool pfc_mode_enable; - bool round_robin_enable; - - enum ixgbe_dcb_pba rx_pba_cfg; - - u32 dcb_cfg_version; /* Not used...OS-specific? 
*/ - u32 link_speed; /* For bandwidth allocation validation purpose */ - bool vt_mode; -}; - -/* DCB driver APIs */ - -/* DCB rule checking */ -s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *); - -/* DCB credits calculation */ -s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int); -s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *, - struct ixgbe_dcb_config *, u32, u8); - -/* DCB PFC */ -s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *); -s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); - -/* DCB stats */ -s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *); -s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); -s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); - -/* DCB config arbiters */ -s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *, - struct ixgbe_dcb_config *); -s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *, - struct ixgbe_dcb_config *); -s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *, - struct ixgbe_dcb_config *); - -/* DCB unpack routines */ -void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *); -void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *); -void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *); -void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *); -void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *); -void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *); - -/* DCB initialization */ -s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *); -s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); -#endif /* _IXGBE_DCB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c deleted file mode 100644 index 44cdc9f2..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ /dev/null @@ -1,2886 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* ethtool support for ixgbe */ - -#include -#include -#include -#include -#include -#include -#include -#ifdef SIOCETHTOOL -#include - -#include "ixgbe.h" - -#ifndef ETH_GSTRING_LEN -#define ETH_GSTRING_LEN 32 -#endif - -#define IXGBE_ALL_RAR_ENTRIES 16 - -#ifdef ETHTOOL_OPS_COMPAT -#include "kcompat_ethtool.c" -#endif -#ifdef ETHTOOL_GSTATS -struct ixgbe_stats { - char stat_string[ETH_GSTRING_LEN]; - int sizeof_stat; - int stat_offset; -}; - -#define IXGBE_NETDEV_STAT(_net_stat) { \ - .stat_string = #_net_stat, \ - .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ - .stat_offset = offsetof(struct net_device_stats, _net_stat) \ -} -static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = { - IXGBE_NETDEV_STAT(rx_packets), - IXGBE_NETDEV_STAT(tx_packets), - IXGBE_NETDEV_STAT(rx_bytes), - IXGBE_NETDEV_STAT(tx_bytes), - IXGBE_NETDEV_STAT(rx_errors), - IXGBE_NETDEV_STAT(tx_errors), - IXGBE_NETDEV_STAT(rx_dropped), - IXGBE_NETDEV_STAT(tx_dropped), - IXGBE_NETDEV_STAT(multicast), - IXGBE_NETDEV_STAT(collisions), - IXGBE_NETDEV_STAT(rx_over_errors), - IXGBE_NETDEV_STAT(rx_crc_errors), - IXGBE_NETDEV_STAT(rx_frame_errors), - IXGBE_NETDEV_STAT(rx_fifo_errors), - IXGBE_NETDEV_STAT(rx_missed_errors), - IXGBE_NETDEV_STAT(tx_aborted_errors), - IXGBE_NETDEV_STAT(tx_carrier_errors), - IXGBE_NETDEV_STAT(tx_fifo_errors), - IXGBE_NETDEV_STAT(tx_heartbeat_errors), -}; - -#define IXGBE_STAT(_name, _stat) { \ - .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \ - .stat_offset = offsetof(struct ixgbe_adapter, _stat) \ -} -static struct ixgbe_stats ixgbe_gstrings_stats[] = { - IXGBE_STAT("rx_pkts_nic", stats.gprc), - IXGBE_STAT("tx_pkts_nic", stats.gptc), - IXGBE_STAT("rx_bytes_nic", stats.gorc), - IXGBE_STAT("tx_bytes_nic", stats.gotc), - IXGBE_STAT("lsc_int", lsc_int), - IXGBE_STAT("tx_busy", tx_busy), - IXGBE_STAT("non_eop_descs", non_eop_descs), -#ifndef CONFIG_IXGBE_NAPI - IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog), -#endif - IXGBE_STAT("broadcast", stats.bprc), - IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) , - IXGBE_STAT("tx_timeout_count", tx_timeout_count), - IXGBE_STAT("tx_restart_queue", restart_queue), - IXGBE_STAT("rx_long_length_errors", stats.roc), - IXGBE_STAT("rx_short_length_errors", stats.ruc), - IXGBE_STAT("tx_flow_control_xon", stats.lxontxc), - IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc), - IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc), - IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc), - IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error), - IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed), - IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed), -#ifndef IXGBE_NO_LRO - IXGBE_STAT("lro_aggregated", lro_stats.coal), - IXGBE_STAT("lro_flushed", lro_stats.flushed), -#endif /* IXGBE_NO_LRO */ - IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources), - IXGBE_STAT("hw_rsc_aggregated", rsc_total_count), - IXGBE_STAT("hw_rsc_flushed", rsc_total_flush), -#ifdef HAVE_TX_MQ - IXGBE_STAT("fdir_match", stats.fdirmatch), - IXGBE_STAT("fdir_miss", stats.fdirmiss), - IXGBE_STAT("fdir_overflow", fdir_overflow), -#endif /* HAVE_TX_MQ */ -#ifdef IXGBE_FCOE - IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc), - IXGBE_STAT("fcoe_last_errors", stats.fclast), - IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc), - IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc), - IXGBE_STAT("rx_fcoe_dwords", 
stats.fcoedwrc), - IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp), - IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff), - IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc), - IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc), -#endif /* IXGBE_FCOE */ - IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), - IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc), - IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc), - IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc), -}; - -#define IXGBE_QUEUE_STATS_LEN \ - ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \ - ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \ - (sizeof(struct ixgbe_queue_stats) / sizeof(u64))) -#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) -#define IXGBE_NETDEV_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_net_stats) -#define IXGBE_PB_STATS_LEN ( \ - (((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \ - IXGBE_FLAG_DCB_ENABLED) ? \ - (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \ - sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \ - sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \ - sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \ - / sizeof(u64) : 0) -#define IXGBE_VF_STATS_LEN \ - ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \ - (sizeof(struct vf_stats) / sizeof(u64))) -#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \ - IXGBE_NETDEV_STATS_LEN + \ - IXGBE_PB_STATS_LEN + \ - IXGBE_QUEUE_STATS_LEN + \ - IXGBE_VF_STATS_LEN) - -#endif /* ETHTOOL_GSTATS */ -#ifdef ETHTOOL_TEST -static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { - "Register test (offline)", "Eeprom test (offline)", - "Interrupt test (offline)", "Loopback test (offline)", - "Link test (on/offline)" -}; -#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) -#endif /* ETHTOOL_TEST */ - -int ixgbe_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 link_speed = 0; - bool link_up; - - ecmd->supported = SUPPORTED_10000baseT_Full; - ecmd->autoneg = AUTONEG_ENABLE; - ecmd->transceiver = XCVR_EXTERNAL; - if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->phy.multispeed_fiber)) { - ecmd->supported |= (SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg); - switch (hw->mac.type) { - case ixgbe_mac_X540: - ecmd->supported |= SUPPORTED_100baseT_Full; - break; - default: - break; - } - - ecmd->advertising = ADVERTISED_Autoneg; - if (hw->phy.autoneg_advertised) { - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_100_FULL) - ecmd->advertising |= ADVERTISED_100baseT_Full; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_10GB_FULL) - ecmd->advertising |= ADVERTISED_10000baseT_Full; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_1GB_FULL) - ecmd->advertising |= ADVERTISED_1000baseT_Full; - } else { - /* - * Default advertised modes in case - * phy.autoneg_advertised isn't set. 
- */ - ecmd->advertising |= (ADVERTISED_10000baseT_Full | - ADVERTISED_1000baseT_Full); - if (hw->mac.type == ixgbe_mac_X540) - ecmd->advertising |= ADVERTISED_100baseT_Full; - } - - if (hw->phy.media_type == ixgbe_media_type_copper) { - ecmd->supported |= SUPPORTED_TP; - ecmd->advertising |= ADVERTISED_TP; - ecmd->port = PORT_TP; - } else { - ecmd->supported |= SUPPORTED_FIBRE; - ecmd->advertising |= ADVERTISED_FIBRE; - ecmd->port = PORT_FIBRE; - } - } else if (hw->phy.media_type == ixgbe_media_type_backplane) { - /* Set as FIBRE until SERDES defined in kernel */ - if (hw->device_id == IXGBE_DEV_ID_82598_BX) { - ecmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE); - ecmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE); - ecmd->port = PORT_FIBRE; - ecmd->autoneg = AUTONEG_DISABLE; - } else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE) - || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) { - ecmd->supported |= (SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg | - SUPPORTED_FIBRE); - ecmd->advertising = (ADVERTISED_10000baseT_Full | - ADVERTISED_1000baseT_Full | - ADVERTISED_Autoneg | - ADVERTISED_FIBRE); - ecmd->port = PORT_FIBRE; - } else { - ecmd->supported |= (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE); - ecmd->advertising = (ADVERTISED_10000baseT_Full | - ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE); - ecmd->port = PORT_FIBRE; - } - } else { - ecmd->supported |= SUPPORTED_FIBRE; - ecmd->advertising = (ADVERTISED_10000baseT_Full | - ADVERTISED_FIBRE); - ecmd->port = PORT_FIBRE; - ecmd->autoneg = AUTONEG_DISABLE; - } - -#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT - /* Get PHY type */ - switch (adapter->hw.phy.type) { - case ixgbe_phy_tn: - case ixgbe_phy_aq: - case ixgbe_phy_cu_unknown: - /* Copper 10G-BASET */ - ecmd->port = PORT_TP; - break; - case ixgbe_phy_qt: - ecmd->port = PORT_FIBRE; - break; - case ixgbe_phy_nl: - case ixgbe_phy_sfp_passive_tyco: - case ixgbe_phy_sfp_passive_unknown: - case ixgbe_phy_sfp_ftl: - case ixgbe_phy_sfp_avago: - case ixgbe_phy_sfp_intel: - case ixgbe_phy_sfp_unknown: - switch (adapter->hw.phy.sfp_type) { - /* SFP+ devices, further checking needed */ - case ixgbe_sfp_type_da_cu: - case ixgbe_sfp_type_da_cu_core0: - case ixgbe_sfp_type_da_cu_core1: - ecmd->port = PORT_DA; - break; - case ixgbe_sfp_type_sr: - case ixgbe_sfp_type_lr: - case ixgbe_sfp_type_srlr_core0: - case ixgbe_sfp_type_srlr_core1: - ecmd->port = PORT_FIBRE; - break; - case ixgbe_sfp_type_not_present: - ecmd->port = PORT_NONE; - break; - case ixgbe_sfp_type_1g_cu_core0: - case ixgbe_sfp_type_1g_cu_core1: - ecmd->port = PORT_TP; - ecmd->supported = SUPPORTED_TP; - ecmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_TP); - break; - case ixgbe_sfp_type_1g_sx_core0: - case ixgbe_sfp_type_1g_sx_core1: - ecmd->port = PORT_FIBRE; - ecmd->supported = SUPPORTED_FIBRE; - ecmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE); - break; - case ixgbe_sfp_type_unknown: - default: - ecmd->port = PORT_OTHER; - break; - } - break; - case ixgbe_phy_xaui: - ecmd->port = PORT_NONE; - break; - case ixgbe_phy_unknown: - case ixgbe_phy_generic: - case ixgbe_phy_sfp_unsupported: - default: - ecmd->port = PORT_OTHER; - break; - } -#endif - - if (!in_interrupt()) { - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - } else { - /* - * this case is a special workaround for RHEL5 bonding - * that calls this routine from interrupt context - */ - link_speed = adapter->link_speed; - link_up = adapter->link_up; - } - - if (link_up) { - switch 
(link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ecmd->speed = SPEED_10000; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ecmd->speed = SPEED_1000; - break; - case IXGBE_LINK_SPEED_100_FULL: - ecmd->speed = SPEED_100; - break; - default: - break; - } - ecmd->duplex = DUPLEX_FULL; - } else { - ecmd->speed = -1; - ecmd->duplex = -1; - } - - return 0; -} - -static int ixgbe_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 advertised, old; - s32 err = 0; - - if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->phy.multispeed_fiber)) { - /* - * this function does not support duplex forcing, but can - * limit the advertising of the adapter to the specified speed - */ - if (ecmd->autoneg == AUTONEG_DISABLE) - return -EINVAL; - - if (ecmd->advertising & ~ecmd->supported) - return -EINVAL; - - old = hw->phy.autoneg_advertised; - advertised = 0; - if (ecmd->advertising & ADVERTISED_10000baseT_Full) - advertised |= IXGBE_LINK_SPEED_10GB_FULL; - - if (ecmd->advertising & ADVERTISED_1000baseT_Full) - advertised |= IXGBE_LINK_SPEED_1GB_FULL; - - if (ecmd->advertising & ADVERTISED_100baseT_Full) - advertised |= IXGBE_LINK_SPEED_100_FULL; - - if (old == advertised) - return err; - /* this sets the link speed and restarts auto-neg */ - hw->mac.autotry_restart = true; - err = hw->mac.ops.setup_link(hw, advertised, true, true); - if (err) { - e_info(probe, "setup link failed with code %d\n", err); - hw->mac.ops.setup_link(hw, old, true, true); - } - } - return err; -} - -static void ixgbe_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - - if (hw->fc.disable_fc_autoneg) - pause->autoneg = 0; - else - pause->autoneg = 1; - - if (hw->fc.current_mode == ixgbe_fc_rx_pause) { - pause->rx_pause = 1; - } else if (hw->fc.current_mode == ixgbe_fc_tx_pause) { - pause->tx_pause = 1; - } else if (hw->fc.current_mode == ixgbe_fc_full) { - pause->rx_pause = 1; - pause->tx_pause = 1; - } -} - -static int ixgbe_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_fc_info fc = hw->fc; - - /* 82598 does no support link flow control with DCB enabled */ - if ((hw->mac.type == ixgbe_mac_82598EB) && - (adapter->flags & IXGBE_FLAG_DCB_ENABLED)) - return -EINVAL; - - fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE); - - if ((pause->rx_pause && pause->tx_pause) || pause->autoneg) - fc.requested_mode = ixgbe_fc_full; - else if (pause->rx_pause) - fc.requested_mode = ixgbe_fc_rx_pause; - else if (pause->tx_pause) - fc.requested_mode = ixgbe_fc_tx_pause; - else - fc.requested_mode = ixgbe_fc_none; - - /* if the thing changed then we'll update and use new autoneg */ - if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) { - hw->fc = fc; - if (netif_running(netdev)) - ixgbe_reinit_locked(adapter); - else - ixgbe_reset(adapter); - } - - return 0; -} - -static u32 ixgbe_get_msglevel(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - return adapter->msg_enable; -} - -static void ixgbe_set_msglevel(struct net_device *netdev, u32 data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - adapter->msg_enable = data; -} - -static int ixgbe_get_regs_len(struct net_device *netdev) -{ -#define 
IXGBE_REGS_LEN 1129 - return IXGBE_REGS_LEN * sizeof(u32); -} - -#define IXGBE_GET_STAT(_A_, _R_) (_A_->stats._R_) - - -static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs, - void *p) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 *regs_buff = p; - u8 i; - - printk(KERN_DEBUG "ixgbe_get_regs_1\n"); - memset(p, 0, IXGBE_REGS_LEN * sizeof(u32)); - printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr); - - regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id; - - /* General Registers */ - regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL); - printk(KERN_DEBUG "ixgbe_get_regs_3\n"); - regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS); - regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); - regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP); - regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP); - regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER); - regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER); - - printk(KERN_DEBUG "ixgbe_get_regs_4\n"); - - /* NVM Register */ - regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC); - regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD); - regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA); - regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL); - regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA); - regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL); - regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA); - regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT); - regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP); - regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC); - - /* Interrupt */ - /* don't read EICR because it can clear interrupt causes, instead - * read EICS which is a shadow but doesn't clear EICR */ - regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS); - regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS); - regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS); - regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC); - regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC); - regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM); - regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0)); - regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0)); - regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT); - regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA); - regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0)); - regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE); - - /* Flow Control */ - regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP); - regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0)); - regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1)); - regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2)); - regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3)); - for (i = 0; i < 8; i++) { - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i)); - regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i)); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - regs_buff[35 + i] = IXGBE_READ_REG(hw, - IXGBE_FCRTL_82599(i)); - regs_buff[43 + i] = IXGBE_READ_REG(hw, - IXGBE_FCRTH_82599(i)); - break; - default: - break; - } - } - regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV); - regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS); - - /* Receive DMA */ - for (i = 0; i < 64; i++) - regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i)); - for (i = 0; i < 64; i++) - regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i)); - for (i = 0; i < 64; i++) - regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i)); - for (i = 0; i < 64; i++) - regs_buff[245 + i] = IXGBE_READ_REG(hw, 
IXGBE_RDH(i)); - for (i = 0; i < 64; i++) - regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i)); - for (i = 0; i < 64; i++) - regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - for (i = 0; i < 16; i++) - regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - for (i = 0; i < 16; i++) - regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - for (i = 0; i < 8; i++) - regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); - regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL); - regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN); - - /* Receive */ - regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL); - for (i = 0; i < 16; i++) - regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i)); - for (i = 0; i < 16; i++) - regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i)); - regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)); - regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL); - regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL); - regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC); - regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL); - for (i = 0; i < 8; i++) - regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i)); - for (i = 0; i < 8; i++) - regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i)); - regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP); - - /* Transmit */ - for (i = 0; i < 32; i++) - regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i)); - for (i = 0; i < 32; i++) - regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i)); - for (i = 0; i < 32; i++) - regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i)); - for (i = 0; i < 32; i++) - regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i)); - for (i = 0; i < 32; i++) - regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i)); - for (i = 0; i < 32; i++) - regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); - for (i = 0; i < 32; i++) - regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i)); - for (i = 0; i < 32; i++) - regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i)); - regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL); - for (i = 0; i < 16; i++) - regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); - regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG); - for (i = 0; i < 8; i++) - regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i)); - regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP); - - /* Wake Up */ - regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC); - regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC); - regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS); - regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV); - regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT); - regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT); - regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL); - regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM); - regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0)); - - /* DCB */ - regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS); - regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS); - regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS); - regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR); - for (i = 0; i < 8; i++) - regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i)); - for (i = 0; i < 8; i++) - regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i)); - for (i = 0; i < 8; i++) - regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i)); - for (i = 0; i < 8; i++) - regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i)); - for (i 
= 0; i < 8; i++) - regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i)); - for (i = 0; i < 8; i++) - regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i)); - - /* Statistics */ - regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs); - regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc); - regs_buff[883] = IXGBE_GET_STAT(adapter, errbc); - regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc); - for (i = 0; i < 8; i++) - regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]); - regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc); - regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc); - regs_buff[895] = IXGBE_GET_STAT(adapter, rlec); - regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc); - regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc); - regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc); - regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc); - for (i = 0; i < 8; i++) - regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]); - for (i = 0; i < 8; i++) - regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]); - for (i = 0; i < 8; i++) - regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]); - for (i = 0; i < 8; i++) - regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]); - regs_buff[932] = IXGBE_GET_STAT(adapter, prc64); - regs_buff[933] = IXGBE_GET_STAT(adapter, prc127); - regs_buff[934] = IXGBE_GET_STAT(adapter, prc255); - regs_buff[935] = IXGBE_GET_STAT(adapter, prc511); - regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023); - regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522); - regs_buff[938] = IXGBE_GET_STAT(adapter, gprc); - regs_buff[939] = IXGBE_GET_STAT(adapter, bprc); - regs_buff[940] = IXGBE_GET_STAT(adapter, mprc); - regs_buff[941] = IXGBE_GET_STAT(adapter, gptc); - regs_buff[942] = IXGBE_GET_STAT(adapter, gorc); - regs_buff[944] = IXGBE_GET_STAT(adapter, gotc); - for (i = 0; i < 8; i++) - regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]); - regs_buff[954] = IXGBE_GET_STAT(adapter, ruc); - regs_buff[955] = IXGBE_GET_STAT(adapter, rfc); - regs_buff[956] = IXGBE_GET_STAT(adapter, roc); - regs_buff[957] = IXGBE_GET_STAT(adapter, rjc); - regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc); - regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc); - regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc); - regs_buff[961] = IXGBE_GET_STAT(adapter, tor); - regs_buff[963] = IXGBE_GET_STAT(adapter, tpr); - regs_buff[964] = IXGBE_GET_STAT(adapter, tpt); - regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64); - regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127); - regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255); - regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511); - regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023); - regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522); - regs_buff[971] = IXGBE_GET_STAT(adapter, mptc); - regs_buff[972] = IXGBE_GET_STAT(adapter, bptc); - regs_buff[973] = IXGBE_GET_STAT(adapter, xec); - for (i = 0; i < 16; i++) - regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]); - for (i = 0; i < 16; i++) - regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]); - for (i = 0; i < 16; i++) - regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]); - for (i = 0; i < 16; i++) - regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]); - - /* MAC */ - regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG); - regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); - regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); - regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0); - regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1); - regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); - 
regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); - regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP); - regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP); - regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0); - regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1); - regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP); - regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA); - regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE); - regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD); - regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS); - regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA); - regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD); - regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD); - regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD); - regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG); - regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1); - regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2); - regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS); - regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC); - regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS); - regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC); - regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS); - regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2); - regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3); - regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1); - regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2); - regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); - - /* Diagnostic */ - regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL); - for (i = 0; i < 8; i++) - regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i)); - regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN); - for (i = 0; i < 4; i++) - regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i)); - regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE); - regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL); - for (i = 0; i < 8; i++) - regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i)); - regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN); - for (i = 0; i < 4; i++) - regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i)); - regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE); - regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL); - regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0); - regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1); - regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2); - regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3); - regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL); - regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0); - regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1); - regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2); - regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3); - for (i = 0; i < 8; i++) - regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i)); - regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL); - regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1); - regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2); - regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1); - regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2); - regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS); - regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL); - regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC); - regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC); - - /* 82599 X540 specific registers */ - regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN); -} - -static int ixgbe_get_eeprom_len(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - return 
adapter->hw.eeprom.word_size * 2; -} - -static int ixgbe_get_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u16 *eeprom_buff; - int first_word, last_word, eeprom_len; - int ret_val = 0; - u16 i; - - if (eeprom->len == 0) - return -EINVAL; - - eeprom->magic = hw->vendor_id | (hw->device_id << 16); - - first_word = eeprom->offset >> 1; - last_word = (eeprom->offset + eeprom->len - 1) >> 1; - eeprom_len = last_word - first_word + 1; - - eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL); - if (!eeprom_buff) - return -ENOMEM; - - ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len, - eeprom_buff); - - /* Device's eeprom is always little-endian, word addressable */ - for (i = 0; i < eeprom_len; i++) - le16_to_cpus(&eeprom_buff[i]); - - memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); - kfree(eeprom_buff); - - return ret_val; -} - -static int ixgbe_set_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u16 *eeprom_buff; - void *ptr; - int max_len, first_word, last_word, ret_val = 0; - u16 i; - - if (eeprom->len == 0) - return -EINVAL; - - if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) - return -EINVAL; - - max_len = hw->eeprom.word_size * 2; - - first_word = eeprom->offset >> 1; - last_word = (eeprom->offset + eeprom->len - 1) >> 1; - eeprom_buff = kmalloc(max_len, GFP_KERNEL); - if (!eeprom_buff) - return -ENOMEM; - - ptr = eeprom_buff; - - if (eeprom->offset & 1) { - /* - * need read/modify/write of first changed EEPROM word - * only the second byte of the word is being modified - */ - ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]); - if (ret_val) - goto err; - - ptr++; - } - if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { - /* - * need read/modify/write of last changed EEPROM word - * only the first byte of the word is being modified - */ - ret_val = ixgbe_read_eeprom(hw, last_word, - &eeprom_buff[last_word - first_word]); - if (ret_val) - goto err; - } - - /* Device's eeprom is always little-endian, word addressable */ - for (i = 0; i < last_word - first_word + 1; i++) - le16_to_cpus(&eeprom_buff[i]); - - memcpy(ptr, bytes, eeprom->len); - - for (i = 0; i < last_word - first_word + 1; i++) - cpu_to_le16s(&eeprom_buff[i]); - - ret_val = ixgbe_write_eeprom_buffer(hw, first_word, - last_word - first_word + 1, - eeprom_buff); - - /* Update the checksum */ - if (ret_val == 0) - ixgbe_update_eeprom_checksum(hw); - -err: - kfree(eeprom_buff); - return ret_val; -} - -static void ixgbe_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); - - strlcpy(drvinfo->version, ixgbe_driver_version, - sizeof(drvinfo->version)); - - strlcpy(drvinfo->fw_version, adapter->eeprom_id, - sizeof(drvinfo->fw_version)); - - strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), - sizeof(drvinfo->bus_info)); - - drvinfo->n_stats = IXGBE_STATS_LEN; - drvinfo->testinfo_len = IXGBE_TEST_LEN; - drvinfo->regdump_len = ixgbe_get_regs_len(netdev); -} - -static void ixgbe_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - ring->rx_max_pending = IXGBE_MAX_RXD; - 
ring->tx_max_pending = IXGBE_MAX_TXD; - ring->rx_mini_max_pending = 0; - ring->rx_jumbo_max_pending = 0; - ring->rx_pending = adapter->rx_ring_count; - ring->tx_pending = adapter->tx_ring_count; - ring->rx_mini_pending = 0; - ring->rx_jumbo_pending = 0; -} - -static int ixgbe_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL; - u32 new_rx_count, new_tx_count; - int i, err = 0; - - if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) - return -EINVAL; - - new_tx_count = clamp_t(u32, ring->tx_pending, - IXGBE_MIN_TXD, IXGBE_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE); - - new_rx_count = clamp_t(u32, ring->rx_pending, - IXGBE_MIN_RXD, IXGBE_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); - - /* if nothing to do return success */ - if ((new_tx_count == adapter->tx_ring_count) && - (new_rx_count == adapter->rx_ring_count)) - return 0; - - while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - if (!netif_running(adapter->netdev)) { - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i]->count = new_tx_count; - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->count = new_rx_count; - adapter->tx_ring_count = new_tx_count; - adapter->rx_ring_count = new_rx_count; - goto clear_reset; - } - - /* alloc updated Tx resources */ - if (new_tx_count != adapter->tx_ring_count) { - tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring)); - if (!tx_ring) { - err = -ENOMEM; - goto clear_reset; - } - - for (i = 0; i < adapter->num_tx_queues; i++) { - /* clone ring and setup updated count */ - tx_ring[i] = *adapter->tx_ring[i]; - tx_ring[i].count = new_tx_count; - err = ixgbe_setup_tx_resources(&tx_ring[i]); - if (err) { - while (i) { - i--; - ixgbe_free_tx_resources(&tx_ring[i]); - } - - vfree(tx_ring); - tx_ring = NULL; - - goto clear_reset; - } - } - } - - /* alloc updated Rx resources */ - if (new_rx_count != adapter->rx_ring_count) { - rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring)); - if (!rx_ring) { - err = -ENOMEM; - goto clear_reset; - } - - for (i = 0; i < adapter->num_rx_queues; i++) { - /* clone ring and setup updated count */ - rx_ring[i] = *adapter->rx_ring[i]; - rx_ring[i].count = new_rx_count; - err = ixgbe_setup_rx_resources(&rx_ring[i]); - if (err) { - while (i) { - i--; - ixgbe_free_rx_resources(&rx_ring[i]); - } - - vfree(rx_ring); - rx_ring = NULL; - - goto clear_reset; - } - } - } - - /* bring interface down to prepare for update */ - ixgbe_down(adapter); - - /* Tx */ - if (tx_ring) { - for (i = 0; i < adapter->num_tx_queues; i++) { - ixgbe_free_tx_resources(adapter->tx_ring[i]); - *adapter->tx_ring[i] = tx_ring[i]; - } - adapter->tx_ring_count = new_tx_count; - - vfree(tx_ring); - tx_ring = NULL; - } - - /* Rx */ - if (rx_ring) { - for (i = 0; i < adapter->num_rx_queues; i++) { - ixgbe_free_rx_resources(adapter->rx_ring[i]); - *adapter->rx_ring[i] = rx_ring[i]; - } - adapter->rx_ring_count = new_rx_count; - - vfree(rx_ring); - rx_ring = NULL; - } - - /* restore interface using new values */ - ixgbe_up(adapter); - -clear_reset: - /* free Tx resources if Rx error is encountered */ - if (tx_ring) { - for (i = 0; i < adapter->num_tx_queues; i++) - ixgbe_free_tx_resources(&tx_ring[i]); - vfree(tx_ring); - } - - clear_bit(__IXGBE_RESETTING, &adapter->state); - return err; -} - -#ifndef 
HAVE_ETHTOOL_GET_SSET_COUNT -static int ixgbe_get_stats_count(struct net_device *netdev) -{ - return IXGBE_STATS_LEN; -} - -#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ -static int ixgbe_get_sset_count(struct net_device *netdev, int sset) -{ - switch (sset) { - case ETH_SS_TEST: - return IXGBE_TEST_LEN; - case ETH_SS_STATS: - return IXGBE_STATS_LEN; - default: - return -EOPNOTSUPP; - } -} - -#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ -static void ixgbe_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, u64 *data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats *net_stats = &netdev->stats; -#else - struct net_device_stats *net_stats = &adapter->net_stats; -#endif - u64 *queue_stat; - int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); - int i, j, k; - char *p; - - printk(KERN_DEBUG "ixgbe_stats 0\n"); - ixgbe_update_stats(adapter); - printk(KERN_DEBUG "ixgbe_stats 1\n"); - - for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { - p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset; - data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat == - sizeof(u64)) ? *(u64 *)p : *(u32 *)p; - } - for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) { - p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset; - data[i] = (ixgbe_gstrings_stats[j].sizeof_stat == - sizeof(u64)) ? *(u64 *)p : *(u32 *)p; - } - printk(KERN_DEBUG "ixgbe_stats 2\n"); -#ifdef NO_VNIC - for (j = 0; j < adapter->num_tx_queues; j++) { - queue_stat = (u64 *)&adapter->tx_ring[j]->stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; - } - for (j = 0; j < adapter->num_rx_queues; j++) { - queue_stat = (u64 *)&adapter->rx_ring[j]->stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; - } - printk(KERN_DEBUG "ixgbe_stats 3\n"); -#endif - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { - for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { - data[i++] = adapter->stats.pxontxc[j]; - data[i++] = adapter->stats.pxofftxc[j]; - } - for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) { - data[i++] = adapter->stats.pxonrxc[j]; - data[i++] = adapter->stats.pxoffrxc[j]; - } - } - printk(KERN_DEBUG "ixgbe_stats 4\n"); - stat_count = sizeof(struct vf_stats) / sizeof(u64); - for (j = 0; j < adapter->num_vfs; j++) { - queue_stat = (u64 *)&adapter->vfinfo[j].vfstats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats; - for (k = 0; k < stat_count; k++) - data[i + k] += queue_stat[k]; - i += k; - } -} - -static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, - u8 *data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - char *p = (char *)data; - int i; - - switch (stringset) { - case ETH_SS_TEST: - memcpy(data, *ixgbe_gstrings_test, - IXGBE_TEST_LEN * ETH_GSTRING_LEN); - break; - case ETH_SS_STATS: - for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { - memcpy(p, ixgbe_gstrings_net_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { - memcpy(p, ixgbe_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", 
i); - p += ETH_GSTRING_LEN; - } - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { - for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { - sprintf(p, "tx_pb_%u_pxon", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_pb_%u_pxoff", i); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) { - sprintf(p, "rx_pb_%u_pxon", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_pb_%u_pxoff", i); - p += ETH_GSTRING_LEN; - } - } - for (i = 0; i < adapter->num_vfs; i++) { - sprintf(p, "VF %d Rx Packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "VF %d Rx Bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "VF %d Tx Packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "VF %d Tx Bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "VF %d MC Packets", i); - p += ETH_GSTRING_LEN; - } - /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ - break; - } -} - -static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data) -{ - struct ixgbe_hw *hw = &adapter->hw; - bool link_up; - u32 link_speed = 0; - *data = 0; - - hw->mac.ops.check_link(hw, &link_speed, &link_up, true); - if (link_up) - return *data; - else - *data = 1; - return *data; -} - -/* ethtool register test data */ -struct ixgbe_reg_test { - u16 reg; - u8 array_len; - u8 test_type; - u32 mask; - u32 write; -}; - -/* In the hardware, registers are laid out either singly, in arrays - * spaced 0x40 bytes apart, or in contiguous tables. We assume - * most tests take place on arrays or single registers (handled - * as a single-element array) and special-case the tables. - * Table tests are always pattern tests. - * - * We also make provision for some required setup steps by specifying - * registers to be written without any read-back testing. - */ - -#define PATTERN_TEST 1 -#define SET_READ_TEST 2 -#define WRITE_NO_TEST 3 -#define TABLE32_TEST 4 -#define TABLE64_TEST_LO 5 -#define TABLE64_TEST_HI 6 - -/* default 82599 register test */ -static struct ixgbe_reg_test reg_test_82599[] = { - { IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, - { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 }, - { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, - { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, - { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 }, - { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 }, - { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF }, - { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -/* default 82598 register test */ -static struct ixgbe_reg_test reg_test_82598[] = { - { IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, - { IXGBE_RDBAL(0), 4, PATTERN_TEST, 
0xFFFFFF80, 0xFFFFFFFF }, - { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - /* Enable all four RX queues before testing. */ - { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, - /* RDH is read-only for 82598, only test RDT. */ - { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, - { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, - { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, - { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF }, - { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, - { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, - { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 }, - { IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 }, - { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, - { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF }, - { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0, 0, 0, 0 } -}; - -#define REG_PATTERN_TEST(R, M, W) \ -{ \ - u32 pat, val, before; \ - const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \ - for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { \ - before = readl(adapter->hw.hw_addr + R); \ - writel((_test[pat] & W), (adapter->hw.hw_addr + R)); \ - val = readl(adapter->hw.hw_addr + R); \ - if (val != (_test[pat] & W & M)) { \ - e_err(drv, "pattern test reg %04X failed: got " \ - "0x%08X expected 0x%08X\n", \ - R, val, (_test[pat] & W & M)); \ - *data = R; \ - writel(before, adapter->hw.hw_addr + R); \ - return 1; \ - } \ - writel(before, adapter->hw.hw_addr + R); \ - } \ -} - -#define REG_SET_AND_CHECK(R, M, W) \ -{ \ - u32 val, before; \ - before = readl(adapter->hw.hw_addr + R); \ - writel((W & M), (adapter->hw.hw_addr + R)); \ - val = readl(adapter->hw.hw_addr + R); \ - if ((W & M) != (val & M)) { \ - e_err(drv, "set/check reg %04X test failed: got 0x%08X " \ - "expected 0x%08X\n", R, (val & M), (W & M)); \ - *data = R; \ - writel(before, (adapter->hw.hw_addr + R)); \ - return 1; \ - } \ - writel(before, (adapter->hw.hw_addr + R)); \ -} - -static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data) -{ - struct ixgbe_reg_test *test; - u32 value, status_before, status_after; - u32 i, toggle; - - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: - toggle = 0x7FFFF3FF; - test = reg_test_82598; - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - toggle = 0x7FFFF30F; - test = reg_test_82599; - break; - default: - *data = 1; - return 1; - break; - } - - /* - * Because the status register is such a special case, - * we handle it separately from the rest of the register - * tests. Some bits are read-only, some toggle, and some - * are writeable on newer MACs. - */ - status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS); - value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle); - status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle; - if (value != status_after) { - e_err(drv, "failed STATUS register test got: " - "0x%08X expected: 0x%08X\n", status_after, value); - *data = 1; - return 1; - } - /* restore previous status */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before); - - /* - * Perform the remainder of the register test, looping through - * the test table until we either fail or reach the null entry. 
- */ - while (test->reg) { - for (i = 0; i < test->array_len; i++) { - switch (test->test_type) { - case PATTERN_TEST: - REG_PATTERN_TEST(test->reg + (i * 0x40), - test->mask, - test->write); - break; - case SET_READ_TEST: - REG_SET_AND_CHECK(test->reg + (i * 0x40), - test->mask, - test->write); - break; - case WRITE_NO_TEST: - writel(test->write, - (adapter->hw.hw_addr + test->reg) - + (i * 0x40)); - break; - case TABLE32_TEST: - REG_PATTERN_TEST(test->reg + (i * 4), - test->mask, - test->write); - break; - case TABLE64_TEST_LO: - REG_PATTERN_TEST(test->reg + (i * 8), - test->mask, - test->write); - break; - case TABLE64_TEST_HI: - REG_PATTERN_TEST((test->reg + 4) + (i * 8), - test->mask, - test->write); - break; - } - } - test++; - } - - *data = 0; - return 0; -} - -static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) -{ - if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL)) - *data = 1; - else - *data = 0; - return *data; -} - -static irqreturn_t ixgbe_test_intr(int irq, void *data) -{ - struct net_device *netdev = data; - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); - - return IRQ_HANDLED; -} - -static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) -{ - struct net_device *netdev = adapter->netdev; - u32 mask, i = 0, shared_int = true; - u32 irq = adapter->pdev->irq; - - *data = 0; - - /* Hook up test interrupt handler just for this test */ - if (adapter->msix_entries) { - /* NOTE: we don't test MSI-X interrupts here, yet */ - return 0; - } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { - shared_int = false; - if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name, - netdev)) { - *data = 1; - return -1; - } - } else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED, - netdev->name, netdev)) { - shared_int = false; - } else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED, - netdev->name, netdev)) { - *data = 1; - return -1; - } - e_info(hw, "testing %s interrupt\n", - (shared_int ? "shared" : "unshared")); - - /* Disable all the interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); - IXGBE_WRITE_FLUSH(&adapter->hw); - usleep_range(10000, 20000); - - /* Test each interrupt */ - for (; i < 10; i++) { - /* Interrupt to test */ - mask = 1 << i; - - if (!shared_int) { - /* - * Disable the interrupts to be reported in - * the cause register and then force the same - * interrupt and see if one gets posted. If - * an interrupt was posted to the bus, the - * test failed. - */ - adapter->test_icr = 0; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, - ~mask & 0x00007FFF); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, - ~mask & 0x00007FFF); - IXGBE_WRITE_FLUSH(&adapter->hw); - usleep_range(10000, 20000); - - if (adapter->test_icr & mask) { - *data = 3; - break; - } - } - - /* - * Enable the interrupt to be reported in the cause - * register and then force the same interrupt and see - * if one gets posted. If an interrupt was not posted - * to the bus, the test failed. - */ - adapter->test_icr = 0; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); - IXGBE_WRITE_FLUSH(&adapter->hw); - usleep_range(10000, 20000); - - if (!(adapter->test_icr & mask)) { - *data = 4; - break; - } - - if (!shared_int) { - /* - * Disable the other interrupts to be reported in - * the cause register and then force the other - * interrupts and see if any get posted. If - * an interrupt was posted to the bus, the - * test failed. 
- */ - adapter->test_icr = 0; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, - ~mask & 0x00007FFF); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, - ~mask & 0x00007FFF); - IXGBE_WRITE_FLUSH(&adapter->hw); - usleep_range(10000, 20000); - - if (adapter->test_icr) { - *data = 5; - break; - } - } - } - - /* Disable all the interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); - IXGBE_WRITE_FLUSH(&adapter->hw); - usleep_range(10000, 20000); - - /* Unhook test interrupt handler */ - free_irq(irq, netdev); - - return *data; -} - - - -static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 reg_data; - - /* X540 needs to set the MACC.FLU bit to force link up */ - if (adapter->hw.mac.type == ixgbe_mac_X540) { - reg_data = IXGBE_READ_REG(hw, IXGBE_MACC); - reg_data |= IXGBE_MACC_FLU; - IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data); - } - - /* right now we only support MAC loopback in the driver */ - reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0); - /* Setup MAC loopback */ - reg_data |= IXGBE_HLREG0_LPBK; - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data); - - reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL); - reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data); - - reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC); - reg_data &= ~IXGBE_AUTOC_LMS_MASK; - reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data); - IXGBE_WRITE_FLUSH(hw); - usleep_range(10000, 20000); - - /* Disable Atlas Tx lanes; re-enabled in reset path */ - if (hw->mac.type == ixgbe_mac_82598EB) { - u8 atlas; - - ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas); - atlas |= IXGBE_ATLAS_PDN_TX_REG_EN; - ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas); - - ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas); - atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL; - ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas); - - ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas); - atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL; - ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas); - - ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas); - atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL; - ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas); - } - - return 0; -} - -static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter) -{ - u32 reg_data; - - reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0); - reg_data &= ~IXGBE_HLREG0_LPBK; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data); -} - - - - - - -static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data) -{ - - //*data = ixgbe_setup_desc_rings(adapter); - //if (*data) - // goto out; - *data = ixgbe_setup_loopback_test(adapter); - if (*data) - goto err_loopback; - //*data = ixgbe_run_loopback_test(adapter); - ixgbe_loopback_cleanup(adapter); - -err_loopback: - //ixgbe_free_desc_rings(adapter); -//out: - return *data; - -} - -#ifndef HAVE_ETHTOOL_GET_SSET_COUNT -static int ixgbe_diag_test_count(struct net_device *netdev) -{ - return IXGBE_TEST_LEN; -} - -#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ -static void ixgbe_diag_test(struct net_device *netdev, - struct ethtool_test *eth_test, u64 *data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - bool if_running = netif_running(netdev); - - set_bit(__IXGBE_TESTING, &adapter->state); - if (eth_test->flags == ETH_TEST_FL_OFFLINE) { - /* Offline tests */ - - e_info(hw, "offline testing starting\n"); - - /* Link test performed before hardware reset so autoneg 
doesn't - * interfere with test result */ - if (ixgbe_link_test(adapter, &data[4])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - int i; - for (i = 0; i < adapter->num_vfs; i++) { - if (adapter->vfinfo[i].clear_to_send) { - e_warn(drv, "Please take active VFS " - "offline and restart the " - "adapter before running NIC " - "diagnostics\n"); - data[0] = 1; - data[1] = 1; - data[2] = 1; - data[3] = 1; - eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__IXGBE_TESTING, - &adapter->state); - goto skip_ol_tests; - } - } - } - - if (if_running) - /* indicate we're in test mode */ - dev_close(netdev); - else - ixgbe_reset(adapter); - - e_info(hw, "register testing starting\n"); - if (ixgbe_reg_test(adapter, &data[0])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - ixgbe_reset(adapter); - e_info(hw, "eeprom testing starting\n"); - if (ixgbe_eeprom_test(adapter, &data[1])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - ixgbe_reset(adapter); - e_info(hw, "interrupt testing starting\n"); - if (ixgbe_intr_test(adapter, &data[2])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - /* If SRIOV or VMDq is enabled then skip MAC - * loopback diagnostic. */ - if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED | - IXGBE_FLAG_VMDQ_ENABLED)) { - e_info(hw, "skip MAC loopback diagnostic in VT mode\n"); - data[3] = 0; - goto skip_loopback; - } - - ixgbe_reset(adapter); - e_info(hw, "loopback testing starting\n"); - if (ixgbe_loopback_test(adapter, &data[3])) - eth_test->flags |= ETH_TEST_FL_FAILED; - -skip_loopback: - ixgbe_reset(adapter); - - clear_bit(__IXGBE_TESTING, &adapter->state); - if (if_running) - dev_open(netdev); - } else { - e_info(hw, "online testing starting\n"); - /* Online tests */ - if (ixgbe_link_test(adapter, &data[4])) - eth_test->flags |= ETH_TEST_FL_FAILED; - - /* Online tests aren't run; pass by default */ - data[0] = 0; - data[1] = 0; - data[2] = 0; - data[3] = 0; - - clear_bit(__IXGBE_TESTING, &adapter->state); - } -skip_ol_tests: - msleep_interruptible(4 * 1000); -} - -static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter, - struct ethtool_wolinfo *wol) -{ - struct ixgbe_hw *hw = &adapter->hw; - int retval = 1; - u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; - - /* WOL not supported except for the following */ - switch (hw->device_id) { - case IXGBE_DEV_ID_82599_SFP: - /* Only these subdevice could supports WOL */ - switch (hw->subsystem_device_id) { - case IXGBE_SUBDEV_ID_82599_560FLR: - /* only support first port */ - if (hw->bus.func != 0) { - wol->supported = 0; - break; - } - case IXGBE_SUBDEV_ID_82599_SFP: - retval = 0; - break; - default: - wol->supported = 0; - break; - } - break; - case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: - /* All except this subdevice support WOL */ - if (hw->subsystem_device_id == - IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) { - wol->supported = 0; - break; - } - retval = 0; - break; - case IXGBE_DEV_ID_82599_KX4: - retval = 0; - break; - case IXGBE_DEV_ID_X540T: - /* check eeprom to see if enabled wol */ - if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || - ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && - (hw->bus.func == 0))) { - retval = 0; - break; - } - - /* All others not supported */ - wol->supported = 0; - break; - default: - wol->supported = 0; - } - return retval; -} - -static void ixgbe_get_wol(struct net_device *netdev, - struct ethtool_wolinfo *wol) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC; - wol->wolopts 
= 0; - - if (ixgbe_wol_exclusion(adapter, wol) || - !device_can_wakeup(&adapter->pdev->dev)) - return; - - if (adapter->wol & IXGBE_WUFC_EX) - wol->wolopts |= WAKE_UCAST; - if (adapter->wol & IXGBE_WUFC_MC) - wol->wolopts |= WAKE_MCAST; - if (adapter->wol & IXGBE_WUFC_BC) - wol->wolopts |= WAKE_BCAST; - if (adapter->wol & IXGBE_WUFC_MAG) - wol->wolopts |= WAKE_MAGIC; -} - -static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE)) - return -EOPNOTSUPP; - - if (ixgbe_wol_exclusion(adapter, wol)) - return wol->wolopts ? -EOPNOTSUPP : 0; - - adapter->wol = 0; - - if (wol->wolopts & WAKE_UCAST) - adapter->wol |= IXGBE_WUFC_EX; - if (wol->wolopts & WAKE_MCAST) - adapter->wol |= IXGBE_WUFC_MC; - if (wol->wolopts & WAKE_BCAST) - adapter->wol |= IXGBE_WUFC_BC; - if (wol->wolopts & WAKE_MAGIC) - adapter->wol |= IXGBE_WUFC_MAG; - - device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); - - return 0; -} - -static int ixgbe_nway_reset(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - if (netif_running(netdev)) - ixgbe_reinit_locked(adapter); - - return 0; -} - -#ifdef HAVE_ETHTOOL_SET_PHYS_ID -static int ixgbe_set_phys_id(struct net_device *netdev, - enum ethtool_phys_id_state state) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - - switch (state) { - case ETHTOOL_ID_ACTIVE: - adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - return 2; - - case ETHTOOL_ID_ON: - hw->mac.ops.led_on(hw, IXGBE_LED_ON); - break; - - case ETHTOOL_ID_OFF: - hw->mac.ops.led_off(hw, IXGBE_LED_ON); - break; - - case ETHTOOL_ID_INACTIVE: - /* Restore LED settings */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg); - break; - } - - return 0; -} -#else -static int ixgbe_phys_id(struct net_device *netdev, u32 data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - u32 i; - - if (!data || data > 300) - data = 300; - - for (i = 0; i < (data * 1000); i += 400) { - ixgbe_led_on(hw, IXGBE_LED_ON); - msleep_interruptible(200); - ixgbe_led_off(hw, IXGBE_LED_ON); - msleep_interruptible(200); - } - - /* Restore LED settings */ - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); - - return 0; -} -#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ - -static int ixgbe_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; -#ifndef CONFIG_IXGBE_NAPI - ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit; -#endif /* CONFIG_IXGBE_NAPI */ - /* only valid if in constant ITR mode */ - if (adapter->rx_itr_setting <= 1) - ec->rx_coalesce_usecs = adapter->rx_itr_setting; - else - ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; - - /* if in mixed tx/rx queues per vector mode, report only rx settings */ - if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) - return 0; - - /* only valid if in constant ITR mode */ - if (adapter->tx_itr_setting <= 1) - ec->tx_coalesce_usecs = adapter->tx_itr_setting; - else - ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; - - return 0; -} - -/* - * this function must be called before setting the new value of - * rx_itr_setting - */ -#ifdef NO_VNIC -static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter) -{ - struct 
net_device *netdev = adapter->netdev; - - /* nothing to do if LRO or RSC are not enabled */ - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) || - !(netdev->features & NETIF_F_LRO)) - return false; - - /* check the feature flag value and enable RSC if necessary */ - if (adapter->rx_itr_setting == 1 || - adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { - adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; - e_info(probe, "rx-usecs value high enough " - "to re-enable RSC\n"); - return true; - } - /* if interrupt rate is too high then disable RSC */ - } else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; -#ifdef IXGBE_NO_LRO - e_info(probe, "rx-usecs set too low, disabling RSC\n"); -#else - e_info(probe, "rx-usecs set too low, " - "falling back to software LRO\n"); -#endif - return true; - } - return false; -} -#endif - -static int ixgbe_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) -{ -#ifdef NO_VNIC - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_q_vector *q_vector; - int i; - int num_vectors; - u16 tx_itr_param, rx_itr_param; - bool need_reset = false; - - /* don't accept tx specific changes if we've got mixed RxTx vectors */ - if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count - && ec->tx_coalesce_usecs) - return -EINVAL; - - if (ec->tx_max_coalesced_frames_irq) - adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; - -#ifndef CONFIG_IXGBE_NAPI - if (ec->rx_max_coalesced_frames_irq) - adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq; - -#endif - if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) || - (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2))) - return -EINVAL; - - if (ec->rx_coalesce_usecs > 1) - adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; - else - adapter->rx_itr_setting = ec->rx_coalesce_usecs; - - if (adapter->rx_itr_setting == 1) - rx_itr_param = IXGBE_20K_ITR; - else - rx_itr_param = adapter->rx_itr_setting; - - if (ec->tx_coalesce_usecs > 1) - adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; - else - adapter->tx_itr_setting = ec->tx_coalesce_usecs; - - if (adapter->tx_itr_setting == 1) - tx_itr_param = IXGBE_10K_ITR; - else - tx_itr_param = adapter->tx_itr_setting; - - /* check the old value and enable RSC if necessary */ - need_reset = ixgbe_update_rsc(adapter); - - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - else - num_vectors = 1; - - for (i = 0; i < num_vectors; i++) { - q_vector = adapter->q_vector[i]; - q_vector->tx.work_limit = adapter->tx_work_limit; - q_vector->rx.work_limit = adapter->rx_work_limit; - if (q_vector->tx.count && !q_vector->rx.count) - /* tx only */ - q_vector->itr = tx_itr_param; - else - /* rx only or mixed */ - q_vector->itr = rx_itr_param; - ixgbe_write_eitr(q_vector); - } - - /* - * do reset here at the end to make sure EITR==0 case is handled - * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings - * also locks in RSC enable/disable which requires reset - */ - if (need_reset) - ixgbe_do_reset(netdev); -#endif - return 0; -} - -#ifndef HAVE_NDO_SET_FEATURES -static u32 ixgbe_get_rx_csum(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_ring *ring = adapter->rx_ring[0]; - return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); -} - -static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data) -{ - struct ixgbe_adapter *adapter = 
netdev_priv(netdev); - int i; - - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; - if (data) - set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); - else - clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); - } - - /* LRO and RSC both depend on RX checksum to function */ - if (!data && (netdev->features & NETIF_F_LRO)) { - netdev->features &= ~NETIF_F_LRO; - - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; - ixgbe_do_reset(netdev); - } - } - - return 0; -} - -static u32 ixgbe_get_tx_csum(struct net_device *netdev) -{ - return (netdev->features & NETIF_F_IP_CSUM) != 0; -} - -static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - u32 feature_list; - -#ifdef NETIF_F_IPV6_CSUM - feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; -#else - feature_list = NETIF_F_IP_CSUM; -#endif - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - feature_list |= NETIF_F_SCTP_CSUM; - break; - default: - break; - } - if (data) - netdev->features |= feature_list; - else - netdev->features &= ~feature_list; - - return 0; -} - -#ifdef NETIF_F_TSO -static int ixgbe_set_tso(struct net_device *netdev, u32 data) -{ - if (data) { - netdev->features |= NETIF_F_TSO; -#ifdef NETIF_F_TSO6 - netdev->features |= NETIF_F_TSO6; -#endif - } else { -#ifndef HAVE_NETDEV_VLAN_FEATURES -#ifdef NETIF_F_HW_VLAN_TX - struct ixgbe_adapter *adapter = netdev_priv(netdev); - /* disable TSO on all VLANs if they're present */ - if (adapter->vlgrp) { - int i; - struct net_device *v_netdev; - for (i = 0; i < VLAN_N_VID; i++) { - v_netdev = - vlan_group_get_device(adapter->vlgrp, i); - if (v_netdev) { - v_netdev->features &= ~NETIF_F_TSO; -#ifdef NETIF_F_TSO6 - v_netdev->features &= ~NETIF_F_TSO6; -#endif - vlan_group_set_device(adapter->vlgrp, i, - v_netdev); - } - } - } -#endif -#endif /* HAVE_NETDEV_VLAN_FEATURES */ - netdev->features &= ~NETIF_F_TSO; -#ifdef NETIF_F_TSO6 - netdev->features &= ~NETIF_F_TSO6; -#endif - } - return 0; -} - -#endif /* NETIF_F_TSO */ -#ifdef ETHTOOL_GFLAGS -static int ixgbe_set_flags(struct net_device *netdev, u32 data) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN; - u32 changed = netdev->features ^ data; - bool need_reset = false; - int rc; - -#ifndef HAVE_VLAN_RX_REGISTER - if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && - !(data & ETH_FLAG_RXVLAN)) - return -EINVAL; - -#endif -#ifdef NETIF_F_RXHASH - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) - supported_flags |= ETH_FLAG_RXHASH; -#endif -#ifdef IXGBE_NO_LRO - if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) -#endif - supported_flags |= ETH_FLAG_LRO; - -#ifdef ETHTOOL_GRXRINGS - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - case ixgbe_mac_82599EB: - supported_flags |= ETH_FLAG_NTUPLE; - default: - break; - } - -#endif - rc = ethtool_op_set_flags(netdev, data, supported_flags); - if (rc) - return rc; - -#ifndef HAVE_VLAN_RX_REGISTER - if (changed & ETH_FLAG_RXVLAN) - ixgbe_vlan_mode(netdev, netdev->features); - -#endif - /* if state changes we need to update adapter->flags and reset */ - if (!(netdev->features & NETIF_F_LRO)) { - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) - need_reset = true; - adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; - } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && - !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { - if (adapter->rx_itr_setting == 1 || 
- adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { - adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; - need_reset = true; - } else if (changed & ETH_FLAG_LRO) { -#ifdef IXGBE_NO_LRO - e_info(probe, "rx-usecs set too low, " - "disabling RSC\n"); -#else - e_info(probe, "rx-usecs set too low, " - "falling back to software LRO\n"); -#endif - } - } - -#ifdef ETHTOOL_GRXRINGS - /* - * Check if Flow Director n-tuple support was enabled or disabled. If - * the state changed, we need to reset. - */ - if (!(netdev->features & NETIF_F_NTUPLE)) { - if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { - /* turn off Flow Director, set ATR and reset */ - if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && - !(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - need_reset = true; - } - adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) { - /* turn off ATR, enable perfect filters and reset */ - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - need_reset = true; - } - -#endif /* ETHTOOL_GRXRINGS */ - if (need_reset) - ixgbe_do_reset(netdev); - - return 0; -} - -#endif /* ETHTOOL_GFLAGS */ -#endif /* HAVE_NDO_SET_FEATURES */ -#ifdef ETHTOOL_GRXRINGS -static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - union ixgbe_atr_input *mask = &adapter->fdir_mask; - struct ethtool_rx_flow_spec *fsp = - (struct ethtool_rx_flow_spec *)&cmd->fs; - struct hlist_node *node, *node2; - struct ixgbe_fdir_filter *rule = NULL; - - /* report total rule count */ - cmd->data = (1024 << adapter->fdir_pballoc) - 2; - - hlist_for_each_entry_safe(rule, node, node2, - &adapter->fdir_filter_list, fdir_node) { - if (fsp->location <= rule->sw_idx) - break; - } - - if (!rule || fsp->location != rule->sw_idx) - return -EINVAL; - - /* fill out the flow spec entry */ - - /* set flow type field */ - switch (rule->filter.formatted.flow_type) { - case IXGBE_ATR_FLOW_TYPE_TCPV4: - fsp->flow_type = TCP_V4_FLOW; - break; - case IXGBE_ATR_FLOW_TYPE_UDPV4: - fsp->flow_type = UDP_V4_FLOW; - break; - case IXGBE_ATR_FLOW_TYPE_SCTPV4: - fsp->flow_type = SCTP_V4_FLOW; - break; - case IXGBE_ATR_FLOW_TYPE_IPV4: - fsp->flow_type = IP_USER_FLOW; - fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; - fsp->h_u.usr_ip4_spec.proto = 0; - fsp->m_u.usr_ip4_spec.proto = 0; - break; - default: - return -EINVAL; - } - - fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port; - fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port; - fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port; - fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port; - fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0]; - fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0]; - fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0]; - fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0]; - fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id; - fsp->m_ext.vlan_tci = mask->formatted.vlan_id; - fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes; - fsp->m_ext.vlan_etype = mask->formatted.flex_bytes; - fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool); - fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool); - fsp->flow_type |= FLOW_EXT; - - /* record action */ - if (rule->action == IXGBE_FDIR_DROP_QUEUE) - fsp->ring_cookie = RX_CLS_FLOW_DISC; - else - fsp->ring_cookie = rule->action; - - return 0; -} - -static int 
ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *cmd, - u32 *rule_locs) -{ - struct hlist_node *node, *node2; - struct ixgbe_fdir_filter *rule; - int cnt = 0; - - /* report total rule count */ - cmd->data = (1024 << adapter->fdir_pballoc) - 2; - - hlist_for_each_entry_safe(rule, node, node2, - &adapter->fdir_filter_list, fdir_node) { - if (cnt == cmd->rule_cnt) - return -EMSGSIZE; - rule_locs[cnt] = rule->sw_idx; - cnt++; - } - - cmd->rule_cnt = cnt; - - return 0; -} - -static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - cmd->data = 0; - - /* if RSS is disabled then report no hashing */ - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) - return 0; - - /* Report default options for RSS on ixgbe */ - switch (cmd->flow_type) { - case TCP_V4_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case UDP_V4_FLOW: - if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case SCTP_V4_FLOW: - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case IPV4_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - case TCP_V6_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case UDP_V6_FLOW: - if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - case SCTP_V6_FLOW: - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case IPV6_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, -#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS - void *rule_locs) -#else - u32 *rule_locs) -#endif -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = adapter->num_rx_queues; - ret = 0; - break; - case ETHTOOL_GRXCLSRLCNT: - cmd->rule_cnt = adapter->fdir_filter_count; - ret = 0; - break; - case ETHTOOL_GRXCLSRULE: - ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd); - break; - case ETHTOOL_GRXCLSRLALL: - ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, - rule_locs); - break; - case ETHTOOL_GRXFH: - ret = ixgbe_get_rss_hash_opts(adapter, cmd); - break; - default: - break; - } - - return ret; -} - -static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ixgbe_fdir_filter *input, - u16 sw_idx) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2, *parent; - struct ixgbe_fdir_filter *rule; - int err = -EINVAL; - - parent = NULL; - rule = NULL; - - hlist_for_each_entry_safe(rule, node, node2, - &adapter->fdir_filter_list, fdir_node) { - /* hash found, or no matching entry */ - if (rule->sw_idx >= sw_idx) - break; - parent = node; - } - - /* if there is an old rule occupying our place remove it */ - if (rule && (rule->sw_idx == sw_idx)) { - if (!input || (rule->filter.formatted.bkt_hash != - input->filter.formatted.bkt_hash)) { - err = ixgbe_fdir_erase_perfect_filter_82599(hw, - &rule->filter, - sw_idx); - } - - hlist_del(&rule->fdir_node); - kfree(rule); - adapter->fdir_filter_count--; - } - - /* - * If no input this was a delete, err should be 0 if a rule was - * successfully found and removed from the list else -EINVAL - */ - if (!input) - return err; - - /* initialize node and set software index */ - INIT_HLIST_NODE(&input->fdir_node); - - /* add filter to the list */ - if (parent) - hlist_add_after(parent, &input->fdir_node); - else - hlist_add_head(&input->fdir_node, - 
&adapter->fdir_filter_list); - - /* update counts */ - adapter->fdir_filter_count++; - - return 0; -} - -static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp, - u8 *flow_type) -{ - switch (fsp->flow_type & ~FLOW_EXT) { - case TCP_V4_FLOW: - *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; - break; - case UDP_V4_FLOW: - *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; - break; - case SCTP_V4_FLOW: - *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; - break; - case IP_USER_FLOW: - switch (fsp->h_u.usr_ip4_spec.proto) { - case IPPROTO_TCP: - *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; - break; - case IPPROTO_UDP: - *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; - break; - case IPPROTO_SCTP: - *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; - break; - case 0: - if (!fsp->m_u.usr_ip4_spec.proto) { - *flow_type = IXGBE_ATR_FLOW_TYPE_IPV4; - break; - } - default: - return 0; - } - break; - default: - return 0; - } - - return 1; -} - -static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - struct ethtool_rx_flow_spec *fsp = - (struct ethtool_rx_flow_spec *)&cmd->fs; - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_fdir_filter *input; - union ixgbe_atr_input mask; - int err; - - if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) - return -EOPNOTSUPP; - - /* - * Don't allow programming if the action is a queue greater than - * the number of online Rx queues. - */ - if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) && - (fsp->ring_cookie >= adapter->num_rx_queues)) - return -EINVAL; - - /* Don't allow indexes to exist outside of available space */ - if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) { - e_err(drv, "Location out of range\n"); - return -EINVAL; - } - - input = kzalloc(sizeof(*input), GFP_ATOMIC); - if (!input) - return -ENOMEM; - - memset(&mask, 0, sizeof(union ixgbe_atr_input)); - - /* set SW index */ - input->sw_idx = fsp->location; - - /* record flow type */ - if (!ixgbe_flowspec_to_flow_type(fsp, - &input->filter.formatted.flow_type)) { - e_err(drv, "Unrecognized flow type\n"); - goto err_out; - } - - mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | - IXGBE_ATR_L4TYPE_MASK; - - if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) - mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; - - /* Copy input into formatted structures */ - input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; - mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; - input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; - mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; - input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc; - mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc; - input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst; - mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst; - - if (fsp->flow_type & FLOW_EXT) { - input->filter.formatted.vm_pool = - (unsigned char)ntohl(fsp->h_ext.data[1]); - mask.formatted.vm_pool = - (unsigned char)ntohl(fsp->m_ext.data[1]); - input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci; - mask.formatted.vlan_id = fsp->m_ext.vlan_tci; - input->filter.formatted.flex_bytes = - fsp->h_ext.vlan_etype; - mask.formatted.flex_bytes = fsp->m_ext.vlan_etype; - } - - /* determine if we need to drop or route the packet */ - if (fsp->ring_cookie == RX_CLS_FLOW_DISC) - input->action = IXGBE_FDIR_DROP_QUEUE; - else - input->action = fsp->ring_cookie; - - spin_lock(&adapter->fdir_perfect_lock); - - if (hlist_empty(&adapter->fdir_filter_list)) { - /* save mask and 
program input mask into HW */ - memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); - err = ixgbe_fdir_set_input_mask_82599(hw, &mask); - if (err) { - e_err(drv, "Error writing mask\n"); - goto err_out_w_lock; - } - } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { - e_err(drv, "Only one mask supported per port\n"); - goto err_out_w_lock; - } - - /* apply mask and compute/store hash */ - ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); - - /* program filters to filter memory */ - err = ixgbe_fdir_write_perfect_filter_82599(hw, - &input->filter, input->sw_idx, - (input->action == IXGBE_FDIR_DROP_QUEUE) ? - IXGBE_FDIR_DROP_QUEUE : - adapter->rx_ring[input->action]->reg_idx); - if (err) - goto err_out_w_lock; - - ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); - - spin_unlock(&adapter->fdir_perfect_lock); - - kfree(input); - return err; -err_out_w_lock: - spin_unlock(&adapter->fdir_perfect_lock); -err_out: - kfree(input); - return -EINVAL; -} - -static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *cmd) -{ - struct ethtool_rx_flow_spec *fsp = - (struct ethtool_rx_flow_spec *)&cmd->fs; - int err; - - spin_lock(&adapter->fdir_perfect_lock); - err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location)); - spin_unlock(&adapter->fdir_perfect_lock); - - return err; -} - -#ifdef ETHTOOL_SRXNTUPLE -/* - * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid - * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag - * was defined that this function was present. - */ -static int ixgbe_set_rx_ntuple(struct net_device *dev, - struct ethtool_rx_ntuple *cmd) -{ - return -EOPNOTSUPP; -} - -#endif -#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \ - IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) -static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter, - struct ethtool_rxnfc *nfc) -{ - u32 flags2 = adapter->flags2; - - /* - * RSS does not support anything other than hashing - * to queues on src and dst IPs and ports - */ - if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) - return -EINVAL; - - switch (nfc->flow_type) { - case TCP_V4_FLOW: - case TCP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST) || - !(nfc->data & RXH_L4_B_0_1) || - !(nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - break; - case UDP_V4_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; - break; - default: - return -EINVAL; - } - break; - case UDP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; - break; - default: - return -EINVAL; - } - break; - case AH_ESP_V4_FLOW: - case AH_V4_FLOW: - case ESP_V4_FLOW: - case SCTP_V4_FLOW: - case AH_ESP_V6_FLOW: - case AH_V6_FLOW: - case ESP_V6_FLOW: - case SCTP_V6_FLOW: - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST) || - (nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) - return -EINVAL; - break; - default: - return -EINVAL; - } - - /* if we changed something we need to update flags */ - if (flags2 != adapter->flags2) { - 
struct ixgbe_hw *hw = &adapter->hw; - u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); - - if ((flags2 & UDP_RSS_FLAGS) && - !(adapter->flags2 & UDP_RSS_FLAGS)) - e_warn(drv, "enabling UDP RSS: fragmented packets" - " may arrive out of order to the stack above\n"); - - adapter->flags2 = flags2; - - /* Perform hash on these packet types */ - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4 - | IXGBE_MRQC_RSS_FIELD_IPV4_TCP - | IXGBE_MRQC_RSS_FIELD_IPV6 - | IXGBE_MRQC_RSS_FIELD_IPV6_TCP; - - mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP | - IXGBE_MRQC_RSS_FIELD_IPV6_UDP); - - if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; - - if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; - - IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); - } - - return 0; -} - -static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case ETHTOOL_SRXCLSRLINS: - ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd); - break; - case ETHTOOL_SRXCLSRLDEL: - ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd); - break; - case ETHTOOL_SRXFH: - ret = ixgbe_set_rss_hash_opt(adapter, cmd); - break; - default: - break; - } - - return ret; -} - -#endif /* ETHTOOL_GRXRINGS */ -//static -struct ethtool_ops ixgbe_ethtool_ops = { - .get_settings = ixgbe_get_settings, - .set_settings = ixgbe_set_settings, - .get_drvinfo = ixgbe_get_drvinfo, - .get_regs_len = ixgbe_get_regs_len, - .get_regs = ixgbe_get_regs, - .get_wol = ixgbe_get_wol, - .set_wol = ixgbe_set_wol, - .nway_reset = ixgbe_nway_reset, - .get_link = ethtool_op_get_link, - .get_eeprom_len = ixgbe_get_eeprom_len, - .get_eeprom = ixgbe_get_eeprom, - .set_eeprom = ixgbe_set_eeprom, - .get_ringparam = ixgbe_get_ringparam, - .set_ringparam = ixgbe_set_ringparam, - .get_pauseparam = ixgbe_get_pauseparam, - .set_pauseparam = ixgbe_set_pauseparam, - .get_msglevel = ixgbe_get_msglevel, - .set_msglevel = ixgbe_set_msglevel, -#ifndef HAVE_ETHTOOL_GET_SSET_COUNT - .self_test_count = ixgbe_diag_test_count, -#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ - .self_test = ixgbe_diag_test, - .get_strings = ixgbe_get_strings, -#ifdef HAVE_ETHTOOL_SET_PHYS_ID - .set_phys_id = ixgbe_set_phys_id, -#else - .phys_id = ixgbe_phys_id, -#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ -#ifndef HAVE_ETHTOOL_GET_SSET_COUNT - .get_stats_count = ixgbe_get_stats_count, -#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ - .get_sset_count = ixgbe_get_sset_count, -#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ - .get_ethtool_stats = ixgbe_get_ethtool_stats, -#ifdef HAVE_ETHTOOL_GET_PERM_ADDR - .get_perm_addr = ethtool_op_get_perm_addr, -#endif - .get_coalesce = ixgbe_get_coalesce, - .set_coalesce = ixgbe_set_coalesce, -#ifndef HAVE_NDO_SET_FEATURES - .get_rx_csum = ixgbe_get_rx_csum, - .set_rx_csum = ixgbe_set_rx_csum, - .get_tx_csum = ixgbe_get_tx_csum, - .set_tx_csum = ixgbe_set_tx_csum, - .get_sg = ethtool_op_get_sg, - .set_sg = ethtool_op_set_sg, -#ifdef NETIF_F_TSO - .get_tso = ethtool_op_get_tso, - .set_tso = ixgbe_set_tso, -#endif -#ifdef ETHTOOL_GFLAGS - .get_flags = ethtool_op_get_flags, - .set_flags = ixgbe_set_flags, -#endif -#endif /* HAVE_NDO_SET_FEATURES */ -#ifdef ETHTOOL_GRXRINGS - .get_rxnfc = ixgbe_get_rxnfc, - .set_rxnfc = ixgbe_set_rxnfc, -#ifdef ETHTOOL_SRXNTUPLE - .set_rx_ntuple = ixgbe_set_rx_ntuple, -#endif -#endif -}; - -void ixgbe_set_ethtool_ops(struct net_device *netdev) -{ - SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops); -} -#endif /* SIOCETHTOOL */ diff --git 
a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h deleted file mode 100644 index eec86cbb..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_FCOE_H -#define _IXGBE_FCOE_H - -#ifdef IXGBE_FCOE - -#include -#include - -/* shift bits within STAT fo FCSTAT */ -#define IXGBE_RXDADV_FCSTAT_SHIFT 4 - -/* ddp user buffer */ -#define IXGBE_BUFFCNT_MAX 256 /* 8 bits bufcnt */ -#define IXGBE_FCPTR_ALIGN 16 -#define IXGBE_FCPTR_MAX (IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t)) -#define IXGBE_FCBUFF_4KB 0x0 -#define IXGBE_FCBUFF_8KB 0x1 -#define IXGBE_FCBUFF_16KB 0x2 -#define IXGBE_FCBUFF_64KB 0x3 -#define IXGBE_FCBUFF_MAX 65536 /* 64KB max */ -#define IXGBE_FCBUFF_MIN 4096 /* 4KB min */ -#define IXGBE_FCOE_DDP_MAX 512 /* 9 bits xid */ - -/* Default traffic class to use for FCoE */ -#define IXGBE_FCOE_DEFTC 3 - -/* fcerr */ -#define IXGBE_FCERR_BADCRC 0x00100000 -#define IXGBE_FCERR_EOFSOF 0x00200000 -#define IXGBE_FCERR_NOFIRST 0x00300000 -#define IXGBE_FCERR_OOOSEQ 0x00400000 -#define IXGBE_FCERR_NODMA 0x00500000 -#define IXGBE_FCERR_PKTLOST 0x00600000 - -/* FCoE DDP for target mode */ -#define __IXGBE_FCOE_TARGET 1 - -struct ixgbe_fcoe_ddp { - int len; - u32 err; - unsigned int sgc; - struct scatterlist *sgl; - dma_addr_t udp; - u64 *udl; - struct pci_pool *pool; -}; - -struct ixgbe_fcoe { - struct pci_pool **pool; - atomic_t refcnt; - spinlock_t lock; - struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX]; - unsigned char *extra_ddp_buffer; - dma_addr_t extra_ddp_buffer_dma; - u64 __percpu *pcpu_noddp; - u64 __percpu *pcpu_noddp_ext_buff; - unsigned long mode; - u8 tc; - u8 up; - u8 up_set; -}; -#endif /* IXGBE_FCOE */ - -#endif /* _IXGBE_FCOE_H */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c deleted file mode 100644 index a5acf19c..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ /dev/null @@ -1,2951 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/****************************************************************************** - Copyright (c)2006 - 2007 Myricom, Inc. 
for some LRO specific code -******************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_SCTP -#include -#endif -#include -#include -#ifdef NETIF_F_TSO -#include -#ifdef NETIF_F_TSO6 -#include -#endif -#endif -#ifdef SIOCETHTOOL -#include -#endif - -#include "ixgbe.h" - -#undef CONFIG_DCA -#undef CONFIG_DCA_MODULE - -char ixgbe_driver_name[] = "ixgbe"; -#define DRV_HW_PERF - -#ifndef CONFIG_IXGBE_NAPI -#define DRIVERNAPI -#else -#define DRIVERNAPI "-NAPI" -#endif - -#define FPGA - -#define VMDQ_TAG - -#define MAJ 3 -#define MIN 9 -#define BUILD 17 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ - __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG -const char ixgbe_driver_version[] = DRV_VERSION; - -/* ixgbe_pci_tbl - PCI Device ID Table - * - * Wildcard entries (PCI_ANY_ID) should come last - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, - * Class, Class Mask, private data (not used) } - */ -const struct pci_device_id ixgbe_pci_tbl[] = { - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)}, - /* required last entry */ - {0, } -}; - -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) -static int ixgbe_notify_dca(struct notifier_block *, unsigned long event, - void *p); -static struct notifier_block dca_notifier = { - .notifier_call = ixgbe_notify_dca, - .next = NULL, - .priority = 0 -}; - -#endif -MODULE_AUTHOR("Intel Corporation, "); -MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); - -#define DEFAULT_DEBUG_LEVEL_SHIFT 3 - - -static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter) -{ - u32 ctrl_ext; - - /* Let firmware take over control of h/w */ - ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, - ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD); -} - -#ifdef NO_VNIC -static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter) -{ - u32 ctrl_ext; - - /* Let firmware know the driver has taken 
over */ - ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, - ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD); -} -#endif - - -static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_hw_stats *hwstats = &adapter->stats; - int i; - u32 data; - - if ((hw->fc.current_mode != ixgbe_fc_full) && - (hw->fc.current_mode != ixgbe_fc_rx_pause)) - return; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); - break; - default: - data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); - } - hwstats->lxoffrxc += data; - - /* refill credits (no tx hang) if we received xoff */ - if (!data) - return; - - for (i = 0; i < adapter->num_tx_queues; i++) - clear_bit(__IXGBE_HANG_CHECK_ARMED, - &adapter->tx_ring[i]->state); -} - -static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_hw_stats *hwstats = &adapter->stats; - u32 xoff[8] = {0}; - int i; - bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; - -#ifdef HAVE_DCBNL_IEEE - if (adapter->ixgbe_ieee_pfc) - pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); - -#endif - if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) { - ixgbe_update_xoff_rx_lfc(adapter); - return; - } - - /* update stats for each tc, only valid with PFC enabled */ - for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); - break; - default: - xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); - } - hwstats->pxoffrxc[i] += xoff[i]; - } - - /* disarm tx queues that have received xoff frames */ - for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; - u8 tc = tx_ring->dcb_tc; - - if ((tc <= 7) && (xoff[tc])) - clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); - } -} - - - - -#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 - - - - -#ifdef HAVE_8021P_SUPPORT -/** - * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping - * @adapter: driver data - */ -void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - int i; - - /* leave vlan tag stripping enabled for DCB */ - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) - return; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~IXGBE_VLNCTRL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - for (i = 0; i < adapter->num_rx_queues; i++) { - u8 reg_idx = adapter->rx_ring[i]->reg_idx; - vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); - vlnctrl &= ~IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); - } - break; - default: - break; - } -} - -#endif -/** - * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping - * @adapter: driver data - */ -void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - int i; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl |= IXGBE_VLNCTRL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - for (i = 0; i < adapter->num_rx_queues; i++) { - u8 reg_idx = adapter->rx_ring[i]->reg_idx; - vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); - vlnctrl |= 
IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); - } - break; - default: - break; - } -} - -#ifdef HAVE_VLAN_RX_REGISTER -void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp) -#else -void ixgbe_vlan_mode(struct net_device *netdev, u32 features) -#endif -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); -#ifdef HAVE_8021P_SUPPORT - bool enable; -#endif -#ifdef HAVE_VLAN_RX_REGISTER - - //if (!test_bit(__IXGBE_DOWN, &adapter->state)) - // ixgbe_irq_disable(adapter); - - adapter->vlgrp = grp; - - //if (!test_bit(__IXGBE_DOWN, &adapter->state)) - // ixgbe_irq_enable(adapter, true, true); -#endif -#ifdef HAVE_8021P_SUPPORT -#ifdef HAVE_VLAN_RX_REGISTER - enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED)); -#else - enable = !!(features & NETIF_F_HW_VLAN_RX); -#endif - if (enable) - /* enable VLAN tag insert/strip */ - ixgbe_vlan_stripping_enable(adapter); - else - /* disable VLAN tag insert/strip */ - ixgbe_vlan_stripping_disable(adapter); - -#endif -} - -static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq) -{ -#ifdef NETDEV_HW_ADDR_T_MULTICAST - struct netdev_hw_addr *mc_ptr; -#else - struct dev_mc_list *mc_ptr; -#endif - struct ixgbe_adapter *adapter = hw->back; - u8 *addr = *mc_addr_ptr; - - *vmdq = adapter->num_vfs; - -#ifdef NETDEV_HW_ADDR_T_MULTICAST - mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]); - if (mc_ptr->list.next) { - struct netdev_hw_addr *ha; - - ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list); - *mc_addr_ptr = ha->addr; - } -#else - mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]); - if (mc_ptr->next) - *mc_addr_ptr = mc_ptr->next->dmi_addr; -#endif - else - *mc_addr_ptr = NULL; - - return addr; -} - -/** - * ixgbe_write_mc_addr_list - write multicast addresses to MTA - * @netdev: network interface device structure - * - * Writes multicast address list to the MTA hash table. 
- * Returns: -ENOMEM on failure - * 0 on no addresses written - * X on writing X addresses to MTA - **/ -int ixgbe_write_mc_addr_list(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; -#ifdef NETDEV_HW_ADDR_T_MULTICAST - struct netdev_hw_addr *ha; -#endif - u8 *addr_list = NULL; - int addr_count = 0; - - if (!hw->mac.ops.update_mc_addr_list) - return -ENOMEM; - - if (!netif_running(netdev)) - return 0; - - - hw->mac.ops.update_mc_addr_list(hw, NULL, 0, - ixgbe_addr_list_itr, true); - - if (!netdev_mc_empty(netdev)) { -#ifdef NETDEV_HW_ADDR_T_MULTICAST - ha = list_first_entry(&netdev->mc.list, - struct netdev_hw_addr, list); - addr_list = ha->addr; -#else - addr_list = netdev->mc_list->dmi_addr; -#endif - addr_count = netdev_mc_count(netdev); - - hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count, - ixgbe_addr_list_itr, false); - } - -#ifdef CONFIG_PCI_IOV - //ixgbe_restore_vf_multicasts(adapter); -#endif - return addr_count; -} - - -void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) { - hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - } else { - hw->mac.ops.clear_rar(hw, i); - } - } -} - -void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & - IXGBE_MAC_STATE_IN_USE) { - hw->mac.ops.set_rar(hw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - } else { - hw->mac.ops.clear_rar(hw, i); - } - adapter->mac_table[i].state &= - ~(IXGBE_MAC_STATE_MODIFIED); - } - } -} - -int ixgbe_available_rars(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i, count = 0; - - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state == 0) - count++; - } - return count; -} - -int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i; - - if (is_zero_ether_addr(addr)) - return 0; - - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) - continue; - adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | - IXGBE_MAC_STATE_IN_USE); - memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); - adapter->mac_table[i].queue = queue; - ixgbe_sync_mac_table(adapter); - return i; - } - return -ENOMEM; -} - -void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) -{ - int i; - struct ixgbe_hw *hw = &adapter->hw; - - for (i = 0; i < hw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].queue = 0; - } - ixgbe_sync_mac_table(adapter); -} - -void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index) -{ - adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE; - memset(adapter->mac_table[index].addr, 0, ETH_ALEN); - adapter->mac_table[index].queue = 0; - ixgbe_sync_mac_table(adapter); -} - -int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue) -{ 
- /* search table for addr, if found, set to 0 and sync */ - int i; - struct ixgbe_hw *hw = &adapter->hw; - - if (is_zero_ether_addr(addr)) - return 0; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].queue = 0; - ixgbe_sync_mac_table(adapter); - return 0; - } - } - return -ENOMEM; -} -#ifdef HAVE_SET_RX_MODE -/** - * ixgbe_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, - struct net_device *netdev, unsigned int vfn) -{ - int count = 0; - - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) - return -ENOMEM; - - if (!netdev_uc_empty(netdev)) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - struct netdev_hw_addr *ha; -#else - struct dev_mc_list *ha; -#endif - netdev_for_each_uc_addr(ha, netdev) { -#ifdef NETDEV_HW_ADDR_T_UNICAST - ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn); - ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn); -#else - ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn); - ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn); -#endif - count++; - } - } - return count; -} - -#endif -/** - * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure - * - * The set_rx_method entry point is called whenever the unicast/multicast - * address list or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast and - * promiscuous mode. 
- **/ -void ixgbe_set_rx_mode(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; - u32 vlnctrl; - int count; - - /* Check for Promiscuous and All Multicast modes */ - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - - /* set all bits that we expect to always be set */ - fctrl |= IXGBE_FCTRL_BAM; - fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ - fctrl |= IXGBE_FCTRL_PMCF; - - /* clear the bits we are changing the status of */ - fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); - - if (netdev->flags & IFF_PROMISC) { - hw->addr_ctrl.user_set_promisc = true; - fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - vmolr |= IXGBE_VMOLR_MPE; - } else { - if (netdev->flags & IFF_ALLMULTI) { - fctrl |= IXGBE_FCTRL_MPE; - vmolr |= IXGBE_VMOLR_MPE; - } else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - count = ixgbe_write_mc_addr_list(netdev); - if (count < 0) { - fctrl |= IXGBE_FCTRL_MPE; - vmolr |= IXGBE_VMOLR_MPE; - } else if (count) { - vmolr |= IXGBE_VMOLR_ROMPE; - } - } -#ifdef NETIF_F_HW_VLAN_TX - /* enable hardware vlan filtering */ - vlnctrl |= IXGBE_VLNCTRL_VFE; -#endif - hw->addr_ctrl.user_set_promisc = false; -#ifdef HAVE_SET_RX_MODE - /* - * Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = ixgbe_write_uc_addr_list(adapter, netdev, - adapter->num_vfs); - if (count < 0) { - fctrl |= IXGBE_FCTRL_UPE; - vmolr |= IXGBE_VMOLR_ROPE; - } -#endif - } - - if (hw->mac.type != ixgbe_mac_82598EB) { - vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) & - ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE | - IXGBE_VMOLR_ROPE); - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); - } - - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); -} - - - - - - - - -/* Additional bittime to account for IXGBE framing */ -#define IXGBE_ETH_FRAMING 20 - -/* - * ixgbe_hpbthresh - calculate high water mark for flow control - * - * @adapter: board private structure to calculate for - * @pb - packet buffer to calculate - */ -static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct net_device *dev = adapter->netdev; - int link, tc, kb, marker; - u32 dv_id, rx_pba; - - /* Calculate max LAN frame size */ - tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING; - -#ifdef IXGBE_FCOE - /* FCoE traffic class uses FCOE jumbo frames */ - if (dev->features & NETIF_F_FCOE_MTU) { - int fcoe_pb = 0; - - fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); - - if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) - tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; - } -#endif - - /* Calculate delay value for device */ - switch (hw->mac.type) { - case ixgbe_mac_X540: - dv_id = IXGBE_DV_X540(link, tc); - break; - default: - dv_id = IXGBE_DV(link, tc); - break; - } - - /* Loopback switch introduces additional latency */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - dv_id += IXGBE_B2BT(tc); - - /* Delay value is calculated in bit times convert to KB */ - kb = IXGBE_BT2KB(dv_id); - rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10; - - marker = rx_pba - kb; 
- - /* It is possible that the packet buffer is not large enough - * to provide required headroom. In this case throw an error - * to user and a do the best we can. - */ - if (marker < 0) { - e_warn(drv, "Packet Buffer(%i) can not provide enough" - "headroom to suppport flow control." - "Decrease MTU or number of traffic classes\n", pb); - marker = tc + 1; - } - - return marker; -} - -/* - * ixgbe_lpbthresh - calculate low water mark for for flow control - * - * @adapter: board private structure to calculate for - * @pb - packet buffer to calculate - */ -static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct net_device *dev = adapter->netdev; - int tc; - u32 dv_id; - - /* Calculate max LAN frame size */ - tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; - -#ifdef IXGBE_FCOE - /* FCoE traffic class uses FCOE jumbo frames */ - if (dev->features & NETIF_F_FCOE_MTU) { - int fcoe_pb = 0; - - fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); - - if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) - tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; - } -#endif - - /* Calculate delay value for device */ - switch (hw->mac.type) { - case ixgbe_mac_X540: - dv_id = IXGBE_LOW_DV_X540(tc); - break; - default: - dv_id = IXGBE_LOW_DV(tc); - break; - } - - /* Delay value is calculated in bit times convert to KB */ - return IXGBE_BT2KB(dv_id); -} - -/* - * ixgbe_pbthresh_setup - calculate and setup high low water marks - */ -static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int num_tc = netdev_get_num_tc(adapter->netdev); - int i; - - if (!num_tc) - num_tc = 1; - if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS) - num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS; - - for (i = 0; i < num_tc; i++) { - hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); - hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i); - - /* Low water marks must not be larger than high water marks */ - if (hw->fc.low_water[i] > hw->fc.high_water[i]) - hw->fc.low_water[i] = 0; - } - - for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) - hw->fc.high_water[i] = 0; -} - - - -#ifdef NO_VNIC -static void ixgbe_configure(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - ixgbe_configure_pb(adapter); - ixgbe_configure_dcb(adapter); - - ixgbe_set_rx_mode(adapter->netdev); -#ifdef NETIF_F_HW_VLAN_TX - ixgbe_restore_vlan(adapter); -#endif - -#ifdef IXGBE_FCOE - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) - ixgbe_configure_fcoe(adapter); - -#endif /* IXGBE_FCOE */ - - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - hw->mac.ops.disable_sec_rx_path(hw); - - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { - ixgbe_init_fdir_signature_82599(&adapter->hw, - adapter->fdir_pballoc); - } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { - ixgbe_init_fdir_perfect_82599(&adapter->hw, - adapter->fdir_pballoc); - ixgbe_fdir_filter_restore(adapter); - } - - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - hw->mac.ops.enable_sec_rx_path(hw); - - ixgbe_configure_virtualization(adapter); - - ixgbe_configure_tx(adapter); - ixgbe_configure_rx(adapter); -} -#endif - -static bool ixgbe_is_sfp(struct ixgbe_hw *hw) -{ - switch (hw->phy.type) { - case ixgbe_phy_sfp_avago: - case ixgbe_phy_sfp_ftl: - case ixgbe_phy_sfp_intel: - case ixgbe_phy_sfp_unknown: - case ixgbe_phy_sfp_passive_tyco: - case ixgbe_phy_sfp_passive_unknown: - case ixgbe_phy_sfp_active_unknown: - case ixgbe_phy_sfp_ftl_active: - return true; - case ixgbe_phy_nl: - if (hw->mac.type == 
ixgbe_mac_82598EB) - return true; - default: - return false; - } -} - - -/** - * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset - * @adapter: board private structure - * - * On a reset we need to clear out the VF stats or accounting gets - * messed up because they're not clear on read. - **/ -void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i; - - for (i = 0; i < adapter->num_vfs; i++) { - adapter->vfinfo[i].last_vfstats.gprc = - IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i)); - adapter->vfinfo[i].saved_rst_vfstats.gprc += - adapter->vfinfo[i].vfstats.gprc; - adapter->vfinfo[i].vfstats.gprc = 0; - adapter->vfinfo[i].last_vfstats.gptc = - IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i)); - adapter->vfinfo[i].saved_rst_vfstats.gptc += - adapter->vfinfo[i].vfstats.gptc; - adapter->vfinfo[i].vfstats.gptc = 0; - adapter->vfinfo[i].last_vfstats.gorc = - IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i)); - adapter->vfinfo[i].saved_rst_vfstats.gorc += - adapter->vfinfo[i].vfstats.gorc; - adapter->vfinfo[i].vfstats.gorc = 0; - adapter->vfinfo[i].last_vfstats.gotc = - IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i)); - adapter->vfinfo[i].saved_rst_vfstats.gotc += - adapter->vfinfo[i].vfstats.gotc; - adapter->vfinfo[i].vfstats.gotc = 0; - adapter->vfinfo[i].last_vfstats.mprc = - IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i)); - adapter->vfinfo[i].saved_rst_vfstats.mprc += - adapter->vfinfo[i].vfstats.mprc; - adapter->vfinfo[i].vfstats.mprc = 0; - } -} - - - -void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) -{ -#ifdef NO_VNIC - WARN_ON(in_interrupt()); - /* put off any impending NetWatchDogTimeout */ - adapter->netdev->trans_start = jiffies; - - while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - ixgbe_down(adapter); - /* - * If SR-IOV enabled then wait a bit before bringing the adapter - * back up to give the VFs time to respond to the reset. The - * two second wait is based upon the watchdog timer cycle in - * the VF driver. - */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - msleep(2000); - ixgbe_up(adapter); - clear_bit(__IXGBE_RESETTING, &adapter->state); -#endif -} - -void ixgbe_up(struct ixgbe_adapter *adapter) -{ - /* hardware has been reset, we need to reload some things */ - //ixgbe_configure(adapter); - - //ixgbe_up_complete(adapter); -} - -void ixgbe_reset(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int err; - - /* lock SFP init bit to prevent race conditions with the watchdog */ - while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) - usleep_range(1000, 2000); - - /* clear all SFP and link config related flags while holding SFP_INIT */ - adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP | - IXGBE_FLAG2_SFP_NEEDS_RESET); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; - - err = hw->mac.ops.init_hw(hw); - switch (err) { - case 0: - case IXGBE_ERR_SFP_NOT_PRESENT: - case IXGBE_ERR_SFP_NOT_SUPPORTED: - break; - case IXGBE_ERR_MASTER_REQUESTS_PENDING: - e_dev_err("master disable timed out\n"); - break; - case IXGBE_ERR_EEPROM_VERSION: - /* We are running on a pre-production device, log a warning */ - e_dev_warn("This device is a pre-production adapter/LOM. " - "Please be aware there may be issues associated " - "with your hardware. 
If you are experiencing " - "problems please contact your Intel or hardware " - "representative who provided you with this " - "hardware.\n"); - break; - default: - e_dev_err("Hardware Error: %d\n", err); - } - - clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - - ixgbe_flush_sw_mac_table(adapter); - memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, - netdev->addr_len); - adapter->mac_table[0].queue = adapter->num_vfs; - adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, - IXGBE_RAH_AV); -} - - - - - - -void ixgbe_down(struct ixgbe_adapter *adapter) -{ -#ifdef NO_VNIC - struct net_device *netdev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; - u32 rxctrl; - int i; - - /* signal that we are down to the interrupt handler */ - set_bit(__IXGBE_DOWN, &adapter->state); - - /* disable receives */ - rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); - - /* disable all enabled rx queues */ - for (i = 0; i < adapter->num_rx_queues; i++) - /* this call also flushes the previous write */ - ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]); - - usleep_range(10000, 20000); - - netif_tx_stop_all_queues(netdev); - - /* call carrier off first to avoid false dev_watchdog timeouts */ - netif_carrier_off(netdev); - netif_tx_disable(netdev); - - ixgbe_irq_disable(adapter); - - ixgbe_napi_disable_all(adapter); - - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; - - del_timer_sync(&adapter->service_timer); - - if (adapter->num_vfs) { - /* Clear EITR Select mapping */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0); - - /* Mark all the VFs as inactive */ - for (i = 0 ; i < adapter->num_vfs; i++) - adapter->vfinfo[i].clear_to_send = 0; - - /* ping all the active vfs to let them know we are going down */ - ixgbe_ping_all_vfs(adapter); - - /* Disable all VFTE/VFRE TX/RX */ - ixgbe_disable_tx_rx(adapter); - } - - /* disable transmits in the hardware now that interrupts are off */ - for (i = 0; i < adapter->num_tx_queues; i++) { - u8 reg_idx = adapter->tx_ring[i]->reg_idx; - IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH); - } - - /* Disable the Tx DMA engine on 82599 and X540 */ - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, - (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) & - ~IXGBE_DMATXCTL_TE)); - break; - default: - break; - } - -#ifdef HAVE_PCI_ERS - if (!pci_channel_offline(adapter->pdev)) -#endif - ixgbe_reset(adapter); - /* power down the optics */ - if ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB))) - ixgbe_disable_tx_laser(hw); - - ixgbe_clean_all_tx_rings(adapter); - ixgbe_clean_all_rx_rings(adapter); - -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) - /* since we reset the hardware DCA settings were cleared */ - ixgbe_setup_dca(adapter); -#endif - -#endif /* NO_VNIC */ -} - -#ifndef NO_VNIC - -#undef IXGBE_FCOE - -/* Artificial max queue cap per traffic class in DCB mode */ -#define DCB_QUEUE_CAP 8 - -/** - * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device - * @adapter: board private structure to initialize - * - * When DCB (Data Center Bridging) is enabled, allocate queues for - * each traffic class. 
If multiqueue isn't available,then abort DCB - * initialization. - * - * This function handles all combinations of DCB, RSS, and FCoE. - * - **/ -static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) -{ - int tcs; -#ifdef HAVE_MQPRIO - int rss_i, i, offset = 0; - struct net_device *dev = adapter->netdev; - - /* Map queue offset and counts onto allocated tx queues */ - tcs = netdev_get_num_tc(dev); - - if (!tcs) - return false; - - rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus()); - - if (rss_i > DCB_QUEUE_CAP) - rss_i = DCB_QUEUE_CAP; - - for (i = 0; i < tcs; i++) { - netdev_set_tc_queue(dev, i, rss_i, offset); - offset += rss_i; - } - - adapter->num_tx_queues = rss_i * tcs; - adapter->num_rx_queues = rss_i * tcs; - -#ifdef IXGBE_FCOE - /* FCoE enabled queues require special configuration indexed - * by feature specific indices and mask. Here we map FCoE - * indices onto the DCB queue pairs allowing FCoE to own - * configuration later. - */ - - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { - struct ixgbe_ring_feature *f; - int tc; - u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0}; - - ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg, - IXGBE_DCB_TX_CONFIG, - prio_tc); - tc = prio_tc[adapter->fcoe.up]; - - f = &adapter->ring_feature[RING_F_FCOE]; - f->indices = min_t(int, rss_i, f->indices); - f->mask = rss_i * tc; - } -#endif /* IXGBE_FCOE */ -#else - if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) - return false; - - /* Enable one Queue per traffic class */ - tcs = adapter->tc; - if (!tcs) - return false; - -#ifdef IXGBE_FCOE - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { - struct ixgbe_ring_feature *f; - int tc = netdev_get_prio_tc_map(adapter->netdev, - adapter->fcoe.up); - - f = &adapter->ring_feature[RING_F_FCOE]; - - /* - * We have max 8 queues for FCoE, where 8 the is - * FCoE redirection table size. We must also share - * ring resources with network traffic so if FCoE TC is - * 4 or greater and we are in 8 TC mode we can only use - * 7 queues. - */ - if ((tcs > 4) && (tc >= 4) && (f->indices > 7)) - f->indices = 7; - - f->indices = min_t(int, num_online_cpus(), f->indices); - f->mask = tcs; - - adapter->num_rx_queues = f->indices + tcs; - adapter->num_tx_queues = f->indices + tcs; - - return true; - } - -#endif /* IXGBE_FCOE */ - adapter->num_rx_queues = tcs; - adapter->num_tx_queues = tcs; -#endif /* HAVE_MQ */ - - return true; -} - -/** - * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices - * @adapter: board private structure to initialize - * - * When VMDq (Virtual Machine Devices queue) is enabled, allocate queues - * and VM pools where appropriate. If RSS is available, then also try and - * enable RSS and map accordingly. 
- * - **/ -static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter) -{ - int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices; - int vmdq_m = 0; - int rss_i = adapter->ring_feature[RING_F_RSS].indices; - unsigned long i; - int rss_shift; - bool ret = false; - - - switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED - | IXGBE_FLAG_DCB_ENABLED - | IXGBE_FLAG_VMDQ_ENABLED)) { - - case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED): - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i); - if (vmdq_i > 32) - rss_i = 2; - else - rss_i = 4; - i = rss_i; - rss_shift = find_first_bit(&i, sizeof(i) * 8); - vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) << - rss_shift) & (MAX_RX_QUEUES - 1); - break; - default: - break; - } - adapter->num_rx_queues = vmdq_i * rss_i; - adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i); - ret = true; - break; - - case (IXGBE_FLAG_VMDQ_ENABLED): - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: - vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1; - break; - default: - break; - } - adapter->num_rx_queues = vmdq_i; - adapter->num_tx_queues = vmdq_i; - ret = true; - break; - - default: - ret = false; - goto vmdq_queues_out; - } - - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { - adapter->num_rx_pools = vmdq_i; - adapter->num_rx_queues_per_pool = adapter->num_rx_queues / - vmdq_i; - } else { - adapter->num_rx_pools = adapter->num_rx_queues; - adapter->num_rx_queues_per_pool = 1; - } - /* save the mask for later use */ - adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m; -vmdq_queues_out: - return ret; -} - -/** - * ixgbe_set_rss_queues: Allocate queues for RSS - * @adapter: board private structure to initialize - * - * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try - * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. - * - **/ -static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) -{ - struct ixgbe_ring_feature *f; - - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) { - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - return false; - } - - /* set mask for 16 queue limit of RSS */ - f = &adapter->ring_feature[RING_F_RSS]; - f->mask = 0xF; - - /* - * Use Flow Director in addition to RSS to ensure the best - * distribution of flows across cores, even when an FDIR flow - * isn't matched. - */ - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { - f = &adapter->ring_feature[RING_F_FDIR]; - - f->indices = min_t(int, num_online_cpus(), f->indices); - f->mask = 0; - } - - adapter->num_rx_queues = f->indices; -#ifdef HAVE_TX_MQ - adapter->num_tx_queues = f->indices; -#endif - - return true; -} - -#ifdef IXGBE_FCOE -/** - * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE) - * @adapter: board private structure to initialize - * - * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges. - * The ring feature mask is not used as a mask for FCoE, as it can take any 8 - * rx queues out of the max number of rx queues, instead, it is used as the - * index of the first rx queue used by FCoE. 
- * - **/ -static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter) -{ - struct ixgbe_ring_feature *f; - - if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) - return false; - - ixgbe_set_rss_queues(adapter); - - f = &adapter->ring_feature[RING_F_FCOE]; - f->indices = min_t(int, num_online_cpus(), f->indices); - - /* adding FCoE queues */ - f->mask = adapter->num_rx_queues; - adapter->num_rx_queues += f->indices; - adapter->num_tx_queues += f->indices; - - return true; -} - -#endif /* IXGBE_FCOE */ -/* - * ixgbe_set_num_queues: Allocate queues for device, feature dependent - * @adapter: board private structure to initialize - * - * This is the top level queue allocation routine. The order here is very - * important, starting with the "most" number of features turned on at once, - * and ending with the smallest set of features. This way large combinations - * can be allocated if they're turned on, and smaller combinations are the - * fallthrough conditions. - * - **/ -static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) -{ - /* Start with base case */ - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_rx_pools = adapter->num_rx_queues; - adapter->num_rx_queues_per_pool = 1; - - if (ixgbe_set_vmdq_queues(adapter)) - return; - - if (ixgbe_set_dcb_queues(adapter)) - return; - -#ifdef IXGBE_FCOE - if (ixgbe_set_fcoe_queues(adapter)) - return; - -#endif /* IXGBE_FCOE */ - ixgbe_set_rss_queues(adapter); -} - -#endif - - -/** - * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter) - * @adapter: board private structure to initialize - * - * ixgbe_sw_init initializes the Adapter private data structure. - * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). 
- **/ -static int ixgbe_sw_init(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - int err; - - /* PCI config space info */ - - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - - err = ixgbe_init_shared_code(hw); - if (err) { - e_err(probe, "init_shared_code failed: %d\n", err); - goto out; - } - adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * - hw->mac.num_rar_entries, - GFP_ATOMIC); - /* Set capability flags */ - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | - IXGBE_FLAG_MSIX_CAPABLE | - IXGBE_FLAG_MQ_CAPABLE | - IXGBE_FLAG_RSS_CAPABLE; - adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) - adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; -#endif - adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE; - adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE; - - if (hw->device_id == IXGBE_DEV_ID_82598AT) - adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; - - adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598; - break; - case ixgbe_mac_X540: - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; - case ixgbe_mac_82599EB: - adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | - IXGBE_FLAG_MSIX_CAPABLE | - IXGBE_FLAG_MQ_CAPABLE | - IXGBE_FLAG_RSS_CAPABLE; - adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; -#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) - adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; -#endif - adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE; - adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; -#ifdef IXGBE_FCOE - adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; - adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; - adapter->ring_feature[RING_F_FCOE].indices = 0; -#ifdef CONFIG_DCB - /* Default traffic class to use for FCoE */ - adapter->fcoe.tc = IXGBE_FCOE_DEFTC; - adapter->fcoe.up = IXGBE_FCOE_DEFTC; - adapter->fcoe.up_set = IXGBE_FCOE_DEFTC; -#endif -#endif - if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; -#ifndef IXGBE_NO_SMART_SPEED - hw->phy.smart_speed = ixgbe_smart_speed_on; -#else - hw->phy.smart_speed = ixgbe_smart_speed_off; -#endif - adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599; - default: - break; - } - - /* n-tuple support exists, always init our spinlock */ - //spin_lock_init(&adapter->fdir_perfect_lock); - - if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) { - int j; - struct ixgbe_dcb_tc_config *tc; - int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS; - - - adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i; - adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i; - for (j = 0; j < dcb_i; j++) { - tc = &adapter->dcb_cfg.tc_config[j]; - tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0; - tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i; - tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0; - tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i; - tc->pfc = ixgbe_dcb_pfc_disabled; - if (j == 0) { - /* total of all TCs bandwidth needs to be 100 */ - tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent += - 100 % dcb_i; - tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent += - 100 % dcb_i; - } - } - - /* Initialize default user to priority mapping, UPx->TC0 */ - tc = &adapter->dcb_cfg.tc_config[0]; - tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF; - tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF; - - adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] 
= 100; - adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100; - adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal; - adapter->dcb_cfg.pfc_mode_enable = false; - adapter->dcb_cfg.round_robin_enable = false; - adapter->dcb_set_bitmap = 0x00; -#ifdef CONFIG_DCB - adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE; -#endif /* CONFIG_DCB */ - - if (hw->mac.type == ixgbe_mac_X540) { - adapter->dcb_cfg.num_tcs.pg_tcs = 4; - adapter->dcb_cfg.num_tcs.pfc_tcs = 4; - } - } -#ifdef CONFIG_DCB - /* XXX does this need to be initialized even w/o DCB? */ - //memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, - // sizeof(adapter->temp_dcb_cfg)); - -#endif - //if (hw->mac.type == ixgbe_mac_82599EB || - // hw->mac.type == ixgbe_mac_X540) - // hw->mbx.ops.init_params(hw); - - /* default flow control settings */ - hw->fc.requested_mode = ixgbe_fc_full; - hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */ - - adapter->last_lfc_mode = hw->fc.current_mode; - ixgbe_pbthresh_setup(adapter); - hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; - hw->fc.send_xon = true; - hw->fc.disable_fc_autoneg = false; - - /* set default ring sizes */ - adapter->tx_ring_count = IXGBE_DEFAULT_TXD; - adapter->rx_ring_count = IXGBE_DEFAULT_RXD; - - /* set default work limits */ - adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK; - adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK; - - set_bit(__IXGBE_DOWN, &adapter->state); -out: - return err; -} - -/** - * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors) - * @tx_ring: tx descriptor ring (for a specific queue) to setup - * - * Return 0 on success, negative on failure - **/ -int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) -{ - struct device *dev = tx_ring->dev; - //int orig_node = dev_to_node(dev); - int numa_node = -1; - int size; - - size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; - - if (tx_ring->q_vector) - numa_node = tx_ring->q_vector->numa_node; - - tx_ring->tx_buffer_info = vzalloc_node(size, numa_node); - if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); - if (!tx_ring->tx_buffer_info) - goto err; - - /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); - - //set_dev_node(dev, numa_node); - //tx_ring->desc = dma_alloc_coherent(dev, - // tx_ring->size, - // &tx_ring->dma, - // GFP_KERNEL); - //set_dev_node(dev, orig_node); - //if (!tx_ring->desc) - // tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, - // &tx_ring->dma, GFP_KERNEL); - //if (!tx_ring->desc) - // goto err; - - return 0; - -err: - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); - return -ENOMEM; -} - -/** - * ixgbe_setup_all_tx_resources - allocate all queues Tx resources - * @adapter: board private structure - * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. 
- * - * Return 0 on success, negative on failure - **/ -static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) -{ - int i, err = 0; - - for (i = 0; i < adapter->num_tx_queues; i++) { - err = ixgbe_setup_tx_resources(adapter->tx_ring[i]); - if (!err) - continue; - e_err(probe, "Allocation for Tx Queue %u failed\n", i); - break; - } - - return err; -} - -/** - * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) - * @rx_ring: rx descriptor ring (for a specific queue) to setup - * - * Returns 0 on success, negative on failure - **/ -int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) -{ - struct device *dev = rx_ring->dev; - //int orig_node = dev_to_node(dev); - int numa_node = -1; - int size; - - size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; - - if (rx_ring->q_vector) - numa_node = rx_ring->q_vector->numa_node; - - rx_ring->rx_buffer_info = vzalloc_node(size, numa_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; - - /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); - -#ifdef NO_VNIC - set_dev_node(dev, numa_node); - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!rx_ring->desc) - rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->desc) - goto err; - -#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT - ixgbe_init_rx_page_offset(rx_ring); - -#endif - -#endif /* NO_VNIC */ - return 0; -err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; -} - -/** - * ixgbe_setup_all_rx_resources - allocate all queues Rx resources - * @adapter: board private structure - * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. 
- * - * Return 0 on success, negative on failure - **/ -static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) -{ - int i, err = 0; - - for (i = 0; i < adapter->num_rx_queues; i++) { - err = ixgbe_setup_rx_resources(adapter->rx_ring[i]); - if (!err) - continue; - e_err(probe, "Allocation for Rx Queue %u failed\n", i); - break; - } - - return err; -} - -/** - * ixgbe_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue - * - * Free all transmit software resources - **/ -void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring) -{ - //ixgbe_clean_tx_ring(tx_ring); - - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!tx_ring->desc) - return; - - //dma_free_coherent(tx_ring->dev, tx_ring->size, - // tx_ring->desc, tx_ring->dma); - - tx_ring->desc = NULL; -} - -/** - * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure - * - * Free all transmit software resources - **/ -static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - if (adapter->tx_ring[i]->desc) - ixgbe_free_tx_resources(adapter->tx_ring[i]); -} - -/** - * ixgbe_free_rx_resources - Free Rx Resources - * @rx_ring: ring to clean the resources from - * - * Free all receive software resources - **/ -void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) -{ - //ixgbe_clean_rx_ring(rx_ring); - - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!rx_ring->desc) - return; - - //dma_free_coherent(rx_ring->dev, rx_ring->size, - // rx_ring->desc, rx_ring->dma); - - rx_ring->desc = NULL; -} - -/** - * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues - * @adapter: board private structure - * - * Free all receive software resources - **/ -static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_rx_queues; i++) - if (adapter->rx_ring[i]->desc) - ixgbe_free_rx_resources(adapter->rx_ring[i]); -} - - -/** - * ixgbe_open - Called when a network interface is made active - * @netdev: network interface device structure - * - * Returns 0 on success, negative value on failure - * - * The open entry point is called when a network interface is made - * active by the system (IFF_UP). At this point all resources needed - * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, - * and the stack is notified that the interface is ready. 
- **/ -//static -int ixgbe_open(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - int err; - - /* disallow open during test */ - if (test_bit(__IXGBE_TESTING, &adapter->state)) - return -EBUSY; - - netif_carrier_off(netdev); - - /* allocate transmit descriptors */ - err = ixgbe_setup_all_tx_resources(adapter); - if (err) - goto err_setup_tx; - - /* allocate receive descriptors */ - err = ixgbe_setup_all_rx_resources(adapter); - if (err) - goto err_setup_rx; - -#ifdef NO_VNIC - ixgbe_configure(adapter); - - err = ixgbe_request_irq(adapter); - if (err) - goto err_req_irq; - - ixgbe_up_complete(adapter); - -err_req_irq: -#else - return 0; -#endif -err_setup_rx: - ixgbe_free_all_rx_resources(adapter); -err_setup_tx: - ixgbe_free_all_tx_resources(adapter); - ixgbe_reset(adapter); - - return err; -} - -/** - * ixgbe_close - Disables a network interface - * @netdev: network interface device structure - * - * Returns 0, this is not allowed to fail - * - * The close entry point is called when an interface is de-activated - * by the OS. The hardware is still under the drivers control, but - * needs to be disabled. A global MAC reset is issued to stop the - * hardware, and all transmit and receive resources are freed. - **/ -//static -int ixgbe_close(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - //ixgbe_down(adapter); - //ixgbe_free_irq(adapter); - - //ixgbe_fdir_filter_exit(adapter); - - //ixgbe_free_all_tx_resources(adapter); - //ixgbe_free_all_rx_resources(adapter); - - ixgbe_release_hw_control(adapter); - - return 0; -} - - - - - -/** - * ixgbe_get_stats - Get System Network Statistics - * @netdev: network interface device structure - * - * Returns the address of the device statistics structure. - * The statistics are actually updated from the timer callback. - **/ -//static -struct net_device_stats *ixgbe_get_stats(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - /* update the stats data */ - ixgbe_update_stats(adapter); - -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - /* only return the current stats */ - return &netdev->stats; -#else - /* only return the current stats */ - return &adapter->net_stats; -#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ -} - -/** - * ixgbe_update_stats - Update the board statistics counters. 
- * @adapter: board private structure - **/ -void ixgbe_update_stats(struct ixgbe_adapter *adapter) -{ -#ifdef HAVE_NETDEV_STATS_IN_NETDEV - struct net_device_stats *net_stats = &adapter->netdev->stats; -#else - struct net_device_stats *net_stats = &adapter->net_stats; -#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_hw_stats *hwstats = &adapter->stats; - u64 total_mpc = 0; - u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; - u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; - u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; - u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; -#ifndef IXGBE_NO_LRO - u32 flushed = 0, coal = 0; - int num_q_vectors = 1; -#endif -#ifdef IXGBE_FCOE - struct ixgbe_fcoe *fcoe = &adapter->fcoe; - unsigned int cpu; - u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0; -#endif /* IXGBE_FCOE */ - - printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n", - adapter->num_tx_queues, adapter->num_rx_queues); - - if (test_bit(__IXGBE_DOWN, &adapter->state) || - test_bit(__IXGBE_RESETTING, &adapter->state)) - return; - -#ifndef IXGBE_NO_LRO - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - -#endif - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - u64 rsc_count = 0; - u64 rsc_flush = 0; - for (i = 0; i < adapter->num_rx_queues; i++) { - rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count; - rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush; - } - adapter->rsc_total_count = rsc_count; - adapter->rsc_total_flush = rsc_flush; - } - -#ifndef IXGBE_NO_LRO - for (i = 0; i < num_q_vectors; i++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; - if (!q_vector) - continue; - flushed += q_vector->lrolist.stats.flushed; - coal += q_vector->lrolist.stats.coal; - } - adapter->lro_stats.flushed = flushed; - adapter->lro_stats.coal = coal; - -#endif - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; - non_eop_descs += rx_ring->rx_stats.non_eop_descs; - alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; - alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; - hw_csum_rx_error += rx_ring->rx_stats.csum_err; - bytes += rx_ring->stats.bytes; - packets += rx_ring->stats.packets; - - } - adapter->non_eop_descs = non_eop_descs; - adapter->alloc_rx_page_failed = alloc_rx_page_failed; - adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; - adapter->hw_csum_rx_error = hw_csum_rx_error; - net_stats->rx_bytes = bytes; - net_stats->rx_packets = packets; - - bytes = 0; - packets = 0; - /* gather some stats to the adapter struct that are per queue */ - for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; - restart_queue += tx_ring->tx_stats.restart_queue; - tx_busy += tx_ring->tx_stats.tx_busy; - bytes += tx_ring->stats.bytes; - packets += tx_ring->stats.packets; - } - adapter->restart_queue = restart_queue; - adapter->tx_busy = tx_busy; - net_stats->tx_bytes = bytes; - net_stats->tx_packets = packets; - - hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); - - /* 8 register reads */ - for (i = 0; i < 8; i++) { - /* for packet buffers not used, the register should read 0 */ - mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i)); - missed_rx += mpc; - hwstats->mpc[i] += mpc; - total_mpc += hwstats->mpc[i]; - hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); - hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, 
IXGBE_PXOFFTXC(i)); - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - hwstats->pxonrxc[i] += - IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - hwstats->pxonrxc[i] += - IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); - break; - default: - break; - } - } - - /*16 register reads */ - for (i = 0; i < 16; i++) { - hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); - hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); - if ((hw->mac.type == ixgbe_mac_82599EB) || - (hw->mac.type == ixgbe_mac_X540)) { - hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); - IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */ - hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); - IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */ - } - } - - hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); - /* work around hardware counting issue */ - hwstats->gprc -= missed_rx; - - ixgbe_update_xoff_received(adapter); - - /* 82598 hardware only has a 32 bit counter in the high register */ - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); - hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); - hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); - hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); - break; - case ixgbe_mac_X540: - /* OS2BMC stats are X540 only*/ - hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC); - hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC); - hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC); - hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC); - case ixgbe_mac_82599EB: - for (i = 0; i < 16; i++) - adapter->hw_rx_no_dma_resources += - IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); - hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL); - IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */ - hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL); - IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */ - hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL); - IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */ - hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); -#ifdef HAVE_TX_MQ - hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); - hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); -#endif /* HAVE_TX_MQ */ -#ifdef IXGBE_FCOE - hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); - hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); - hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); - hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); - hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); - hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); - hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); - /* Add up per cpu counters for total ddp aloc fail */ - if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) { - for_each_possible_cpu(cpu) { - fcoe_noddp_counts_sum += - *per_cpu_ptr(fcoe->pcpu_noddp, cpu); - fcoe_noddp_ext_buff_counts_sum += - *per_cpu_ptr(fcoe-> - pcpu_noddp_ext_buff, cpu); - } - } - hwstats->fcoe_noddp = fcoe_noddp_counts_sum; - hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum; - -#endif /* IXGBE_FCOE */ - break; - default: - break; - } - bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); - hwstats->bprc += bprc; - hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); - if (hw->mac.type == ixgbe_mac_82598EB) - hwstats->mprc -= bprc; - hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); - hwstats->prc64 
+= IXGBE_READ_REG(hw, IXGBE_PRC64); - hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); - hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); - hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); - hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); - hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); - hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); - lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); - hwstats->lxontxc += lxon; - lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); - hwstats->lxofftxc += lxoff; - hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); - hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); - /* - * 82598 errata - tx of flow control packets is included in tx counters - */ - xon_off_tot = lxon + lxoff; - hwstats->gptc -= xon_off_tot; - hwstats->mptc -= xon_off_tot; - hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN)); - hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); - hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); - hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); - hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); - hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); - hwstats->ptc64 -= xon_off_tot; - hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); - hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); - hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); - hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); - hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); - hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); - /* Fill out the OS statistics structure */ - net_stats->multicast = hwstats->mprc; - - /* Rx Errors */ - net_stats->rx_errors = hwstats->crcerrs + - hwstats->rlec; - net_stats->rx_dropped = 0; - net_stats->rx_length_errors = hwstats->rlec; - net_stats->rx_crc_errors = hwstats->crcerrs; - net_stats->rx_missed_errors = total_mpc; - - /* - * VF Stats Collection - skip while resetting because these - * are not clear on read and otherwise you'll sometimes get - * crazy values. 
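The UPDATE_VF_COUNTER_36bit macro used in the VF loop below is not part of this hunk. As a rough illustration of what such a macro has to do, a 36-bit counter exposed as a 32-bit LSB register plus a 4-bit MSB register can be accumulated with wrap handling along these lines (hypothetical helper, not the driver's macro; the register reads are abstracted away):

/* Illustrative sketch only -- not the driver's UPDATE_VF_COUNTER_36bit.
 * A 36-bit hardware counter is split across a 32-bit LSB register and a
 * 4-bit MSB register; accumulate the delta since the last snapshot and
 * allow for one wrap of the 36-bit value between polls.
 */
static u64 accumulate_36bit_counter(u32 lsb, u32 msb, u64 *last, u64 total)
{
	u64 cur = ((u64)(msb & 0xf) << 32) | lsb;	/* current 36-bit value */

	if (cur >= *last)
		total += cur - *last;
	else						/* wrapped past 2^36 */
		total += (cur + (1ULL << 36)) - *last;

	*last = cur;
	return total;
}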
- */ - if (!test_bit(__IXGBE_RESETTING, &adapter->state)) { - for (i = 0; i < adapter->num_vfs; i++) { - UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), \ - adapter->vfinfo[i].last_vfstats.gprc, \ - adapter->vfinfo[i].vfstats.gprc); - UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), \ - adapter->vfinfo[i].last_vfstats.gptc, \ - adapter->vfinfo[i].vfstats.gptc); - UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), \ - IXGBE_PVFGORC_MSB(i), \ - adapter->vfinfo[i].last_vfstats.gorc, \ - adapter->vfinfo[i].vfstats.gorc); - UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), \ - IXGBE_PVFGOTC_MSB(i), \ - adapter->vfinfo[i].last_vfstats.gotc, \ - adapter->vfinfo[i].vfstats.gotc); - UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), \ - adapter->vfinfo[i].last_vfstats.mprc, \ - adapter->vfinfo[i].vfstats.mprc); - } - } -} - - -#ifdef NO_VNIC - -/** - * ixgbe_watchdog_update_link - update the link status - * @adapter - pointer to the device adapter structure - * @link_speed - pointer to a u32 to store the link_speed - **/ -static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 link_speed = adapter->link_speed; - bool link_up = adapter->link_up; - bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; - - if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) - return; - - if (hw->mac.ops.check_link) { - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - } else { - /* always assume link is up, if no check link function */ - link_speed = IXGBE_LINK_SPEED_10GB_FULL; - link_up = true; - } - -#ifdef HAVE_DCBNL_IEEE - if (adapter->ixgbe_ieee_pfc) - pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); - -#endif - if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) { - hw->mac.ops.fc_enable(hw); - //ixgbe_set_rx_drop_en(adapter); - } - - if (link_up || - time_after(jiffies, (adapter->link_check_timeout + - IXGBE_TRY_LINK_TIMEOUT))) { - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC); - IXGBE_WRITE_FLUSH(hw); - } - - adapter->link_up = link_up; - adapter->link_speed = link_speed; -} -#endif - - - -#ifdef NO_VNIC - -/** - * ixgbe_service_task - manages and runs subtasks - * @work: pointer to work_struct containing our data - **/ -static void ixgbe_service_task(struct work_struct *work) -{ - //struct ixgbe_adapter *adapter = container_of(work, - // struct ixgbe_adapter, - // service_task); - - //ixgbe_reset_subtask(adapter); - //ixgbe_sfp_detection_subtask(adapter); - //ixgbe_sfp_link_config_subtask(adapter); - //ixgbe_check_overtemp_subtask(adapter); - //ixgbe_watchdog_subtask(adapter); -#ifdef HAVE_TX_MQ - //ixgbe_fdir_reinit_subtask(adapter); -#endif - //ixgbe_check_hang_subtask(adapter); - - //ixgbe_service_event_complete(adapter); -} - - - - -#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ - IXGBE_TXD_CMD_RS) - - -/** - * ixgbe_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int ixgbe_set_mac(struct net_device *netdev, void *p) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - struct sockaddr *addr = p; - int ret; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - ixgbe_del_mac_filter(adapter, hw->mac.addr, - adapter->num_vfs); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - - - /* set the correct pool for the new PF MAC 
address in entry 0 */ - ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, - adapter->num_vfs); - return ret > 0 ? 0 : ret; -} - - -/** - * ixgbe_ioctl - - * @netdev: - * @ifreq: - * @cmd: - **/ -static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { -#ifdef ETHTOOL_OPS_COMPAT - case SIOCETHTOOL: - return ethtool_ioctl(ifr); -#endif - default: - return -EOPNOTSUPP; - } -} -#endif /* NO_VNIC */ - - -void ixgbe_do_reset(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - - if (netif_running(netdev)) - ixgbe_reinit_locked(adapter); - else - ixgbe_reset(adapter); -} - - - - - - -/** - * ixgbe_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in ixgbe_pci_tbl - * - * Returns 0 on success, negative on failure - * - * ixgbe_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. - **/ -//static -int ixgbe_kni_probe(struct pci_dev *pdev, - struct net_device **lad_dev) -{ - size_t count; - struct net_device *netdev; - struct ixgbe_adapter *adapter = NULL; - struct ixgbe_hw *hw = NULL; - static int cards_found; - int i, err; - u16 offset; - u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl; - u32 etrack_id; - u16 build, major, patch; - char *info_string, *i_s_var; - u8 part_str[IXGBE_PBANUM_LENGTH]; - enum ixgbe_mac_type mac_type = ixgbe_mac_unknown; -#ifdef HAVE_TX_MQ - unsigned int indices = num_possible_cpus(); -#endif /* HAVE_TX_MQ */ -#ifdef IXGBE_FCOE - u16 device_caps; -#endif - u16 wol_cap; - - err = pci_enable_device_mem(pdev); - if (err) - return err; - - -#ifdef NO_VNIC - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), ixgbe_driver_name); - if (err) { - dev_err(pci_dev_to_dev(pdev), - "pci_request_selected_regions failed 0x%x\n", err); - goto err_pci_reg; - } -#endif - - /* - * The mac_type is needed before we have the adapter is set up - * so rather than maintain two devID -> MAC tables we dummy up - * an ixgbe_hw stuct and use ixgbe_set_mac_type. - */ - hw = vmalloc(sizeof(struct ixgbe_hw)); - if (!hw) { - pr_info("Unable to allocate memory for early mac " - "check\n"); - } else { - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - ixgbe_set_mac_type(hw); - mac_type = hw->mac.type; - vfree(hw); - } - -#ifdef NO_VNIC - /* - * Workaround of Silicon errata on 82598. Disable LOs in the PCI switch - * port to which the 82598 is connected to prevent duplicate - * completions caused by LOs. We need the mac type so that we only - * do this on 82598 devices, ixgbe_set_mac_type does this for us if - * we set it's device ID. 
- */ - if (mac_type == ixgbe_mac_82598EB) - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); - - pci_enable_pcie_error_reporting(pdev); - - pci_set_master(pdev); -#endif - -#ifdef HAVE_TX_MQ -#ifdef CONFIG_DCB -#ifdef HAVE_MQPRIO - indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS; -#else - indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES); -#endif /* HAVE_MQPRIO */ -#endif /* CONFIG_DCB */ - - if (mac_type == ixgbe_mac_82598EB) - indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES); - else - indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES); - -#ifdef IXGBE_FCOE - indices += min_t(unsigned int, num_possible_cpus(), - IXGBE_MAX_FCOE_INDICES); -#endif - netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); -#else /* HAVE_TX_MQ */ - netdev = alloc_etherdev(sizeof(struct ixgbe_adapter)); -#endif /* HAVE_TX_MQ */ - if (!netdev) { - err = -ENOMEM; - goto err_alloc_etherdev; - } - - SET_NETDEV_DEV(netdev, &pdev->dev); - - adapter = netdev_priv(netdev); - //pci_set_drvdata(pdev, adapter); - - adapter->netdev = netdev; - adapter->pdev = pdev; - hw = &adapter->hw; - hw->back = adapter; - adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - -#ifdef HAVE_PCI_ERS - /* - * call save state here in standalone driver because it relies on - * adapter struct to exist, and needs to call netdev_priv - */ - pci_save_state(pdev); - -#endif - hw->hw_addr = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!hw->hw_addr) { - err = -EIO; - goto err_ioremap; - } - //ixgbe_assign_netdev_ops(netdev); - ixgbe_set_ethtool_ops(netdev); - - strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); - - adapter->bd_number = cards_found; - - /* setup the private structure */ - err = ixgbe_sw_init(adapter); - if (err) - goto err_sw_init; - - /* Make it possible the adapter to be woken up via WOL */ - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); - break; - default: - break; - } - - /* - * check_options must be called before setup_link to set up - * hw->fc completely - */ - //ixgbe_check_options(adapter); - -#ifndef NO_VNIC - /* reset_hw fills in the perm_addr as well */ - hw->phy.reset_if_overtemp = true; - err = hw->mac.ops.reset_hw(hw); - hw->phy.reset_if_overtemp = false; - if (err == IXGBE_ERR_SFP_NOT_PRESENT && - hw->mac.type == ixgbe_mac_82598EB) { - err = 0; - } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - e_dev_err("failed to load because an unsupported SFP+ " - "module type was detected.\n"); - e_dev_err("Reload the driver after installing a supported " - "module.\n"); - goto err_sw_init; - } else if (err) { - e_dev_err("HW Init failed: %d\n", err); - goto err_sw_init; - } -#endif - - //if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - // ixgbe_probe_vf(adapter); - - -#ifdef MAX_SKB_FRAGS - netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM; - -#ifdef NETIF_F_IPV6_CSUM - netdev->features |= NETIF_F_IPV6_CSUM; -#endif - -#ifdef NETIF_F_HW_VLAN_TX - netdev->features |= NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX; -#endif -#ifdef NETIF_F_TSO - netdev->features |= NETIF_F_TSO; -#endif /* NETIF_F_TSO */ -#ifdef NETIF_F_TSO6 - netdev->features |= NETIF_F_TSO6; -#endif /* NETIF_F_TSO6 */ -#ifdef NETIF_F_RXHASH - netdev->features |= NETIF_F_RXHASH; -#endif /* NETIF_F_RXHASH */ - -#ifdef HAVE_NDO_SET_FEATURES - netdev->features |= NETIF_F_RXCSUM; - - /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; - - /* give us 
the option of enabling RSC/LRO later */ -#ifdef IXGBE_NO_LRO - if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) -#endif - netdev->hw_features |= NETIF_F_LRO; - -#else -#ifdef NETIF_F_GRO - - /* this is only needed on kernels prior to 2.6.39 */ - netdev->features |= NETIF_F_GRO; -#endif /* NETIF_F_GRO */ -#endif - -#ifdef NETIF_F_HW_VLAN_TX - /* set this bit last since it cannot be part of hw_features */ - netdev->features |= NETIF_F_HW_VLAN_FILTER; -#endif - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - netdev->features |= NETIF_F_SCTP_CSUM; -#ifdef HAVE_NDO_SET_FEATURES - netdev->hw_features |= NETIF_F_SCTP_CSUM | - NETIF_F_NTUPLE; -#endif - break; - default: - break; - } - -#ifdef HAVE_NETDEV_VLAN_FEATURES - netdev->vlan_features |= NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6; - -#endif /* HAVE_NETDEV_VLAN_FEATURES */ - /* - * If perfect filters were enabled in check_options(), enable them - * on the netdevice too. - */ - if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) - netdev->features |= NETIF_F_NTUPLE; - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) - adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - /* clear n-tuple support in the netdev unconditionally */ - netdev->features &= ~NETIF_F_NTUPLE; - } - -#ifdef NETIF_F_RXHASH - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) - netdev->features &= ~NETIF_F_RXHASH; - -#endif /* NETIF_F_RXHASH */ - if (netdev->features & NETIF_F_LRO) { - if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && - ((adapter->rx_itr_setting == 1) || - (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) { - adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; - } else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) { -#ifdef IXGBE_NO_LRO - e_info(probe, "InterruptThrottleRate set too high, " - "disabling RSC\n"); -#else - e_info(probe, "InterruptThrottleRate set too high, " - "falling back to software LRO\n"); -#endif - } - } -#ifdef CONFIG_DCB - //netdev->dcbnl_ops = &dcbnl_ops; -#endif - -#ifdef IXGBE_FCOE -#ifdef NETIF_F_FSO - if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { - ixgbe_get_device_caps(hw, &device_caps); - if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) { - adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; - adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; - e_info(probe, "FCoE offload feature is not available. 
" - "Disabling FCoE offload feature\n"); - } -#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE - else { - adapter->flags |= IXGBE_FLAG_FCOE_ENABLED; - adapter->ring_feature[RING_F_FCOE].indices = - IXGBE_FCRETA_SIZE; - netdev->features |= NETIF_F_FSO | - NETIF_F_FCOE_CRC | - NETIF_F_FCOE_MTU; - netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; - } -#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ -#ifdef HAVE_NETDEV_VLAN_FEATURES - netdev->vlan_features |= NETIF_F_FSO | - NETIF_F_FCOE_CRC | - NETIF_F_FCOE_MTU; -#endif /* HAVE_NETDEV_VLAN_FEATURES */ - } -#endif /* NETIF_F_FSO */ -#endif /* IXGBE_FCOE */ - -#endif /* MAX_SKB_FRAGS */ - /* make sure the EEPROM is good */ - if (hw->eeprom.ops.validate_checksum && - (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) { - e_dev_err("The EEPROM Checksum Is Not Valid\n"); - err = -EIO; - goto err_sw_init; - } - - memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); -#ifdef ETHTOOL_GPERMADDR - memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len); - - if (ixgbe_validate_mac_addr(netdev->perm_addr)) { - e_dev_err("invalid MAC address\n"); - err = -EIO; - goto err_sw_init; - } -#else - if (ixgbe_validate_mac_addr(netdev->dev_addr)) { - e_dev_err("invalid MAC address\n"); - err = -EIO; - goto err_sw_init; - } -#endif - memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, - netdev->addr_len); - adapter->mac_table[0].queue = adapter->num_vfs; - adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, - IXGBE_RAH_AV); - - //setup_timer(&adapter->service_timer, &ixgbe_service_timer, - // (unsigned long) adapter); - - //INIT_WORK(&adapter->service_task, ixgbe_service_task); - //clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); - - //err = ixgbe_init_interrupt_scheme(adapter); - //if (err) - // goto err_sw_init; - - //adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; - ixgbe_set_num_queues(adapter); - - adapter->wol = 0; - /* WOL not supported for all but the following */ - switch (pdev->device) { - case IXGBE_DEV_ID_82599_SFP: - /* Only these subdevice supports WOL */ - switch (pdev->subsystem_device) { - case IXGBE_SUBDEV_ID_82599_560FLR: - /* only support first port */ - if (hw->bus.func != 0) - break; - case IXGBE_SUBDEV_ID_82599_SFP: - adapter->wol = IXGBE_WUFC_MAG; - break; - } - break; - case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: - /* All except this subdevice support WOL */ - if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) - adapter->wol = IXGBE_WUFC_MAG; - break; - case IXGBE_DEV_ID_82599_KX4: - adapter->wol = IXGBE_WUFC_MAG; - break; - case IXGBE_DEV_ID_X540T: - /* Check eeprom to see if it is enabled */ - ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap); - wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; - - if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || - ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && - (hw->bus.func == 0))) - adapter->wol = IXGBE_WUFC_MAG; - break; - } - //device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); - - - /* - * Save off EEPROM version number and Option Rom version which - * together make a unique identify for the eeprom - */ - ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh); - ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl); - - etrack_id = (eeprom_verh << 16) | eeprom_verl; - - ixgbe_read_eeprom(hw, 0x17, &offset); - - /* Make sure offset to SCSI block is valid */ - if (!(offset == 0x0) && !(offset == 0xffff)) { - ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh); - 
ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl); - - /* Only display Option Rom if exist */ - if (eeprom_cfg_blkl && eeprom_cfg_blkh) { - major = eeprom_cfg_blkl >> 8; - build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8); - patch = eeprom_cfg_blkh & 0x00ff; - - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), - "0x%08x, %d.%d.%d", etrack_id, major, build, - patch); - } else { - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), - "0x%08x", etrack_id); - } - } else { - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), - "0x%08x", etrack_id); - } - - /* reset the hardware with the new settings */ - err = hw->mac.ops.start_hw(hw); - if (err == IXGBE_ERR_EEPROM_VERSION) { - /* We are running on a pre-production device, log a warning */ - e_dev_warn("This device is a pre-production adapter/LOM. " - "Please be aware there may be issues associated " - "with your hardware. If you are experiencing " - "problems please contact your Intel or hardware " - "representative who provided you with this " - "hardware.\n"); - } - /* pick up the PCI bus settings for reporting later */ - if (hw->mac.ops.get_bus_info) - hw->mac.ops.get_bus_info(hw); - - strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); - *lad_dev = netdev; - - adapter->netdev_registered = true; -#ifdef NO_VNIC - /* power down the optics */ - if ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB))) - ixgbe_disable_tx_laser(hw); - - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - /* keep stopping all the transmit queues for older kernels */ - netif_tx_stop_all_queues(netdev); -#endif - - /* print all messages at the end so that we use our eth%d name */ - /* print bus type/speed/width info */ - e_dev_info("(PCI Express:%s:%s) ", - (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : - hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : - "Unknown"), - (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" : - hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" : - hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : - "Unknown")); - - /* print the MAC address */ - for (i = 0; i < 6; i++) - pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? 
'\n' : ':'); - - /* First try to read PBA as a string */ - err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH); - if (err) - strlcpy(part_str, "Unknown", sizeof(part_str)); - if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) - e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n", - hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str); - else - e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n", - hw->mac.type, hw->phy.type, part_str); - - if (((hw->bus.speed == ixgbe_bus_speed_2500) && - (hw->bus.width <= ixgbe_bus_width_pcie_x4)) || - (hw->bus.width <= ixgbe_bus_width_pcie_x2)) { - e_dev_warn("PCI-Express bandwidth available for this " - "card is not sufficient for optimal " - "performance.\n"); - e_dev_warn("For optimal performance a x8 PCI-Express " - "slot is required.\n"); - } - -#define INFO_STRING_LEN 255 - info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); - if (!info_string) { - e_err(probe, "allocation for info string failed\n"); - goto no_info_string; - } - count = 0; - i_s_var = info_string; - count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: "); - - i_s_var = info_string + count; - count += snprintf(i_s_var, (INFO_STRING_LEN - count), - "RxQ: %d TxQ: %d ", adapter->num_rx_queues, - adapter->num_tx_queues); - i_s_var = info_string + count; -#ifdef IXGBE_FCOE - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE "); - i_s_var = info_string + count; - } -#endif - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, - "FdirHash "); - i_s_var = info_string + count; - } - if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, - "FdirPerfect "); - i_s_var = info_string + count; - } - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB "); - i_s_var = info_string + count; - } - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS "); - i_s_var = info_string + count; - } - if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA "); - i_s_var = info_string + count; - } - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC "); - i_s_var = info_string + count; - } -#ifndef IXGBE_NO_LRO - else if (netdev->features & NETIF_F_LRO) { - count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO "); - i_s_var = info_string + count; - } -#endif - - BUG_ON(i_s_var > (info_string + INFO_STRING_LEN)); - /* end features printing */ - e_info(probe, "%s\n", info_string); - kfree(info_string); -no_info_string: - - /* firmware requires blank driver version */ - ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF); - -#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN) - /* add san mac addr to netdev */ - //ixgbe_add_sanmac_netdev(netdev); - -#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */ - e_info(probe, "Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; - -#ifdef IXGBE_SYSFS - //if (ixgbe_sysfs_init(adapter)) - // e_err(probe, "failed to allocate sysfs resources\n"); -#else -#ifdef IXGBE_PROCFS - //if (ixgbe_procfs_init(adapter)) - // e_err(probe, "failed to allocate procfs resources\n"); -#endif /* IXGBE_PROCFS */ -#endif /* IXGBE_SYSFS */ - - return 0; - -//err_register: - //ixgbe_clear_interrupt_scheme(adapter); - 
//ixgbe_release_hw_control(adapter); -err_sw_init: - adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; - if (adapter->mac_table) - kfree(adapter->mac_table); - iounmap(hw->hw_addr); -err_ioremap: - free_netdev(netdev); -err_alloc_etherdev: - //pci_release_selected_regions(pdev, - // pci_select_bars(pdev, IORESOURCE_MEM)); -//err_pci_reg: -//err_dma: - pci_disable_device(pdev); - return err; -} - -/** - * ixgbe_remove - Device Removal Routine - * @pdev: PCI device information struct - * - * ixgbe_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a - * Hot-Plug event, or because the driver is going to be removed from - * memory. - **/ -void ixgbe_kni_remove(struct pci_dev *pdev) -{ - pci_disable_device(pdev); -} - - -u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg) -{ - u16 value; - struct ixgbe_adapter *adapter = hw->back; - - pci_read_config_word(adapter->pdev, reg, &value); - return value; -} - -void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value) -{ - struct ixgbe_adapter *adapter = hw->back; - - pci_write_config_word(adapter->pdev, reg, value); -} - -void ewarn(struct ixgbe_hw *hw, const char *st, u32 status) -{ - struct ixgbe_adapter *adapter = hw->back; - - netif_warn(adapter, drv, adapter->netdev, "%s", st); -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h deleted file mode 100644 index 53ace941..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_MBX_H_ -#define _IXGBE_MBX_H_ - -#include "ixgbe_type.h" - -#define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ -#define IXGBE_ERR_MBX -100 - -#define IXGBE_VFMAILBOX 0x002FC -#define IXGBE_VFMBMEM 0x00200 - -/* Define mailbox register bits */ -#define IXGBE_VFMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */ -#define IXGBE_VFMAILBOX_ACK 0x00000002 /* Ack PF message received */ -#define IXGBE_VFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define IXGBE_VFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define IXGBE_VFMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */ -#define IXGBE_VFMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */ -#define IXGBE_VFMAILBOX_RSTI 0x00000040 /* PF has reset indication */ -#define IXGBE_VFMAILBOX_RSTD 0x00000080 /* PF has indicated reset done */ -#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */ - -#define IXGBE_PFMAILBOX_STS 0x00000001 /* Initiate message send to VF */ -#define IXGBE_PFMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ -#define IXGBE_PFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define IXGBE_PFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define IXGBE_PFMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ - -#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */ -#define IXGBE_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ -#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */ -#define IXGBE_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ - - -/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the - * PF. The reverse is true if it is IXGBE_PF_*. 
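The flags above and the IXGBE_VT_MSGTYPE_*/IXGBE_VF_* values defined in the rest of this header are combined when a VF inspects the PF's reply to a request: the PF echoes the request code with either the ACK or the NACK bit or'd in. A minimal sketch of that check, assuming only the constants from this header (the helper itself is hypothetical, not a driver function):

/* Illustrative sketch: interpreting a PF reply to IXGBE_VF_SET_MAC_ADDR. */
static int example_vf_set_mac_acked(u32 reply)
{
	u32 type = reply & ~IXGBE_VT_MSGINFO_MASK;	/* drop extra-info bits 23:16 */

	if (type == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_ACK))
		return 0;		/* PF accepted the new MAC address */
	if (type == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))
		return -EPERM;		/* PF refused the request */
	return -EIO;			/* unexpected reply */
}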
- * Message ACK's are the value or'd with 0xF0000000 - */ -#define IXGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with - * this are the ACK */ -#define IXGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with - * this are the NACK */ -#define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still - * clear to send requests */ -#define IXGBE_VT_MSGINFO_SHIFT 16 -/* bits 23:16 are used for extra info for certain messages */ -#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) - -#define IXGBE_VF_RESET 0x01 /* VF requests reset */ -#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ -#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ -#define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ -#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ -#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ - -/* length of permanent address message returned from PF */ -#define IXGBE_VF_PERMADDR_MSG_LEN 4 -/* word in permanent address message with the current multicast type */ -#define IXGBE_VF_MC_TYPE_WORD 3 - -#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */ - - -#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ -#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ - -s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16); -s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16); -s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); -s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); -s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16); -s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16); -s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16); -void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw); -void ixgbe_init_mbx_params_vf(struct ixgbe_hw *); -void ixgbe_init_mbx_params_pf(struct ixgbe_hw *); - -#endif /* _IXGBE_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h deleted file mode 100644 index 7b3f8c51..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -/* glue for the OS independent part of ixgbe - * includes register access macros - */ - -#ifndef _IXGBE_OSDEP_H_ -#define _IXGBE_OSDEP_H_ - -#include -#include -#include -#include -#include -#include "kcompat.h" - - -#ifndef msleep -#define msleep(x) do { if (in_interrupt()) { \ - /* Don't mdelay in interrupt context! */ \ - BUG(); \ - } else { \ - msleep(x); \ - } } while (0) - -#endif - -#undef ASSERT - -#ifdef DBG -#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A) -#else -#define hw_dbg(hw, S, A...) do {} while (0) -#endif - -#define e_dev_info(format, arg...) \ - dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg) -#define e_dev_warn(format, arg...) \ - dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg) -#define e_dev_err(format, arg...) \ - dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg) -#define e_dev_notice(format, arg...) 
\ - dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg) -#define e_info(msglvl, format, arg...) \ - netif_info(adapter, msglvl, adapter->netdev, format, ## arg) -#define e_err(msglvl, format, arg...) \ - netif_err(adapter, msglvl, adapter->netdev, format, ## arg) -#define e_warn(msglvl, format, arg...) \ - netif_warn(adapter, msglvl, adapter->netdev, format, ## arg) -#define e_crit(msglvl, format, arg...) \ - netif_crit(adapter, msglvl, adapter->netdev, format, ## arg) - - -#ifdef DBG -#define IXGBE_WRITE_REG(a, reg, value) do {\ - switch (reg) { \ - case IXGBE_EIMS: \ - case IXGBE_EIMC: \ - case IXGBE_EIAM: \ - case IXGBE_EIAC: \ - case IXGBE_EICR: \ - case IXGBE_EICS: \ - printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \ - reg, (u32)(value)); \ - default: \ - break; \ - } \ - writel((value), ((a)->hw_addr + (reg))); \ -} while (0) -#else -#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg))) -#endif - -#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg)) - -#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \ - writel((value), ((a)->hw_addr + (reg) + ((offset) << 2)))) - -#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \ - readl((a)->hw_addr + (reg) + ((offset) << 2))) - -#ifndef writeq -#define writeq(val, addr) do { writel((u32) (val), addr); \ - writel((u32) (val >> 32), (addr + 4)); \ - } while (0); -#endif - -#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) - -#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS) -struct ixgbe_hw; -extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg); -extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value); -extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status); - -#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word -#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word -#define IXGBE_EEPROM_GRANT_ATTEMPS 100 -#define IXGBE_HTONL(_i) htonl(_i) -#define IXGBE_NTOHL(_i) ntohl(_i) -#define IXGBE_NTOHS(_i) ntohs(_i) -#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i) -#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i) -#define EWARN(H, W, S) ewarn(H, W, S) - -#endif /* _IXGBE_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c deleted file mode 100644 index a47a2ff8..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c +++ /dev/null @@ -1,1832 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_api.h" -#include "ixgbe_common.h" -#include "ixgbe_phy.h" - -static void ixgbe_i2c_start(struct ixgbe_hw *hw); -static void ixgbe_i2c_stop(struct ixgbe_hw *hw); -static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data); -static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data); -static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw); -static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data); -static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data); -static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); -static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); -static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data); -static bool ixgbe_get_i2c_data(u32 *i2cctl); - -/** - * ixgbe_init_phy_ops_generic - Inits PHY function ptrs - * @hw: pointer to the hardware structure - * - * Initialize the function pointers. - **/ -s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) -{ - struct ixgbe_phy_info *phy = &hw->phy; - - /* PHY */ - phy->ops.identify = &ixgbe_identify_phy_generic; - phy->ops.reset = &ixgbe_reset_phy_generic; - phy->ops.read_reg = &ixgbe_read_phy_reg_generic; - phy->ops.write_reg = &ixgbe_write_phy_reg_generic; - phy->ops.setup_link = &ixgbe_setup_phy_link_generic; - phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic; - phy->ops.check_link = NULL; - phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic; - phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic; - phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic; - phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic; - phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic; - phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear; - phy->ops.identify_sfp = &ixgbe_identify_module_generic; - phy->sfp_type = ixgbe_sfp_type_unknown; - phy->ops.check_overtemp = &ixgbe_tn_check_overtemp; - return 0; -} - -/** - * ixgbe_identify_phy_generic - Get physical layer module - * @hw: pointer to hardware structure - * - * Determines the physical layer module found on the current adapter. 
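ixgbe_init_phy_ops_generic() above only fills in a function-pointer table; callers drive the PHY through hw->phy.ops rather than calling the *_generic routines directly, which lets MAC-specific init code override individual entries. A simplified usage sketch (error handling reduced; not taken from the driver):

/* Illustrative sketch: using the ops table populated above. */
static s32 example_bring_up_phy(struct ixgbe_hw *hw)
{
	s32 status;

	status = hw->phy.ops.identify(hw);	/* -> ixgbe_identify_phy_generic */
	if (status != 0)
		return status;

	status = hw->phy.ops.reset(hw);		/* -> ixgbe_reset_phy_generic */
	if (status != 0)
		return status;

	return hw->phy.ops.setup_link(hw);	/* -> ixgbe_setup_phy_link_generic */
}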
- **/ -s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; - u32 phy_addr; - u16 ext_ability = 0; - - if (hw->phy.type == ixgbe_phy_unknown) { - for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { - if (ixgbe_validate_phy_addr(hw, phy_addr)) { - hw->phy.addr = phy_addr; - ixgbe_get_phy_id(hw); - hw->phy.type = - ixgbe_get_phy_type_from_id(hw->phy.id); - - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.ops.read_reg(hw, - IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &ext_ability); - if (ext_ability & - (IXGBE_MDIO_PHY_10GBASET_ABILITY | - IXGBE_MDIO_PHY_1000BASET_ABILITY)) - hw->phy.type = - ixgbe_phy_cu_unknown; - else - hw->phy.type = - ixgbe_phy_generic; - } - - status = 0; - break; - } - } - /* clear value if nothing found */ - if (status != 0) - hw->phy.addr = 0; - } else { - status = 0; - } - - return status; -} - -/** - * ixgbe_validate_phy_addr - Determines phy address is valid - * @hw: pointer to hardware structure - * - **/ -bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr) -{ - u16 phy_id = 0; - bool valid = false; - - hw->phy.addr = phy_addr; - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id); - - if (phy_id != 0xFFFF && phy_id != 0x0) - valid = true; - - return valid; -} - -/** - * ixgbe_get_phy_id - Get the phy type - * @hw: pointer to hardware structure - * - **/ -s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) -{ - u32 status; - u16 phy_id_high = 0; - u16 phy_id_low = 0; - - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &phy_id_high); - - if (status == 0) { - hw->phy.id = (u32)(phy_id_high << 16); - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &phy_id_low); - hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK); - hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK); - } - return status; -} - -/** - * ixgbe_get_phy_type_from_id - Get the phy type - * @hw: pointer to hardware structure - * - **/ -enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) -{ - enum ixgbe_phy_type phy_type; - - switch (phy_id) { - case TN1010_PHY_ID: - phy_type = ixgbe_phy_tn; - break; - case X540_PHY_ID: - phy_type = ixgbe_phy_aq; - break; - case QT2022_PHY_ID: - phy_type = ixgbe_phy_qt; - break; - case ATH_PHY_ID: - phy_type = ixgbe_phy_nl; - break; - default: - phy_type = ixgbe_phy_unknown; - break; - } - - hw_dbg(hw, "phy type found is %d\n", phy_type); - return phy_type; -} - -/** - * ixgbe_reset_phy_generic - Performs a PHY reset - * @hw: pointer to hardware structure - **/ -s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) -{ - u32 i; - u16 ctrl = 0; - s32 status = 0; - - if (hw->phy.type == ixgbe_phy_unknown) - status = ixgbe_identify_phy_generic(hw); - - if (status != 0 || hw->phy.type == ixgbe_phy_none) - goto out; - - /* Don't reset PHY if it's shut down due to overtemp. */ - if (!hw->phy.reset_if_overtemp && - (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) - goto out; - - /* - * Perform soft PHY reset to the PHY_XS. - * This will cause a soft reset to the PHY - */ - hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, - IXGBE_MDIO_PHY_XS_RESET); - - /* - * Poll for reset bit to self-clear indicating reset is complete. - * Some PHYs could take up to 3 seconds to complete and need about - * 1.7 usec delay after the reset is complete. 
- */ - for (i = 0; i < 30; i++) { - msleep(100); - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); - if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { - udelay(2); - break; - } - } - - if (ctrl & IXGBE_MDIO_PHY_XS_RESET) { - status = IXGBE_ERR_RESET_FAILED; - hw_dbg(hw, "PHY reset polling failed to complete.\n"); - } - -out: - return status; -} - -/** - * ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register - * @hw: pointer to hardware structure - * @reg_addr: 32 bit address of PHY register to read - * @phy_data: Pointer to read data from PHY register - **/ -s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data) -{ - u32 command; - u32 i; - u32 data; - s32 status = 0; - u16 gssr; - - if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) - gssr = IXGBE_GSSR_PHY1_SM; - else - gssr = IXGBE_GSSR_PHY0_SM; - - if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) - status = IXGBE_ERR_SWFW_SYNC; - - if (status == 0) { - /* Setup and write the address cycle command */ - command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); - - IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); - - /* - * Check every 10 usec to see if the address cycle completed. - * The MDI Command bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - udelay(10); - - command = IXGBE_READ_REG(hw, IXGBE_MSCA); - - if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; - } - - if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { - hw_dbg(hw, "PHY address command did not complete.\n"); - status = IXGBE_ERR_PHY; - } - - if (status == 0) { - /* - * Address cycle complete, setup and write the read - * command - */ - command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND)); - - IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); - - /* - * Check every 10 usec to see if the address cycle - * completed. The MDI Command bit will clear when the - * operation is complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - udelay(10); - - command = IXGBE_READ_REG(hw, IXGBE_MSCA); - - if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; - } - - if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { - hw_dbg(hw, "PHY read command didn't complete\n"); - status = IXGBE_ERR_PHY; - } else { - /* - * Read operation is complete. 
Get the data - * from MSRWD - */ - data = IXGBE_READ_REG(hw, IXGBE_MSRWD); - data >>= IXGBE_MSRWD_READ_DATA_SHIFT; - *phy_data = (u16)(data); - } - } - - hw->mac.ops.release_swfw_sync(hw, gssr); - } - - return status; -} - -/** - * ixgbe_write_phy_reg_generic - Writes a value to specified PHY register - * @hw: pointer to hardware structure - * @reg_addr: 32 bit PHY register to write - * @device_type: 5 bit device type - * @phy_data: Data to write to the PHY register - **/ -s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data) -{ - u32 command; - u32 i; - s32 status = 0; - u16 gssr; - - if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) - gssr = IXGBE_GSSR_PHY1_SM; - else - gssr = IXGBE_GSSR_PHY0_SM; - - if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) - status = IXGBE_ERR_SWFW_SYNC; - - if (status == 0) { - /* Put the data in the MDI single read and write data register*/ - IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data); - - /* Setup and write the address cycle command */ - command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); - - IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); - - /* - * Check every 10 usec to see if the address cycle completed. - * The MDI Command bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - udelay(10); - - command = IXGBE_READ_REG(hw, IXGBE_MSCA); - - if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; - } - - if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { - hw_dbg(hw, "PHY address cmd didn't complete\n"); - status = IXGBE_ERR_PHY; - } - - if (status == 0) { - /* - * Address cycle complete, setup and write the write - * command - */ - command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND)); - - IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); - - /* - * Check every 10 usec to see if the address cycle - * completed. The MDI Command bit will clear when the - * operation is complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - udelay(10); - - command = IXGBE_READ_REG(hw, IXGBE_MSCA); - - if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; - } - - if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { - hw_dbg(hw, "PHY address cmd didn't complete\n"); - status = IXGBE_ERR_PHY; - } - } - - hw->mac.ops.release_swfw_sync(hw, gssr); - } - - return status; -} - -/** - * ixgbe_setup_phy_link_generic - Set and restart autoneg - * @hw: pointer to hardware structure - * - * Restart autonegotiation and PHY and waits for completion. 
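The autonegotiation code below repeats one idiom several times: read a clause-45 register through hw->phy.ops.read_reg(), clear and set a few advertisement bits, and write the result back through hw->phy.ops.write_reg(). Condensed into a single hypothetical helper (signatures follow the ops implemented above):

/* Illustrative sketch of the MDIO read-modify-write pattern used below. */
static s32 example_mdio_rmw(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
			    u16 clear_bits, u16 set_bits)
{
	u16 val = 0;
	s32 status;

	status = hw->phy.ops.read_reg(hw, reg_addr, device_type, &val);
	if (status != 0)
		return status;

	val &= ~clear_bits;
	val |= set_bits;

	return hw->phy.ops.write_reg(hw, reg_addr, device_type, val);
}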
- **/ -s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) -{ - s32 status = 0; - u32 time_out; - u32 max_time_out = 10; - u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; - bool autoneg = false; - ixgbe_link_speed speed; - - ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | - IXGBE_MII_100BASE_T_ADVERTISE_HALF); - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - /* Restart PHY autonegotiation and wait for completion */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); - - autoneg_reg |= IXGBE_MII_RESTART; - - hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); - - /* Wait for autonegotiation to finish */ - for (time_out = 0; time_out < max_time_out; time_out++) { - udelay(10); - /* Restart PHY autonegotiation and wait for completion */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; - if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) - break; - } - - if (time_out == max_time_out) { - status = IXGBE_ERR_LINK_SETUP; - hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out"); - } - - return status; -} - -/** - * ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - **/ -s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete) -{ - - /* - * Clear autoneg_advertised and set new values based on input link - * speed. 
- */ - hw->phy.autoneg_advertised = 0; - - if (speed & IXGBE_LINK_SPEED_10GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; - - if (speed & IXGBE_LINK_SPEED_100_FULL) - hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; - - /* Setup link based on the new speed settings */ - hw->phy.ops.setup_link(hw); - - return 0; -} - -/** - * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @autoneg: boolean auto-negotiation value - * - * Determines the link capabilities by reading the AUTOC register. - **/ -s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *autoneg) -{ - s32 status = IXGBE_ERR_LINK_SETUP; - u16 speed_ability; - - *speed = 0; - *autoneg = true; - - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &speed_ability); - - if (status == 0) { - if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G) - *speed |= IXGBE_LINK_SPEED_10GB_FULL; - if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G) - *speed |= IXGBE_LINK_SPEED_1GB_FULL; - if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M) - *speed |= IXGBE_LINK_SPEED_100_FULL; - } - - return status; -} - -/** - * ixgbe_check_phy_link_tnx - Determine link and speed status - * @hw: pointer to hardware structure - * - * Reads the VS1 register to determine if link is up and the current speed for - * the PHY. - **/ -s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *link_up) -{ - s32 status = 0; - u32 time_out; - u32 max_time_out = 10; - u16 phy_link = 0; - u16 phy_speed = 0; - u16 phy_data = 0; - - /* Initialize speed and link to default case */ - *link_up = false; - *speed = IXGBE_LINK_SPEED_10GB_FULL; - - /* - * Check current speed and link status of the PHY register. - * This is a vendor specific register and may have to - * be changed for other copper PHYs. - */ - for (time_out = 0; time_out < max_time_out; time_out++) { - udelay(10); - status = hw->phy.ops.read_reg(hw, - IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - &phy_data); - phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS; - phy_speed = phy_data & - IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS; - if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) { - *link_up = true; - if (phy_speed == - IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS) - *speed = IXGBE_LINK_SPEED_1GB_FULL; - break; - } - } - - return status; -} - -/** - * ixgbe_setup_phy_link_tnx - Set and restart autoneg - * @hw: pointer to hardware structure - * - * Restart autonegotiation and PHY and waits for completion. 
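ixgbe_setup_phy_link_tnx() below ends with the same bounded-poll idiom as the generic routine above: re-read the autoneg status register until IXGBE_MII_AUTONEG_COMPLETE is set or the retry budget runs out. The idiom, condensed into a stand-alone hypothetical helper:

/* Illustrative sketch of the bounded poll for autoneg completion. */
static s32 example_wait_autoneg_complete(struct ixgbe_hw *hw, u32 max_tries)
{
	u16 reg = 0;
	u32 i;

	for (i = 0; i < max_tries; i++) {
		udelay(10);
		hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
		if (reg & IXGBE_MII_AUTONEG_COMPLETE)
			return 0;
	}

	return IXGBE_ERR_LINK_SETUP;	/* timed out */
}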
- **/ -s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) -{ - s32 status = 0; - u32 time_out; - u32 max_time_out = 10; - u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; - bool autoneg = false; - ixgbe_link_speed speed; - - ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; - - hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } - - /* Restart PHY autonegotiation and wait for completion */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); - - autoneg_reg |= IXGBE_MII_RESTART; - - hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); - - /* Wait for autonegotiation to finish */ - for (time_out = 0; time_out < max_time_out; time_out++) { - udelay(10); - /* Restart PHY autonegotiation and wait for completion */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; - if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) - break; - } - - if (time_out == max_time_out) { - status = IXGBE_ERR_LINK_SETUP; - hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out"); - } - - return status; -} - -/** - * ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version - * @hw: pointer to hardware structure - * @firmware_version: pointer to the PHY Firmware Version - **/ -s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, - u16 *firmware_version) -{ - s32 status = 0; - - status = hw->phy.ops.read_reg(hw, TNX_FW_REV, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - firmware_version); - - return status; -} - -/** - * ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version - * @hw: pointer to hardware structure - * @firmware_version: pointer to the PHY Firmware Version - **/ -s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, - u16 *firmware_version) -{ - s32 status = 0; - - status = hw->phy.ops.read_reg(hw, AQ_FW_REV, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - firmware_version); - - return status; -} - -/** - * ixgbe_reset_phy_nl - Performs a PHY reset - * @hw: pointer to hardware 
structure - **/ -s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) -{ - u16 phy_offset, control, eword, edata, block_crc; - bool end_data = false; - u16 list_offset, data_offset; - u16 phy_data = 0; - s32 ret_val = 0; - u32 i; - - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); - - /* reset the PHY and poll for completion */ - hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, - (phy_data | IXGBE_MDIO_PHY_XS_RESET)); - - for (i = 0; i < 100; i++) { - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); - if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0) - break; - msleep(10); - } - - if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) { - hw_dbg(hw, "PHY reset did not complete.\n"); - ret_val = IXGBE_ERR_PHY; - goto out; - } - - /* Get init offsets */ - ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, - &data_offset); - if (ret_val != 0) - goto out; - - ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc); - data_offset++; - while (!end_data) { - /* - * Read control word from PHY init contents offset - */ - ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); - control = (eword & IXGBE_CONTROL_MASK_NL) >> - IXGBE_CONTROL_SHIFT_NL; - edata = eword & IXGBE_DATA_MASK_NL; - switch (control) { - case IXGBE_DELAY_NL: - data_offset++; - hw_dbg(hw, "DELAY: %d MS\n", edata); - msleep(edata); - break; - case IXGBE_DATA_NL: - hw_dbg(hw, "DATA:\n"); - data_offset++; - hw->eeprom.ops.read(hw, data_offset++, - &phy_offset); - for (i = 0; i < edata; i++) { - hw->eeprom.ops.read(hw, data_offset, &eword); - hw->phy.ops.write_reg(hw, phy_offset, - IXGBE_TWINAX_DEV, eword); - hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword, - phy_offset); - data_offset++; - phy_offset++; - } - break; - case IXGBE_CONTROL_NL: - data_offset++; - hw_dbg(hw, "CONTROL:\n"); - if (edata == IXGBE_CONTROL_EOL_NL) { - hw_dbg(hw, "EOL\n"); - end_data = true; - } else if (edata == IXGBE_CONTROL_SOL_NL) { - hw_dbg(hw, "SOL\n"); - } else { - hw_dbg(hw, "Bad control value\n"); - ret_val = IXGBE_ERR_PHY; - goto out; - } - break; - default: - hw_dbg(hw, "Bad control type\n"); - ret_val = IXGBE_ERR_PHY; - goto out; - } - } - -out: - return ret_val; -} - -/** - * ixgbe_identify_module_generic - Identifies module type - * @hw: pointer to hardware structure - * - * Determines HW type and calls appropriate function. - **/ -s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_SFP_NOT_PRESENT; - - switch (hw->mac.ops.get_media_type(hw)) { - case ixgbe_media_type_fiber: - status = ixgbe_identify_sfp_module_generic(hw); - break; - - case ixgbe_media_type_fiber_qsfp: - status = ixgbe_identify_qsfp_module_generic(hw); - break; - - default: - hw->phy.sfp_type = ixgbe_sfp_type_not_present; - status = IXGBE_ERR_SFP_NOT_PRESENT; - break; - } - - return status; -} - -/** - * ixgbe_identify_sfp_module_generic - Identifies SFP modules - * @hw: pointer to hardware structure - * - * Searches for and identifies the SFP module and assigns appropriate PHY type. 
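ixgbe_identify_sfp_module_generic() below is long; the core of its classification is: read the SFF identifier, compliance-code and cable-technology bytes over I2C, then bucket the module into copper-DA, SR/LR or 1G types (with an 82599-specific core0/core1 split). A compressed, illustrative view using the same constant names (not the driver function; error handling and the core0/core1 split omitted):

/* Illustrative sketch of the SFP classification performed below. */
static enum ixgbe_sfp_type example_classify_sfp(struct ixgbe_hw *hw)
{
	u8 id = 0, codes_1g = 0, codes_10g = 0, cable = 0;

	hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_IDENTIFIER, &id);
	if (id != IXGBE_SFF_IDENTIFIER_SFP)
		return ixgbe_sfp_type_not_present;

	hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES, &codes_1g);
	hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_10GBE_COMP_CODES, &codes_10g);
	hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_CABLE_TECHNOLOGY, &cable);

	if (cable & IXGBE_SFF_DA_PASSIVE_CABLE)
		return ixgbe_sfp_type_da_cu;
	if (codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
		return ixgbe_sfp_type_sr;
	if (codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
		return ixgbe_sfp_type_lr;
	if (codes_1g & IXGBE_SFF_1GBASET_CAPABLE)
		return ixgbe_sfp_type_1g_cu_core0;
	return ixgbe_sfp_type_unknown;
}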
- **/ -s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; - u32 vendor_oui = 0; - enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type; - u8 identifier = 0; - u8 comp_codes_1g = 0; - u8 comp_codes_10g = 0; - u8 oui_bytes[3] = {0, 0, 0}; - u8 cable_tech = 0; - u8 cable_spec = 0; - u16 enforce_sfp = 0; - - if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { - hw->phy.sfp_type = ixgbe_sfp_type_not_present; - status = IXGBE_ERR_SFP_NOT_PRESENT; - goto out; - } - - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_IDENTIFIER, - &identifier); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - /* LAN ID is needed for sfp_type determination */ - hw->mac.ops.set_lan_id(hw); - - if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { - hw->phy.type = ixgbe_phy_sfp_unsupported; - status = IXGBE_ERR_SFP_NOT_SUPPORTED; - } else { - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_1GBE_COMP_CODES, - &comp_codes_1g); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_10GBE_COMP_CODES, - &comp_codes_10g); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_CABLE_TECHNOLOGY, - &cable_tech); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - /* ID Module - * ========= - * 0 SFP_DA_CU - * 1 SFP_SR - * 2 SFP_LR - * 3 SFP_DA_CORE0 - 82599-specific - * 4 SFP_DA_CORE1 - 82599-specific - * 5 SFP_SR/LR_CORE0 - 82599-specific - * 6 SFP_SR/LR_CORE1 - 82599-specific - * 7 SFP_act_lmt_DA_CORE0 - 82599-specific - * 8 SFP_act_lmt_DA_CORE1 - 82599-specific - * 9 SFP_1g_cu_CORE0 - 82599-specific - * 10 SFP_1g_cu_CORE1 - 82599-specific - * 11 SFP_1g_sx_CORE0 - 82599-specific - * 12 SFP_1g_sx_CORE1 - 82599-specific - */ - if (hw->mac.type == ixgbe_mac_82598EB) { - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.sfp_type = ixgbe_sfp_type_da_cu; - else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) - hw->phy.sfp_type = ixgbe_sfp_type_sr; - else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) - hw->phy.sfp_type = ixgbe_sfp_type_lr; - else - hw->phy.sfp_type = ixgbe_sfp_type_unknown; - } else if (hw->mac.type == ixgbe_mac_82599EB) { - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_da_cu_core0; - else - hw->phy.sfp_type = - ixgbe_sfp_type_da_cu_core1; - } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) { - hw->phy.ops.read_i2c_eeprom( - hw, IXGBE_SFF_CABLE_SPEC_COMP, - &cable_spec); - if (cable_spec & - IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) { - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_da_act_lmt_core0; - else - hw->phy.sfp_type = - ixgbe_sfp_type_da_act_lmt_core1; - } else { - hw->phy.sfp_type = - ixgbe_sfp_type_unknown; - } - } else if (comp_codes_10g & - (IXGBE_SFF_10GBASESR_CAPABLE | - IXGBE_SFF_10GBASELR_CAPABLE)) { - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_srlr_core0; - else - hw->phy.sfp_type = - ixgbe_sfp_type_srlr_core1; - } else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) { - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_1g_cu_core0; - else - hw->phy.sfp_type = - 
ixgbe_sfp_type_1g_cu_core1; - } else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) { - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_1g_sx_core0; - else - hw->phy.sfp_type = - ixgbe_sfp_type_1g_sx_core1; - } else { - hw->phy.sfp_type = ixgbe_sfp_type_unknown; - } - } - - if (hw->phy.sfp_type != stored_sfp_type) - hw->phy.sfp_setup_needed = true; - - /* Determine if the SFP+ PHY is dual speed or not. */ - hw->phy.multispeed_fiber = false; - if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) && - (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) || - ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) && - (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE))) - hw->phy.multispeed_fiber = true; - - /* Determine PHY vendor */ - if (hw->phy.type != ixgbe_phy_nl) { - hw->phy.id = identifier; - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_VENDOR_OUI_BYTE0, - &oui_bytes[0]); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_VENDOR_OUI_BYTE1, - &oui_bytes[1]); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - status = hw->phy.ops.read_i2c_eeprom(hw, - IXGBE_SFF_VENDOR_OUI_BYTE2, - &oui_bytes[2]); - - if (status == IXGBE_ERR_SWFW_SYNC || - status == IXGBE_ERR_I2C || - status == IXGBE_ERR_SFP_NOT_PRESENT) - goto err_read_i2c_eeprom; - - vendor_oui = - ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) | - (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) | - (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT)); - - switch (vendor_oui) { - case IXGBE_SFF_VENDOR_OUI_TYCO: - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.type = - ixgbe_phy_sfp_passive_tyco; - break; - case IXGBE_SFF_VENDOR_OUI_FTL: - if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) - hw->phy.type = ixgbe_phy_sfp_ftl_active; - else - hw->phy.type = ixgbe_phy_sfp_ftl; - break; - case IXGBE_SFF_VENDOR_OUI_AVAGO: - hw->phy.type = ixgbe_phy_sfp_avago; - break; - case IXGBE_SFF_VENDOR_OUI_INTEL: - hw->phy.type = ixgbe_phy_sfp_intel; - break; - default: - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.type = - ixgbe_phy_sfp_passive_unknown; - else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) - hw->phy.type = - ixgbe_phy_sfp_active_unknown; - else - hw->phy.type = ixgbe_phy_sfp_unknown; - break; - } - } - - /* Allow any DA cable vendor */ - if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE | - IXGBE_SFF_DA_ACTIVE_CABLE)) { - status = 0; - goto out; - } - - /* Verify supported 1G SFP modules */ - if (comp_codes_10g == 0 && - !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { - hw->phy.type = ixgbe_phy_sfp_unsupported; - status = IXGBE_ERR_SFP_NOT_SUPPORTED; - goto out; - } - - /* Anything else 82598-based is supported */ - if (hw->mac.type == ixgbe_mac_82598EB) { - status = 0; - goto out; - } - - ixgbe_get_device_caps(hw, &enforce_sfp); - if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) && - !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) || - (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) || - (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0) || - (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) { - /* Make sure we're a supported PHY type */ - if (hw->phy.type == ixgbe_phy_sfp_intel) { - status = 0; - } else { - if 
(hw->allow_unsupported_sfp == true) { - EWARN(hw, "WARNING: Intel (R) Network " - "Connections are quality tested " - "using Intel (R) Ethernet Optics." - " Using untested modules is not " - "supported and may cause unstable" - " operation or damage to the " - "module or the adapter. Intel " - "Corporation is not responsible " - "for any harm caused by using " - "untested modules.\n", status); - status = 0; - } else { - hw_dbg(hw, "SFP+ module not supported\n"); - hw->phy.type = - ixgbe_phy_sfp_unsupported; - status = IXGBE_ERR_SFP_NOT_SUPPORTED; - } - } - } else { - status = 0; - } - } - -out: - return status; - -err_read_i2c_eeprom: - hw->phy.sfp_type = ixgbe_sfp_type_not_present; - if (hw->phy.type != ixgbe_phy_nl) { - hw->phy.id = 0; - hw->phy.type = ixgbe_phy_unknown; - } - return IXGBE_ERR_SFP_NOT_PRESENT; -} - -/** - * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules - * @hw: pointer to hardware structure - * - * Searches for and identifies the QSFP module and assigns appropriate PHY type - **/ -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) -{ - s32 status = 0; - - if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { - hw->phy.sfp_type = ixgbe_sfp_type_not_present; - status = IXGBE_ERR_SFP_NOT_PRESENT; - } - - return status; -} - - -/** - * ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence - * @hw: pointer to hardware structure - * @list_offset: offset to the SFP ID list - * @data_offset: offset to the SFP data block - * - * Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if - * so it returns the offsets to the phy init sequence block. - **/ -s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, - u16 *list_offset, - u16 *data_offset) -{ - u16 sfp_id; - u16 sfp_type = hw->phy.sfp_type; - - if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; - - if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) - return IXGBE_ERR_SFP_NOT_PRESENT; - - if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && - (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) - return IXGBE_ERR_SFP_NOT_SUPPORTED; - - /* - * Limiting active cables and 1G Phys must be initialized as - * SR modules - */ - if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || - sfp_type == ixgbe_sfp_type_1g_cu_core0 || - sfp_type == ixgbe_sfp_type_1g_sx_core0) - sfp_type = ixgbe_sfp_type_srlr_core0; - else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || - sfp_type == ixgbe_sfp_type_1g_cu_core1 || - sfp_type == ixgbe_sfp_type_1g_sx_core1) - sfp_type = ixgbe_sfp_type_srlr_core1; - - /* Read offset to PHY init contents */ - hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset); - - if ((!*list_offset) || (*list_offset == 0xFFFF)) - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; - - /* Shift offset to first ID word */ - (*list_offset)++; - - /* - * Find the matching SFP ID in the EEPROM - * and program the init sequence - */ - hw->eeprom.ops.read(hw, *list_offset, &sfp_id); - - while (sfp_id != IXGBE_PHY_INIT_END_NL) { - if (sfp_id == sfp_type) { - (*list_offset)++; - hw->eeprom.ops.read(hw, *list_offset, data_offset); - if ((!*data_offset) || (*data_offset == 0xFFFF)) { - hw_dbg(hw, "SFP+ module not supported\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; - } else { - break; - } - } else { - (*list_offset) += 2; - if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id)) - return IXGBE_ERR_PHY; - } - } - - if (sfp_id == IXGBE_PHY_INIT_END_NL) { - hw_dbg(hw, "No matching SFP+ module found\n"); - return 
IXGBE_ERR_SFP_NOT_SUPPORTED; - } - - return 0; -} - -/** - * ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface - * @hw: pointer to hardware structure - * @byte_offset: EEPROM byte offset to read - * @eeprom_data: value read - * - * Performs byte read operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 *eeprom_data) -{ - return hw->phy.ops.read_i2c_byte(hw, byte_offset, - IXGBE_I2C_EEPROM_DEV_ADDR, - eeprom_data); -} - -/** - * ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface - * @hw: pointer to hardware structure - * @byte_offset: EEPROM byte offset to write - * @eeprom_data: value to write - * - * Performs byte write operation to SFP module's EEPROM over I2C interface. - **/ -s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 eeprom_data) -{ - return hw->phy.ops.write_i2c_byte(hw, byte_offset, - IXGBE_I2C_EEPROM_DEV_ADDR, - eeprom_data); -} - -/** - * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to read - * @data: value read - * - * Performs byte read operation to SFP module's EEPROM over I2C interface at - * a specified device address. - **/ -s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data) -{ - s32 status = 0; - u32 max_retry = 10; - u32 retry = 0; - u16 swfw_mask = 0; - bool nack = 1; - *data = 0; - - if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) - swfw_mask = IXGBE_GSSR_PHY1_SM; - else - swfw_mask = IXGBE_GSSR_PHY0_SM; - - do { - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) - != 0) { - status = IXGBE_ERR_SWFW_SYNC; - goto read_byte_out; - } - - ixgbe_i2c_start(hw); - - /* Device Address and write indication */ - status = ixgbe_clock_out_i2c_byte(hw, dev_addr); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - status = ixgbe_clock_out_i2c_byte(hw, byte_offset); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - ixgbe_i2c_start(hw); - - /* Device Address and read indication */ - status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1)); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - status = ixgbe_clock_in_i2c_byte(hw, data); - if (status != 0) - goto fail; - - status = ixgbe_clock_out_i2c_bit(hw, nack); - if (status != 0) - goto fail; - - ixgbe_i2c_stop(hw); - break; - -fail: - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - msleep(100); - ixgbe_i2c_bus_clear(hw); - retry++; - if (retry < max_retry) - hw_dbg(hw, "I2C byte read error - Retrying.\n"); - else - hw_dbg(hw, "I2C byte read error.\n"); - - } while (retry < max_retry); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - -read_byte_out: - return status; -} - -/** - * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C - * @hw: pointer to hardware structure - * @byte_offset: byte offset to write - * @data: value to write - * - * Performs byte write operation to SFP module's EEPROM over I2C interface at - * a specified device address. 
- **/ -s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data) -{ - s32 status = 0; - u32 max_retry = 1; - u32 retry = 0; - u16 swfw_mask = 0; - - if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) - swfw_mask = IXGBE_GSSR_PHY1_SM; - else - swfw_mask = IXGBE_GSSR_PHY0_SM; - - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) { - status = IXGBE_ERR_SWFW_SYNC; - goto write_byte_out; - } - - do { - ixgbe_i2c_start(hw); - - status = ixgbe_clock_out_i2c_byte(hw, dev_addr); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - status = ixgbe_clock_out_i2c_byte(hw, byte_offset); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - status = ixgbe_clock_out_i2c_byte(hw, data); - if (status != 0) - goto fail; - - status = ixgbe_get_i2c_ack(hw); - if (status != 0) - goto fail; - - ixgbe_i2c_stop(hw); - break; - -fail: - ixgbe_i2c_bus_clear(hw); - retry++; - if (retry < max_retry) - hw_dbg(hw, "I2C byte write error - Retrying.\n"); - else - hw_dbg(hw, "I2C byte write error.\n"); - } while (retry < max_retry); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - -write_byte_out: - return status; -} - -/** - * ixgbe_i2c_start - Sets I2C start condition - * @hw: pointer to hardware structure - * - * Sets I2C start condition (High -> Low on SDA while SCL is High) - **/ -static void ixgbe_i2c_start(struct ixgbe_hw *hw) -{ - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - - /* Start condition must begin with data and clock high */ - ixgbe_set_i2c_data(hw, &i2cctl, 1); - ixgbe_raise_i2c_clk(hw, &i2cctl); - - /* Setup time for start condition (4.7us) */ - udelay(IXGBE_I2C_T_SU_STA); - - ixgbe_set_i2c_data(hw, &i2cctl, 0); - - /* Hold time for start condition (4us) */ - udelay(IXGBE_I2C_T_HD_STA); - - ixgbe_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - udelay(IXGBE_I2C_T_LOW); - -} - -/** - * ixgbe_i2c_stop - Sets I2C stop condition - * @hw: pointer to hardware structure - * - * Sets I2C stop condition (Low -> High on SDA while SCL is High) - **/ -static void ixgbe_i2c_stop(struct ixgbe_hw *hw) -{ - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - - /* Stop condition must begin with data low and clock high */ - ixgbe_set_i2c_data(hw, &i2cctl, 0); - ixgbe_raise_i2c_clk(hw, &i2cctl); - - /* Setup time for stop condition (4us) */ - udelay(IXGBE_I2C_T_SU_STO); - - ixgbe_set_i2c_data(hw, &i2cctl, 1); - - /* bus free time between stop and start (4.7us)*/ - udelay(IXGBE_I2C_T_BUF); -} - -/** - * ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C - * @hw: pointer to hardware structure - * @data: data byte to clock in - * - * Clocks in one byte data via I2C data/clock - **/ -static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data) -{ - s32 i; - bool bit = 0; - - for (i = 7; i >= 0; i--) { - ixgbe_clock_in_i2c_bit(hw, &bit); - *data |= bit << i; - } - - return 0; -} - -/** - * ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C - * @hw: pointer to hardware structure - * @data: data byte clocked out - * - * Clocks out one byte data via I2C data/clock - **/ -static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data) -{ - s32 status = 0; - s32 i; - u32 i2cctl; - bool bit = 0; - - for (i = 7; i >= 0; i--) { - bit = (data >> i) & 0x1; - status = ixgbe_clock_out_i2c_bit(hw, bit); - - if (status != 0) - break; - } - - /* Release SDA line (set high) */ - i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - i2cctl |= 
IXGBE_I2C_DATA_OUT; - IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl); - IXGBE_WRITE_FLUSH(hw); - - return status; -} - -/** - * ixgbe_get_i2c_ack - Polls for I2C ACK - * @hw: pointer to hardware structure - * - * Clocks in/out one bit via I2C data/clock - **/ -static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) -{ - s32 status = 0; - u32 i = 0; - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - u32 timeout = 10; - bool ack = 1; - - ixgbe_raise_i2c_clk(hw, &i2cctl); - - - /* Minimum high period of clock is 4us */ - udelay(IXGBE_I2C_T_HIGH); - - /* Poll for ACK. Note that ACK in I2C spec is - * transition from 1 to 0 */ - for (i = 0; i < timeout; i++) { - i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - ack = ixgbe_get_i2c_data(&i2cctl); - - udelay(1); - if (ack == 0) - break; - } - - if (ack == 1) { - hw_dbg(hw, "I2C ack was not received.\n"); - status = IXGBE_ERR_I2C; - } - - ixgbe_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - udelay(IXGBE_I2C_T_LOW); - - return status; -} - -/** - * ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock - * @hw: pointer to hardware structure - * @data: read data value - * - * Clocks in one bit via I2C data/clock - **/ -static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data) -{ - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - - ixgbe_raise_i2c_clk(hw, &i2cctl); - - /* Minimum high period of clock is 4us */ - udelay(IXGBE_I2C_T_HIGH); - - i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - *data = ixgbe_get_i2c_data(&i2cctl); - - ixgbe_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us */ - udelay(IXGBE_I2C_T_LOW); - - return 0; -} - -/** - * ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock - * @hw: pointer to hardware structure - * @data: data value to write - * - * Clocks out one bit via I2C data/clock - **/ -static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) -{ - s32 status; - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - - status = ixgbe_set_i2c_data(hw, &i2cctl, data); - if (status == 0) { - ixgbe_raise_i2c_clk(hw, &i2cctl); - - /* Minimum high period of clock is 4us */ - udelay(IXGBE_I2C_T_HIGH); - - ixgbe_lower_i2c_clk(hw, &i2cctl); - - /* Minimum low period of clock is 4.7 us. - * This also takes care of the data hold time. 
- */ - udelay(IXGBE_I2C_T_LOW); - } else { - status = IXGBE_ERR_I2C; - hw_dbg(hw, "I2C data was not set to %X\n", data); - } - - return status; -} -/** - * ixgbe_raise_i2c_clk - Raises the I2C SCL clock - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Raises the I2C clock line '0'->'1' - **/ -static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) -{ - u32 i = 0; - u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT; - u32 i2cctl_r = 0; - - for (i = 0; i < timeout; i++) { - *i2cctl |= IXGBE_I2C_CLK_OUT; - - IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); - IXGBE_WRITE_FLUSH(hw); - /* SCL rise time (1000ns) */ - udelay(IXGBE_I2C_T_RISE); - - i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - if (i2cctl_r & IXGBE_I2C_CLK_IN) - break; - } -} - -/** - * ixgbe_lower_i2c_clk - Lowers the I2C SCL clock - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Lowers the I2C clock line '1'->'0' - **/ -static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) -{ - - *i2cctl &= ~IXGBE_I2C_CLK_OUT; - - IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); - IXGBE_WRITE_FLUSH(hw); - - /* SCL fall time (300ns) */ - udelay(IXGBE_I2C_T_FALL); -} - -/** - * ixgbe_set_i2c_data - Sets the I2C data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * @data: I2C data value (0 or 1) to set - * - * Sets the I2C data bit - **/ -static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) -{ - s32 status = 0; - - if (data) - *i2cctl |= IXGBE_I2C_DATA_OUT; - else - *i2cctl &= ~IXGBE_I2C_DATA_OUT; - - IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); - IXGBE_WRITE_FLUSH(hw); - - /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ - udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA); - - /* Verify data was set correctly */ - *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - if (data != ixgbe_get_i2c_data(i2cctl)) { - status = IXGBE_ERR_I2C; - hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); - } - - return status; -} - -/** - * ixgbe_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register - * - * Returns the I2C data bit value - **/ -static bool ixgbe_get_i2c_data(u32 *i2cctl) -{ - bool data; - - if (*i2cctl & IXGBE_I2C_DATA_IN) - data = 1; - else - data = 0; - - return data; -} - -/** - * ixgbe_i2c_bus_clear - Clears the I2C bus - * @hw: pointer to hardware structure - * - * Clears the I2C bus by sending nine clock pulses. - * Used when data line is stuck low. - **/ -void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) -{ - u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); - u32 i; - - ixgbe_i2c_start(hw); - - ixgbe_set_i2c_data(hw, &i2cctl, 1); - - for (i = 0; i < 9; i++) { - ixgbe_raise_i2c_clk(hw, &i2cctl); - - /* Min high period of clock is 4us */ - udelay(IXGBE_I2C_T_HIGH); - - ixgbe_lower_i2c_clk(hw, &i2cctl); - - /* Min low period of clock is 4.7us*/ - udelay(IXGBE_I2C_T_LOW); - } - - ixgbe_i2c_start(hw); - - /* Put the i2c bus back to default state */ - ixgbe_i2c_stop(hw); -} - -/** - * ixgbe_tn_check_overtemp - Checks if an overtemp occurred. 
- * @hw: pointer to hardware structure - * - * Checks if the LASI temp alarm status was triggered due to overtemp - **/ -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) -{ - s32 status = 0; - u16 phy_data = 0; - - if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) - goto out; - - /* Check that the LASI temp alarm status was triggered */ - hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data); - - if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) - goto out; - - status = IXGBE_ERR_OVERTEMP; -out: - return status; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h deleted file mode 100644 index 6baa9acb..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_PHY_H_ -#define _IXGBE_PHY_H_ - -#include "ixgbe_type.h" -#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0 - -/* EEPROM byte offsets */ -#define IXGBE_SFF_IDENTIFIER 0x0 -#define IXGBE_SFF_IDENTIFIER_SFP 0x3 -#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25 -#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26 -#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27 -#define IXGBE_SFF_1GBE_COMP_CODES 0x6 -#define IXGBE_SFF_10GBE_COMP_CODES 0x3 -#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 -#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C - -/* Bitmasks */ -#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 -#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8 -#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4 -#define IXGBE_SFF_1GBASESX_CAPABLE 0x1 -#define IXGBE_SFF_1GBASELX_CAPABLE 0x2 -#define IXGBE_SFF_1GBASET_CAPABLE 0x8 -#define IXGBE_SFF_10GBASESR_CAPABLE 0x10 -#define IXGBE_SFF_10GBASELR_CAPABLE 0x20 -#define IXGBE_I2C_EEPROM_READ_MASK 0x100 -#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3 -#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0 -#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1 -#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2 -#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3 - -/* Flow control defines */ -#define IXGBE_TAF_SYM_PAUSE 0x400 -#define IXGBE_TAF_ASM_PAUSE 0x800 - -/* Bit-shift macros */ -#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24 -#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16 -#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8 - -/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ -#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600 -#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500 -#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00 -#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100 - -/* I2C SDA and SCL timing parameters for standard mode */ -#define IXGBE_I2C_T_HD_STA 4 -#define IXGBE_I2C_T_LOW 5 -#define IXGBE_I2C_T_HIGH 4 -#define IXGBE_I2C_T_SU_STA 5 -#define IXGBE_I2C_T_HD_DATA 5 -#define IXGBE_I2C_T_SU_DATA 1 -#define IXGBE_I2C_T_RISE 1 -#define IXGBE_I2C_T_FALL 1 -#define IXGBE_I2C_T_SU_STO 4 -#define IXGBE_I2C_T_BUF 5 - -#define IXGBE_TN_LASI_STATUS_REG 0x9005 -#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008 - -s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw); -bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr); -enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id); -s32 ixgbe_get_phy_id(struct ixgbe_hw *hw); -s32 
ixgbe_identify_phy_generic(struct ixgbe_hw *hw); -s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw); -s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data); -s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data); -s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw); -s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg, - bool autoneg_wait_to_complete); -s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *autoneg); - -/* PHY specific */ -s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *link_up); -s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw); -s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, - u16 *firmware_version); -s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, - u16 *firmware_version); - -s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); -s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw); -s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); -s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, - u16 *list_offset, - u16 *data_offset); -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); -s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data); -s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data); -s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 *eeprom_data); -s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, - u8 eeprom_data); -void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw); -#endif /* _IXGBE_PHY_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h deleted file mode 100644 index 0689590e..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h +++ /dev/null @@ -1,3239 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_TYPE_H_ -#define _IXGBE_TYPE_H_ - -#include "ixgbe_osdep.h" - - -/* Vendor ID */ -#define IXGBE_INTEL_VENDOR_ID 0x8086 - -/* Device IDs */ -#define IXGBE_DEV_ID_82598 0x10B6 -#define IXGBE_DEV_ID_82598_BX 0x1508 -#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 -#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 -#define IXGBE_DEV_ID_82598AT 0x10C8 -#define IXGBE_DEV_ID_82598AT2 0x150B -#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB -#define IXGBE_DEV_ID_82598EB_CX4 0x10DD -#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC -#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 -#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 -#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 -#define IXGBE_DEV_ID_82599_KX4 0x10F7 -#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 -#define IXGBE_DEV_ID_82599_KR 0x1517 -#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 -#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C -#define IXGBE_DEV_ID_82599_CX4 0x10F9 -#define IXGBE_DEV_ID_82599_SFP 0x10FB -#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 -#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 -#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A -#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 -#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 -#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D -#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 -#define IXGBE_DEV_ID_82599EN_SFP 0x1557 -#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC -#define IXGBE_DEV_ID_82599_T3_LOM 0x151C -#define IXGBE_DEV_ID_82599_LS 0x154F -#define IXGBE_DEV_ID_X540T 0x1528 - -/* General Registers */ -#define IXGBE_CTRL 0x00000 -#define IXGBE_STATUS 0x00008 -#define IXGBE_CTRL_EXT 0x00018 -#define IXGBE_ESDP 0x00020 -#define IXGBE_EODSDP 0x00028 -#define IXGBE_I2CCTL 0x00028 -#define IXGBE_PHY_GPIO 0x00028 -#define IXGBE_MAC_GPIO 0x00030 -#define IXGBE_PHYINT_STATUS0 0x00100 -#define IXGBE_PHYINT_STATUS1 0x00104 -#define IXGBE_PHYINT_STATUS2 0x00108 -#define IXGBE_LEDCTL 0x00200 -#define IXGBE_FRTIMER 0x00048 -#define IXGBE_TCPTIMER 0x0004C -#define IXGBE_CORESPARE 0x00600 -#define IXGBE_EXVET 0x05078 - -/* NVM Registers */ -#define IXGBE_EEC 0x10010 -#define IXGBE_EERD 0x10014 -#define IXGBE_EEWR 0x10018 -#define IXGBE_FLA 0x1001C -#define IXGBE_EEMNGCTL 0x10110 -#define IXGBE_EEMNGDATA 0x10114 -#define IXGBE_FLMNGCTL 0x10118 -#define IXGBE_FLMNGDATA 0x1011C -#define IXGBE_FLMNGCNT 0x10120 -#define IXGBE_FLOP 0x1013C -#define IXGBE_GRC 0x10200 -#define IXGBE_SRAMREL 0x10210 -#define IXGBE_PHYDBG 0x10218 - -/* General Receive Control */ -#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */ -#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */ - -#define IXGBE_VPDDIAG0 0x10204 -#define IXGBE_VPDDIAG1 0x10208 - -/* I2CCTL Bit Masks */ -#define IXGBE_I2C_CLK_IN 0x00000001 -#define IXGBE_I2C_CLK_OUT 0x00000002 -#define IXGBE_I2C_DATA_IN 0x00000004 -#define IXGBE_I2C_DATA_OUT 0x00000008 -#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500 - -#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 -#define IXGBE_EMC_INTERNAL_DATA 0x00 -#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 -#define IXGBE_EMC_DIODE1_DATA 0x01 -#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 -#define IXGBE_EMC_DIODE2_DATA 0x23 -#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A - -#define IXGBE_MAX_SENSORS 3 - -struct ixgbe_thermal_diode_data { - u8 location; - u8 temp; - u8 caution_thresh; - u8 max_op_thresh; -}; - -struct ixgbe_thermal_sensor_data { - struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS]; -}; - -/* 
Interrupt Registers */ -#define IXGBE_EICR 0x00800 -#define IXGBE_EICS 0x00808 -#define IXGBE_EIMS 0x00880 -#define IXGBE_EIMC 0x00888 -#define IXGBE_EIAC 0x00810 -#define IXGBE_EIAM 0x00890 -#define IXGBE_EICS_EX(_i) (0x00A90 + (_i) * 4) -#define IXGBE_EIMS_EX(_i) (0x00AA0 + (_i) * 4) -#define IXGBE_EIMC_EX(_i) (0x00AB0 + (_i) * 4) -#define IXGBE_EIAM_EX(_i) (0x00AD0 + (_i) * 4) -/* 82599 EITR is only 12 bits, with the lower 3 always zero */ -/* - * 82598 EITR is 16 bits but set the limits based on the max - * supported by all ixgbe hardware - */ -#define IXGBE_MAX_INT_RATE 488281 -#define IXGBE_MIN_INT_RATE 956 -#define IXGBE_MAX_EITR 0x00000FF8 -#define IXGBE_MIN_EITR 8 -#define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \ - (0x012300 + (((_i) - 24) * 4))) -#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8 -#define IXGBE_EITR_LLI_MOD 0x00008000 -#define IXGBE_EITR_CNT_WDIS 0x80000000 -#define IXGBE_IVAR(_i) (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */ -#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */ -#define IXGBE_EITRSEL 0x00894 -#define IXGBE_MSIXT 0x00000 /* MSI-X Table. 0x0000 - 0x01C */ -#define IXGBE_MSIXPBA 0x02000 /* MSI-X Pending bit array */ -#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4))) -#define IXGBE_GPIE 0x00898 - -/* Flow Control Registers */ -#define IXGBE_FCADBUL 0x03210 -#define IXGBE_FCADBUH 0x03214 -#define IXGBE_FCAMACL 0x04328 -#define IXGBE_FCAMACH 0x0432C -#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_PFCTOP 0x03008 -#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */ -#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */ -#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */ -#define IXGBE_FCRTV 0x032A0 -#define IXGBE_FCCFG 0x03D00 -#define IXGBE_TFCS 0x0CE00 - -/* Receive DMA Registers */ -#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \ - (0x0D000 + (((_i) - 64) * 0x40))) -#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \ - (0x0D004 + (((_i) - 64) * 0x40))) -#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \ - (0x0D008 + (((_i) - 64) * 0x40))) -#define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \ - (0x0D010 + (((_i) - 64) * 0x40))) -#define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \ - (0x0D018 + (((_i) - 64) * 0x40))) -#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \ - (0x0D028 + (((_i) - 64) * 0x40))) -#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \ - (0x0D02C + (((_i) - 64) * 0x40))) -#define IXGBE_RSCDBU 0x03028 -#define IXGBE_RDDCC 0x02F20 -#define IXGBE_RXMEMWRAP 0x03190 -#define IXGBE_STARCTRL 0x03024 -/* - * Split and Replication Receive Control Registers - * 00-15 : 0x02100 + n*4 - * 16-64 : 0x01014 + n*0x40 - * 64-127: 0x0D014 + (n-64)*0x40 - */ -#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \ - (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \ - (0x0D014 + (((_i) - 64) * 0x40)))) -/* - * Rx DCA Control Register: - * 00-15 : 0x02200 + n*4 - * 16-64 : 0x0100C + n*0x40 - * 64-127: 0x0D00C + (n-64)*0x40 - */ -#define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \ - (((_i) < 64) ? 
(0x0100C + ((_i) * 0x40)) : \ - (0x0D00C + (((_i) - 64) * 0x40)))) -#define IXGBE_RDRXCTL 0x02F00 -#define IXGBE_RDRXCTL_RSC_PUSH 0x80 -/* 8 of these 0x03C00 - 0x03C1C */ -#define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4)) -#define IXGBE_RXCTRL 0x03000 -#define IXGBE_DROPEN 0x03D04 -#define IXGBE_RXPBSIZE_SHIFT 10 - -/* Receive Registers */ -#define IXGBE_RXCSUM 0x05000 -#define IXGBE_RFCTL 0x05008 -#define IXGBE_DRECCCTL 0x02F08 -#define IXGBE_DRECCCTL_DISABLE 0 -#define IXGBE_DRECCCTL2 0x02F8C - -/* Multicast Table Array - 128 entries */ -#define IXGBE_MTA(_i) (0x05200 + ((_i) * 4)) -#define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ - (0x0A200 + ((_i) * 8))) -#define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ - (0x0A204 + ((_i) * 8))) -#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8)) -#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8)) -/* Packet split receive type */ -#define IXGBE_PSRTYPE(_i) (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \ - (0x0EA00 + ((_i) * 4))) -/* array of 4096 1-bit vlan filters */ -#define IXGBE_VFTA(_i) (0x0A000 + ((_i) * 4)) -/*array of 4096 4-bit vlan vmdq indices */ -#define IXGBE_VFTAVIND(_j, _i) (0x0A200 + ((_j) * 0x200) + ((_i) * 4)) -#define IXGBE_FCTRL 0x05080 -#define IXGBE_VLNCTRL 0x05088 -#define IXGBE_MCSTCTRL 0x05090 -#define IXGBE_MRQC 0x05818 -#define IXGBE_SAQF(_i) (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */ -#define IXGBE_DAQF(_i) (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */ -#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */ -#define IXGBE_FTQF(_i) (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */ -#define IXGBE_ETQF(_i) (0x05128 + ((_i) * 4)) /* EType Queue Filter */ -#define IXGBE_ETQS(_i) (0x0EC00 + ((_i) * 4)) /* EType Queue Select */ -#define IXGBE_SYNQF 0x0EC30 /* SYN Packet Queue Filter */ -#define IXGBE_RQTC 0x0EC70 -#define IXGBE_MTQC 0x08120 -#define IXGBE_VLVF(_i) (0x0F100 + ((_i) * 4)) /* 64 of these (0-63) */ -#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4)) /* 128 of these (0-127) */ -#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4)) /* 64 of these (0-63) */ -#define IXGBE_VT_CTL 0x051B0 -#define IXGBE_PFMAILBOX(_i) (0x04B00 + (4 * (_i))) /* 64 total */ -/* 64 Mailboxes, 16 DW each */ -#define IXGBE_PFMBMEM(_i) (0x13000 + (64 * (_i))) -#define IXGBE_PFMBICR(_i) (0x00710 + (4 * (_i))) /* 4 total */ -#define IXGBE_PFMBIMR(_i) (0x00720 + (4 * (_i))) /* 4 total */ -#define IXGBE_VFRE(_i) (0x051E0 + ((_i) * 4)) -#define IXGBE_VFTE(_i) (0x08110 + ((_i) * 4)) -#define IXGBE_VMECM(_i) (0x08790 + ((_i) * 4)) -#define IXGBE_QDE 0x2F04 -#define IXGBE_VMTXSW(_i) (0x05180 + ((_i) * 4)) /* 2 total */ -#define IXGBE_VMOLR(_i) (0x0F000 + ((_i) * 4)) /* 64 total */ -#define IXGBE_UTA(_i) (0x0F400 + ((_i) * 4)) -#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4)) -#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4)) -#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4)) -#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/ -#define IXGBE_RXFECCERR0 0x051B8 -#define IXGBE_LLITHRESH 0x0EC90 -#define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_IMIRVP 0x05AC0 -#define IXGBE_VMD_CTL 0x0581C -#define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */ -#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */ - -/* Flow Director registers */ -#define IXGBE_FDIRCTRL 0x0EE00 -#define IXGBE_FDIRHKEY 0x0EE68 -#define IXGBE_FDIRSKEY 0x0EE6C -#define 
IXGBE_FDIRDIP4M 0x0EE3C -#define IXGBE_FDIRSIP4M 0x0EE40 -#define IXGBE_FDIRTCPM 0x0EE44 -#define IXGBE_FDIRUDPM 0x0EE48 -#define IXGBE_FDIRIP6M 0x0EE74 -#define IXGBE_FDIRM 0x0EE70 - -/* Flow Director Stats registers */ -#define IXGBE_FDIRFREE 0x0EE38 -#define IXGBE_FDIRLEN 0x0EE4C -#define IXGBE_FDIRUSTAT 0x0EE50 -#define IXGBE_FDIRFSTAT 0x0EE54 -#define IXGBE_FDIRMATCH 0x0EE58 -#define IXGBE_FDIRMISS 0x0EE5C - -/* Flow Director Programming registers */ -#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */ -#define IXGBE_FDIRIPSA 0x0EE18 -#define IXGBE_FDIRIPDA 0x0EE1C -#define IXGBE_FDIRPORT 0x0EE20 -#define IXGBE_FDIRVLAN 0x0EE24 -#define IXGBE_FDIRHASH 0x0EE28 -#define IXGBE_FDIRCMD 0x0EE2C - -/* Transmit DMA registers */ -#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/ -#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40)) -#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40)) -#define IXGBE_TDH(_i) (0x06010 + ((_i) * 0x40)) -#define IXGBE_TDT(_i) (0x06018 + ((_i) * 0x40)) -#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40)) -#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40)) -#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40)) -#define IXGBE_DTXCTL 0x07E00 - -#define IXGBE_DMATXCTL 0x04A80 -#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */ -#define IXGBE_PFDTXGSWC 0x08220 -#define IXGBE_DTXMXSZRQ 0x08100 -#define IXGBE_DTXTCPFLGL 0x04A88 -#define IXGBE_DTXTCPFLGH 0x04A8C -#define IXGBE_LBDRPEN 0x0CA00 -#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */ - -#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */ -#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */ -#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */ -#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */ - -#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */ - -/* Anti-spoofing defines */ -#define IXGBE_SPOOF_MACAS_MASK 0xFF -#define IXGBE_SPOOF_VLANAS_MASK 0xFF00 -#define IXGBE_SPOOF_VLANAS_SHIFT 8 -#define IXGBE_PFVFSPOOF_REG_COUNT 8 -/* 16 of these (0-15) */ -#define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4)) -/* Tx DCA Control register : 128 of these (0-127) */ -#define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40)) -#define IXGBE_TIPG 0x0CB00 -#define IXGBE_TXPBSIZE(_i) (0x0CC00 + ((_i) * 4)) /* 8 of these */ -#define IXGBE_MNGTXMAP 0x0CD10 -#define IXGBE_TIPG_FIBER_DEFAULT 3 -#define IXGBE_TXPBSIZE_SHIFT 10 - -/* Wake up registers */ -#define IXGBE_WUC 0x05800 -#define IXGBE_WUFC 0x05808 -#define IXGBE_WUS 0x05810 -#define IXGBE_IPAV 0x05838 -#define IXGBE_IP4AT 0x05840 /* IPv4 table 0x5840-0x5858 */ -#define IXGBE_IP6AT 0x05880 /* IPv6 table 0x5880-0x588F */ - -#define IXGBE_WUPL 0x05900 -#define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */ -#define IXGBE_FHFT(_n) (0x09000 + (_n * 0x100)) /* Flex host filter table */ -/* Ext Flexible Host Filter Table */ -#define IXGBE_FHFT_EXT(_n) (0x09800 + (_n * 0x100)) - -#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 -#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 - -/* Each Flexible Filter is at most 128 (0x80) bytes in length */ -#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX 128 -#define IXGBE_FHFT_LENGTH_OFFSET 0xFC /* Length byte in FHFT */ -#define IXGBE_FHFT_LENGTH_MASK 0x0FF /* Length in lower byte */ - -/* Definitions for power management and wakeup registers */ -/* Wake Up Control */ -#define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */ -#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */ -#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N 
pin assertion */ - -/* Wake Up Filter Control */ -#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ -#define IXGBE_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ -#define IXGBE_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ -#define IXGBE_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ -#define IXGBE_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ -#define IXGBE_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ -#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ -#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ -#define IXGBE_WUFC_MNG 0x00000100 /* Directed Mgmt Packet Wakeup Enable */ - -#define IXGBE_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ -#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ -#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ -#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ -#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ -#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */ -#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */ -#define IXGBE_WUFC_FLX_FILTERS 0x000F0000 /* Mask for 4 flex filters */ -/* Mask for Ext. flex filters */ -#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000 -#define IXGBE_WUFC_ALL_FILTERS 0x003F00FF /* Mask for all wakeup filters */ -#define IXGBE_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ - -/* Wake Up Status */ -#define IXGBE_WUS_LNKC IXGBE_WUFC_LNKC -#define IXGBE_WUS_MAG IXGBE_WUFC_MAG -#define IXGBE_WUS_EX IXGBE_WUFC_EX -#define IXGBE_WUS_MC IXGBE_WUFC_MC -#define IXGBE_WUS_BC IXGBE_WUFC_BC -#define IXGBE_WUS_ARP IXGBE_WUFC_ARP -#define IXGBE_WUS_IPV4 IXGBE_WUFC_IPV4 -#define IXGBE_WUS_IPV6 IXGBE_WUFC_IPV6 -#define IXGBE_WUS_MNG IXGBE_WUFC_MNG -#define IXGBE_WUS_FLX0 IXGBE_WUFC_FLX0 -#define IXGBE_WUS_FLX1 IXGBE_WUFC_FLX1 -#define IXGBE_WUS_FLX2 IXGBE_WUFC_FLX2 -#define IXGBE_WUS_FLX3 IXGBE_WUFC_FLX3 -#define IXGBE_WUS_FLX4 IXGBE_WUFC_FLX4 -#define IXGBE_WUS_FLX5 IXGBE_WUFC_FLX5 -#define IXGBE_WUS_FLX_FILTERS IXGBE_WUFC_FLX_FILTERS - -/* Wake Up Packet Length */ -#define IXGBE_WUPL_LENGTH_MASK 0xFFFF - -/* DCB registers */ -#define IXGBE_DCB_MAX_TRAFFIC_CLASS 8 -#define IXGBE_RMCS 0x03D00 -#define IXGBE_DPMCS 0x07F40 -#define IXGBE_PDPMCS 0x0CD00 -#define IXGBE_RUPPBMR 0x050A0 -#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_TDTQ2TCCR(_i) (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */ -#define IXGBE_TDTQ2TCSR(_i) (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */ -#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ - - -/* Security Control Registers */ -#define IXGBE_SECTXCTRL 0x08800 -#define IXGBE_SECTXSTAT 0x08804 -#define IXGBE_SECTXBUFFAF 0x08808 -#define IXGBE_SECTXMINIFG 0x08810 -#define IXGBE_SECRXCTRL 0x08D00 -#define IXGBE_SECRXSTAT 0x08D04 - -/* Security Bit Fields and Masks */ -#define IXGBE_SECTXCTRL_SECTX_DIS 0x00000001 -#define IXGBE_SECTXCTRL_TX_DIS 0x00000002 -#define IXGBE_SECTXCTRL_STORE_FORWARD 0x00000004 - -#define IXGBE_SECTXSTAT_SECTX_RDY 0x00000001 -#define IXGBE_SECTXSTAT_ECC_TXERR 0x00000002 - -#define IXGBE_SECRXCTRL_SECRX_DIS 0x00000001 -#define IXGBE_SECRXCTRL_RX_DIS 0x00000002 - -#define IXGBE_SECRXSTAT_SECRX_RDY 0x00000001 -#define IXGBE_SECRXSTAT_ECC_RXERR 0x00000002 - -/* 
LinkSec (MacSec) Registers */ -#define IXGBE_LSECTXCAP 0x08A00 -#define IXGBE_LSECRXCAP 0x08F00 -#define IXGBE_LSECTXCTRL 0x08A04 -#define IXGBE_LSECTXSCL 0x08A08 /* SCI Low */ -#define IXGBE_LSECTXSCH 0x08A0C /* SCI High */ -#define IXGBE_LSECTXSA 0x08A10 -#define IXGBE_LSECTXPN0 0x08A14 -#define IXGBE_LSECTXPN1 0x08A18 -#define IXGBE_LSECTXKEY0(_n) (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */ -#define IXGBE_LSECTXKEY1(_n) (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */ -#define IXGBE_LSECRXCTRL 0x08F04 -#define IXGBE_LSECRXSCL 0x08F08 -#define IXGBE_LSECRXSCH 0x08F0C -#define IXGBE_LSECRXSA(_i) (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */ -#define IXGBE_LSECRXPN(_i) (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */ -#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m)))) -#define IXGBE_LSECTXUT 0x08A3C /* OutPktsUntagged */ -#define IXGBE_LSECTXPKTE 0x08A40 /* OutPktsEncrypted */ -#define IXGBE_LSECTXPKTP 0x08A44 /* OutPktsProtected */ -#define IXGBE_LSECTXOCTE 0x08A48 /* OutOctetsEncrypted */ -#define IXGBE_LSECTXOCTP 0x08A4C /* OutOctetsProtected */ -#define IXGBE_LSECRXUT 0x08F40 /* InPktsUntagged/InPktsNoTag */ -#define IXGBE_LSECRXOCTD 0x08F44 /* InOctetsDecrypted */ -#define IXGBE_LSECRXOCTV 0x08F48 /* InOctetsValidated */ -#define IXGBE_LSECRXBAD 0x08F4C /* InPktsBadTag */ -#define IXGBE_LSECRXNOSCI 0x08F50 /* InPktsNoSci */ -#define IXGBE_LSECRXUNSCI 0x08F54 /* InPktsUnknownSci */ -#define IXGBE_LSECRXUNCH 0x08F58 /* InPktsUnchecked */ -#define IXGBE_LSECRXDELAY 0x08F5C /* InPktsDelayed */ -#define IXGBE_LSECRXLATE 0x08F60 /* InPktsLate */ -#define IXGBE_LSECRXOK(_n) (0x08F64 + (0x04 * (_n))) /* InPktsOk */ -#define IXGBE_LSECRXINV(_n) (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */ -#define IXGBE_LSECRXNV(_n) (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */ -#define IXGBE_LSECRXUNSA 0x08F7C /* InPktsUnusedSa */ -#define IXGBE_LSECRXNUSA 0x08F80 /* InPktsNotUsingSa */ - -/* LinkSec (MacSec) Bit Fields and Masks */ -#define IXGBE_LSECTXCAP_SUM_MASK 0x00FF0000 -#define IXGBE_LSECTXCAP_SUM_SHIFT 16 -#define IXGBE_LSECRXCAP_SUM_MASK 0x00FF0000 -#define IXGBE_LSECRXCAP_SUM_SHIFT 16 - -#define IXGBE_LSECTXCTRL_EN_MASK 0x00000003 -#define IXGBE_LSECTXCTRL_DISABLE 0x0 -#define IXGBE_LSECTXCTRL_AUTH 0x1 -#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT 0x2 -#define IXGBE_LSECTXCTRL_AISCI 0x00000020 -#define IXGBE_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 -#define IXGBE_LSECTXCTRL_RSV_MASK 0x000000D8 - -#define IXGBE_LSECRXCTRL_EN_MASK 0x0000000C -#define IXGBE_LSECRXCTRL_EN_SHIFT 2 -#define IXGBE_LSECRXCTRL_DISABLE 0x0 -#define IXGBE_LSECRXCTRL_CHECK 0x1 -#define IXGBE_LSECRXCTRL_STRICT 0x2 -#define IXGBE_LSECRXCTRL_DROP 0x3 -#define IXGBE_LSECRXCTRL_PLSH 0x00000040 -#define IXGBE_LSECRXCTRL_RP 0x00000080 -#define IXGBE_LSECRXCTRL_RSV_MASK 0xFFFFFF33 - -/* IpSec Registers */ -#define IXGBE_IPSTXIDX 0x08900 -#define IXGBE_IPSTXSALT 0x08904 -#define IXGBE_IPSTXKEY(_i) (0x08908 + (4 * (_i))) /* 4 of these (0-3) */ -#define IXGBE_IPSRXIDX 0x08E00 -#define IXGBE_IPSRXIPADDR(_i) (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */ -#define IXGBE_IPSRXSPI 0x08E14 -#define IXGBE_IPSRXIPIDX 0x08E18 -#define IXGBE_IPSRXKEY(_i) (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */ -#define IXGBE_IPSRXSALT 0x08E2C -#define IXGBE_IPSRXMOD 0x08E30 - -#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE 0x4 - -/* DCB registers */ -#define IXGBE_RTRPCS 0x02430 -#define IXGBE_RTTDCS 0x04900 -#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */ -#define IXGBE_RTTPCS 0x0CD00 -#define IXGBE_RTRUP2TC 0x03020 -#define 
IXGBE_RTTUP2TC 0x0C800 -#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_TXLLQ(_i) (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */ -#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_RTTDQSEL 0x04904 -#define IXGBE_RTTDT1C 0x04908 -#define IXGBE_RTTDT1S 0x0490C -#define IXGBE_RTTDTECC 0x04990 -#define IXGBE_RTTDTECC_NO_BCN 0x00000100 - -#define IXGBE_RTTBCNRC 0x04984 -#define IXGBE_RTTBCNRC_RS_ENA 0x80000000 -#define IXGBE_RTTBCNRC_RF_DEC_MASK 0x00003FFF -#define IXGBE_RTTBCNRC_RF_INT_SHIFT 14 -#define IXGBE_RTTBCNRC_RF_INT_MASK \ - (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT) -#define IXGBE_RTTBCNRM 0x04980 - -/* FCoE DMA Context Registers */ -#define IXGBE_FCPTRL 0x02410 /* FC User Desc. PTR Low */ -#define IXGBE_FCPTRH 0x02414 /* FC USer Desc. PTR High */ -#define IXGBE_FCBUFF 0x02418 /* FC Buffer Control */ -#define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */ -#define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0*/ -#define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4)) -#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */ -#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */ -#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */ -#define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */ -#define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */ -#define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3 -#define IXGBE_FCBUFF_BUFFCNT_SHIFT 8 -#define IXGBE_FCBUFF_OFFSET_SHIFT 16 -#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */ -#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */ -#define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */ -#define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */ -#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16 -/* FCoE SOF/EOF */ -#define IXGBE_TEOFF 0x04A94 /* Tx FC EOF */ -#define IXGBE_TSOFF 0x04A98 /* Tx FC SOF */ -#define IXGBE_REOFF 0x05158 /* Rx FC EOF */ -#define IXGBE_RSOFF 0x051F8 /* Rx FC SOF */ -/* FCoE Filter Context Registers */ -#define IXGBE_FCFLT 0x05108 /* FC FLT Context */ -#define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */ -#define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */ -#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */ -#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */ -#define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */ -#define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */ -#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */ -#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */ -#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */ -/* FCoE Receive Control */ -#define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */ -#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */ -#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */ -#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */ -#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */ -#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */ -#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. 
Header */ -#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */ -#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */ -#define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */ -#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8 -/* FCoE Redirection */ -#define IXGBE_FCRECTL 0x0ED00 /* FC Redirection Control */ -#define IXGBE_FCRETA0 0x0ED10 /* FC Redirection Table 0 */ -#define IXGBE_FCRETA(_i) (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */ -#define IXGBE_FCRECTL_ENA 0x1 /* FCoE Redir Table Enable */ -#define IXGBE_FCRETASEL_ENA 0x2 /* FCoE FCRETASEL bit */ -#define IXGBE_FCRETA_SIZE 8 /* Max entries in FCRETA */ -#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */ - -/* Stats registers */ -#define IXGBE_CRCERRS 0x04000 -#define IXGBE_ILLERRC 0x04004 -#define IXGBE_ERRBC 0x04008 -#define IXGBE_MSPDC 0x04010 -#define IXGBE_MPC(_i) (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/ -#define IXGBE_MLFC 0x04034 -#define IXGBE_MRFC 0x04038 -#define IXGBE_RLEC 0x04040 -#define IXGBE_LXONTXC 0x03F60 -#define IXGBE_LXONRXC 0x0CF60 -#define IXGBE_LXOFFTXC 0x03F68 -#define IXGBE_LXOFFRXC 0x0CF68 -#define IXGBE_LXONRXCNT 0x041A4 -#define IXGBE_LXOFFRXCNT 0x041A8 -#define IXGBE_PXONRXCNT(_i) (0x04140 + ((_i) * 4)) /* 8 of these */ -#define IXGBE_PXOFFRXCNT(_i) (0x04160 + ((_i) * 4)) /* 8 of these */ -#define IXGBE_PXON2OFFCNT(_i) (0x03240 + ((_i) * 4)) /* 8 of these */ -#define IXGBE_PXONTXC(_i) (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/ -#define IXGBE_PXONRXC(_i) (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/ -#define IXGBE_PXOFFTXC(_i) (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/ -#define IXGBE_PXOFFRXC(_i) (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/ -#define IXGBE_PRC64 0x0405C -#define IXGBE_PRC127 0x04060 -#define IXGBE_PRC255 0x04064 -#define IXGBE_PRC511 0x04068 -#define IXGBE_PRC1023 0x0406C -#define IXGBE_PRC1522 0x04070 -#define IXGBE_GPRC 0x04074 -#define IXGBE_BPRC 0x04078 -#define IXGBE_MPRC 0x0407C -#define IXGBE_GPTC 0x04080 -#define IXGBE_GORCL 0x04088 -#define IXGBE_GORCH 0x0408C -#define IXGBE_GOTCL 0x04090 -#define IXGBE_GOTCH 0x04094 -#define IXGBE_RNBC(_i) (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/ -#define IXGBE_RUC 0x040A4 -#define IXGBE_RFC 0x040A8 -#define IXGBE_ROC 0x040AC -#define IXGBE_RJC 0x040B0 -#define IXGBE_MNGPRC 0x040B4 -#define IXGBE_MNGPDC 0x040B8 -#define IXGBE_MNGPTC 0x0CF90 -#define IXGBE_TORL 0x040C0 -#define IXGBE_TORH 0x040C4 -#define IXGBE_TPR 0x040D0 -#define IXGBE_TPT 0x040D4 -#define IXGBE_PTC64 0x040D8 -#define IXGBE_PTC127 0x040DC -#define IXGBE_PTC255 0x040E0 -#define IXGBE_PTC511 0x040E4 -#define IXGBE_PTC1023 0x040E8 -#define IXGBE_PTC1522 0x040EC -#define IXGBE_MPTC 0x040F0 -#define IXGBE_BPTC 0x040F4 -#define IXGBE_XEC 0x04120 -#define IXGBE_SSVPC 0x08780 - -#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4)) -#define IXGBE_TQSMR(_i) (((_i) <= 7) ? 
(0x07300 + ((_i) * 4)) : \ - (0x08600 + ((_i) * 4))) -#define IXGBE_TQSM(_i) (0x08600 + ((_i) * 4)) - -#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */ -#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */ -#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */ -#define IXGBE_FCCRC 0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */ -#define IXGBE_FCOERPDC 0x0241C /* FCoE Rx Packets Dropped Count */ -#define IXGBE_FCLAST 0x02424 /* FCoE Last Error Count */ -#define IXGBE_FCOEPRC 0x02428 /* Number of FCoE Packets Received */ -#define IXGBE_FCOEDWRC 0x0242C /* Number of FCoE DWords Received */ -#define IXGBE_FCOEPTC 0x08784 /* Number of FCoE Packets Transmitted */ -#define IXGBE_FCOEDWTC 0x08788 /* Number of FCoE DWords Transmitted */ -#define IXGBE_FCCRC_CNT_MASK 0x0000FFFF /* CRC_CNT: bit 0 - 15 */ -#define IXGBE_FCLAST_CNT_MASK 0x0000FFFF /* Last_CNT: bit 0 - 15 */ -#define IXGBE_O2BGPTC 0x041C4 -#define IXGBE_O2BSPC 0x087B0 -#define IXGBE_B2OSPC 0x041C0 -#define IXGBE_B2OGPRC 0x02F90 -#define IXGBE_BUPRC 0x04180 -#define IXGBE_BMPRC 0x04184 -#define IXGBE_BBPRC 0x04188 -#define IXGBE_BUPTC 0x0418C -#define IXGBE_BMPTC 0x04190 -#define IXGBE_BBPTC 0x04194 -#define IXGBE_BCRCERRS 0x04198 -#define IXGBE_BXONRXC 0x0419C -#define IXGBE_BXOFFRXC 0x041E0 -#define IXGBE_BXONTXC 0x041E4 -#define IXGBE_BXOFFTXC 0x041E8 -#define IXGBE_PCRC8ECL 0x0E810 -#define IXGBE_PCRC8ECH 0x0E811 -#define IXGBE_PCRC8ECH_MASK 0x1F -#define IXGBE_LDPCECL 0x0E820 -#define IXGBE_LDPCECH 0x0E821 - -/* Management */ -#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_MANC 0x05820 -#define IXGBE_MFVAL 0x05824 -#define IXGBE_MANC2H 0x05860 -#define IXGBE_MDEF(_i) (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_MIPAF 0x058B0 -#define IXGBE_MMAL(_i) (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */ -#define IXGBE_MMAH(_i) (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */ -#define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */ -#define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */ -#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_LSWFW 0x15014 -#define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */ -#define IXGBE_BMCIPVAL 0x05060 -#define IXGBE_BMCIP_IPADDR_TYPE 0x00000001 -#define IXGBE_BMCIP_IPADDR_VALID 0x00000002 - -/* Management Bit Fields and Masks */ -#define IXGBE_MANC_EN_BMC2OS 0x10000000 /* Ena BMC2OS and OS2BMC traffic */ -#define IXGBE_MANC_EN_BMC2OS_SHIFT 28 - -/* Firmware Semaphore Register */ -#define IXGBE_FWSM_MODE_MASK 0xE - -/* ARC Subsystem registers */ -#define IXGBE_HICR 0x15F00 -#define IXGBE_FWSTS 0x15F0C -#define IXGBE_HSMC0R 0x15F04 -#define IXGBE_HSMC1R 0x15F08 -#define IXGBE_SWSR 0x15F10 -#define IXGBE_HFDR 0x15FE8 -#define IXGBE_FLEX_MNG 0x15800 /* 0x15800 - 0x15EFC */ - -#define IXGBE_HICR_EN 0x01 /* Enable bit - RO */ -/* Driver sets this bit when done to put command in RAM */ -#define IXGBE_HICR_C 0x02 -#define IXGBE_HICR_SV 0x04 /* Status Validity */ -#define 
IXGBE_HICR_FW_RESET_ENABLE 0x40 -#define IXGBE_HICR_FW_RESET 0x80 - -/* PCI-E registers */ -#define IXGBE_GCR 0x11000 -#define IXGBE_GTV 0x11004 -#define IXGBE_FUNCTAG 0x11008 -#define IXGBE_GLT 0x1100C -#define IXGBE_PCIEPIPEADR 0x11004 -#define IXGBE_PCIEPIPEDAT 0x11008 -#define IXGBE_GSCL_1 0x11010 -#define IXGBE_GSCL_2 0x11014 -#define IXGBE_GSCL_3 0x11018 -#define IXGBE_GSCL_4 0x1101C -#define IXGBE_GSCN_0 0x11020 -#define IXGBE_GSCN_1 0x11024 -#define IXGBE_GSCN_2 0x11028 -#define IXGBE_GSCN_3 0x1102C -#define IXGBE_FACTPS 0x10150 -#define IXGBE_PCIEANACTL 0x11040 -#define IXGBE_SWSM 0x10140 -#define IXGBE_FWSM 0x10148 -#define IXGBE_GSSR 0x10160 -#define IXGBE_MREVID 0x11064 -#define IXGBE_DCA_ID 0x11070 -#define IXGBE_DCA_CTRL 0x11074 -#define IXGBE_SWFW_SYNC IXGBE_GSSR - -/* PCI-E registers 82599-Specific */ -#define IXGBE_GCR_EXT 0x11050 -#define IXGBE_GSCL_5_82599 0x11030 -#define IXGBE_GSCL_6_82599 0x11034 -#define IXGBE_GSCL_7_82599 0x11038 -#define IXGBE_GSCL_8_82599 0x1103C -#define IXGBE_PHYADR_82599 0x11040 -#define IXGBE_PHYDAT_82599 0x11044 -#define IXGBE_PHYCTL_82599 0x11048 -#define IXGBE_PBACLR_82599 0x11068 -#define IXGBE_CIAA_82599 0x11088 -#define IXGBE_CIAD_82599 0x1108C -#define IXGBE_PICAUSE 0x110B0 -#define IXGBE_PIENA 0x110B8 -#define IXGBE_CDQ_MBR_82599 0x110B4 -#define IXGBE_PCIESPARE 0x110BC -#define IXGBE_MISC_REG_82599 0x110F0 -#define IXGBE_ECC_CTRL_0_82599 0x11100 -#define IXGBE_ECC_CTRL_1_82599 0x11104 -#define IXGBE_ECC_STATUS_82599 0x110E0 -#define IXGBE_BAR_CTRL_82599 0x110F4 - -/* PCI Express Control */ -#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 -#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 -#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 -#define IXGBE_GCR_CAP_VER2 0x00040000 - -#define IXGBE_GCR_EXT_MSIX_EN 0x80000000 -#define IXGBE_GCR_EXT_BUFFERS_CLEAR 0x40000000 -#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001 -#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002 -#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003 -#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ - IXGBE_GCR_EXT_VT_MODE_64) -/* Time Sync Registers */ -#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ -#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ -#define IXGBE_RXSTMPL 0x051E8 /* Rx timestamp Low - RO */ -#define IXGBE_RXSTMPH 0x051A4 /* Rx timestamp High - RO */ -#define IXGBE_RXSATRL 0x051A0 /* Rx timestamp attribute low - RO */ -#define IXGBE_RXSATRH 0x051A8 /* Rx timestamp attribute high - RO */ -#define IXGBE_RXMTRL 0x05120 /* RX message type register low - RW */ -#define IXGBE_TXSTMPL 0x08C04 /* Tx timestamp value Low - RO */ -#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ -#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ -#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ -#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ -#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ -#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ -#define IXGBE_TSAUXC 0x08C20 /* TimeSync Auxiliary Control register - RW */ -#define IXGBE_TRGTTIML0 0x08C24 /* Target Time Register 0 Low - RW */ -#define IXGBE_TRGTTIMH0 0x08C28 /* Target Time Register 0 High - RW */ -#define IXGBE_TRGTTIML1 0x08C2C /* Target Time Register 1 Low - RW */ -#define IXGBE_TRGTTIMH1 0x08C30 /* Target Time Register 1 High - RW */ -#define IXGBE_FREQOUT0 0x08C34 /* Frequency Out 0 Control register - RW */ -#define IXGBE_FREQOUT1 0x08C38 /* Frequency Out 1 
Control register - RW */ -#define IXGBE_AUXSTMPL0 0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */ -#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ -#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ -#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ - -/* Diagnostic Registers */ -#define IXGBE_RDSTATCTL 0x02C20 -#define IXGBE_RDSTAT(_i) (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */ -#define IXGBE_RDHMPN 0x02F08 -#define IXGBE_RIC_DW(_i) (0x02F10 + ((_i) * 4)) -#define IXGBE_RDPROBE 0x02F20 -#define IXGBE_RDMAM 0x02F30 -#define IXGBE_RDMAD 0x02F34 -#define IXGBE_TDSTATCTL 0x07C20 -#define IXGBE_TDSTAT(_i) (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */ -#define IXGBE_TDHMPN 0x07F08 -#define IXGBE_TDHMPN2 0x082FC -#define IXGBE_TXDESCIC 0x082CC -#define IXGBE_TIC_DW(_i) (0x07F10 + ((_i) * 4)) -#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4)) -#define IXGBE_TDPROBE 0x07F20 -#define IXGBE_TXBUFCTRL 0x0C600 -#define IXGBE_TXBUFDATA0 0x0C610 -#define IXGBE_TXBUFDATA1 0x0C614 -#define IXGBE_TXBUFDATA2 0x0C618 -#define IXGBE_TXBUFDATA3 0x0C61C -#define IXGBE_RXBUFCTRL 0x03600 -#define IXGBE_RXBUFDATA0 0x03610 -#define IXGBE_RXBUFDATA1 0x03614 -#define IXGBE_RXBUFDATA2 0x03618 -#define IXGBE_RXBUFDATA3 0x0361C -#define IXGBE_PCIE_DIAG(_i) (0x11090 + ((_i) * 4)) /* 8 of these */ -#define IXGBE_RFVAL 0x050A4 -#define IXGBE_MDFTC1 0x042B8 -#define IXGBE_MDFTC2 0x042C0 -#define IXGBE_MDFTFIFO1 0x042C4 -#define IXGBE_MDFTFIFO2 0x042C8 -#define IXGBE_MDFTS 0x042CC -#define IXGBE_RXDATAWRPTR(_i) (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/ -#define IXGBE_RXDESCWRPTR(_i) (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/ -#define IXGBE_RXDATARDPTR(_i) (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/ -#define IXGBE_RXDESCRDPTR(_i) (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/ -#define IXGBE_TXDATAWRPTR(_i) (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/ -#define IXGBE_TXDESCWRPTR(_i) (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/ -#define IXGBE_TXDATARDPTR(_i) (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/ -#define IXGBE_TXDESCRDPTR(_i) (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/ -#define IXGBE_PCIEECCCTL 0x1106C -#define IXGBE_RXWRPTR(_i) (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/ -#define IXGBE_RXUSED(_i) (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/ -#define IXGBE_RXRDPTR(_i) (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/ -#define IXGBE_RXRDWRPTR(_i) (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/ -#define IXGBE_TXWRPTR(_i) (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/ -#define IXGBE_TXUSED(_i) (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/ -#define IXGBE_TXRDPTR(_i) (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/ -#define IXGBE_TXRDWRPTR(_i) (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/ -#define IXGBE_PCIEECCCTL0 0x11100 -#define IXGBE_PCIEECCCTL1 0x11104 -#define IXGBE_RXDBUECC 0x03F70 -#define IXGBE_TXDBUECC 0x0CF70 -#define IXGBE_RXDBUEST 0x03F74 -#define IXGBE_TXDBUEST 0x0CF74 -#define IXGBE_PBTXECC 0x0C300 -#define IXGBE_PBRXECC 0x03300 -#define IXGBE_GHECCR 0x110B0 - -/* MAC Registers */ -#define IXGBE_PCS1GCFIG 0x04200 -#define IXGBE_PCS1GLCTL 0x04208 -#define IXGBE_PCS1GLSTA 0x0420C -#define IXGBE_PCS1GDBG0 0x04210 -#define IXGBE_PCS1GDBG1 0x04214 -#define IXGBE_PCS1GANA 0x04218 -#define IXGBE_PCS1GANLP 0x0421C -#define IXGBE_PCS1GANNP 0x04220 -#define IXGBE_PCS1GANLPNP 0x04224 -#define IXGBE_HLREG0 0x04240 -#define IXGBE_HLREG1 0x04244 -#define IXGBE_PAP 0x04248 
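The time-sync block listed above exposes the adapter's 64-bit SYSTIM counter as a low/high register pair (IXGBE_SYSTIML at 0x08C0C, IXGBE_SYSTIMH at 0x08C10, both read-only). A minimal sketch of combining such a pair, assuming a hypothetical rd32() MMIO helper (a real driver would use its own register accessor) and reading the low half first so both halves come from one consistent sample:

#include <stdint.h>

#define IXGBE_SYSTIML 0x08C0C /* System time register Low  - RO (from the list above) */
#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO (from the list above) */

/* Hypothetical MMIO read helper over the mapped register BAR. */
static inline uint32_t rd32(volatile uint8_t *hw_addr, uint32_t reg)
{
	return *(volatile uint32_t *)(hw_addr + reg);
}

/* Combine the low/high halves into one 64-bit timestamp, low half first. */
static inline uint64_t read_systim(volatile uint8_t *hw_addr)
{
	uint64_t systim = rd32(hw_addr, IXGBE_SYSTIML);

	systim |= (uint64_t)rd32(hw_addr, IXGBE_SYSTIMH) << 32;
	return systim;
}

The same low-half-then-high-half pattern applies to the other paired counters in this header (RXSTMPL/RXSTMPH, TXSTMPL/TXSTMPH, GORCL/GORCH, and so on).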
-#define IXGBE_MACA 0x0424C -#define IXGBE_APAE 0x04250 -#define IXGBE_ARD 0x04254 -#define IXGBE_AIS 0x04258 -#define IXGBE_MSCA 0x0425C -#define IXGBE_MSRWD 0x04260 -#define IXGBE_MLADD 0x04264 -#define IXGBE_MHADD 0x04268 -#define IXGBE_MAXFRS 0x04268 -#define IXGBE_TREG 0x0426C -#define IXGBE_PCSS1 0x04288 -#define IXGBE_PCSS2 0x0428C -#define IXGBE_XPCSS 0x04290 -#define IXGBE_MFLCN 0x04294 -#define IXGBE_SERDESC 0x04298 -#define IXGBE_MACS 0x0429C -#define IXGBE_AUTOC 0x042A0 -#define IXGBE_LINKS 0x042A4 -#define IXGBE_LINKS2 0x04324 -#define IXGBE_AUTOC2 0x042A8 -#define IXGBE_AUTOC3 0x042AC -#define IXGBE_ANLP1 0x042B0 -#define IXGBE_ANLP2 0x042B4 -#define IXGBE_MACC 0x04330 -#define IXGBE_ATLASCTL 0x04800 -#define IXGBE_MMNGC 0x042D0 -#define IXGBE_ANLPNP1 0x042D4 -#define IXGBE_ANLPNP2 0x042D8 -#define IXGBE_KRPCSFC 0x042E0 -#define IXGBE_KRPCSS 0x042E4 -#define IXGBE_FECS1 0x042E8 -#define IXGBE_FECS2 0x042EC -#define IXGBE_SMADARCTL 0x14F10 -#define IXGBE_MPVC 0x04318 -#define IXGBE_SGMIIC 0x04314 - -/* Statistics Registers */ -#define IXGBE_RXNFGPC 0x041B0 -#define IXGBE_RXNFGBCL 0x041B4 -#define IXGBE_RXNFGBCH 0x041B8 -#define IXGBE_RXDGPC 0x02F50 -#define IXGBE_RXDGBCL 0x02F54 -#define IXGBE_RXDGBCH 0x02F58 -#define IXGBE_RXDDGPC 0x02F5C -#define IXGBE_RXDDGBCL 0x02F60 -#define IXGBE_RXDDGBCH 0x02F64 -#define IXGBE_RXLPBKGPC 0x02F68 -#define IXGBE_RXLPBKGBCL 0x02F6C -#define IXGBE_RXLPBKGBCH 0x02F70 -#define IXGBE_RXDLPBKGPC 0x02F74 -#define IXGBE_RXDLPBKGBCL 0x02F78 -#define IXGBE_RXDLPBKGBCH 0x02F7C -#define IXGBE_TXDGPC 0x087A0 -#define IXGBE_TXDGBCL 0x087A4 -#define IXGBE_TXDGBCH 0x087A8 - -#define IXGBE_RXDSTATCTRL 0x02F40 - -/* Copper Pond 2 link timeout */ -#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50 - -/* Omer CORECTL */ -#define IXGBE_CORECTL 0x014F00 -/* BARCTRL */ -#define IXGBE_BARCTRL 0x110F4 -#define IXGBE_BARCTRL_FLSIZE 0x0700 -#define IXGBE_BARCTRL_FLSIZE_SHIFT 8 -#define IXGBE_BARCTRL_CSRSIZE 0x2000 - -/* RSCCTL Bit Masks */ -#define IXGBE_RSCCTL_RSCEN 0x01 -#define IXGBE_RSCCTL_MAXDESC_1 0x00 -#define IXGBE_RSCCTL_MAXDESC_4 0x04 -#define IXGBE_RSCCTL_MAXDESC_8 0x08 -#define IXGBE_RSCCTL_MAXDESC_16 0x0C - -/* RSCDBU Bit Masks */ -#define IXGBE_RSCDBU_RSCSMALDIS_MASK 0x0000007F -#define IXGBE_RSCDBU_RSCACKDIS 0x00000080 - -/* RDRXCTL Bit Masks */ -#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 /* Rx Desc Min THLD Size */ -#define IXGBE_RDRXCTL_CRCSTRIP 0x00000002 /* CRC Strip */ -#define IXGBE_RDRXCTL_MVMEN 0x00000020 -#define IXGBE_RDRXCTL_DMAIDONE 0x00000008 /* DMA init cycle done */ -#define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */ -#define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */ -#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disabl RSC compl on LLI */ -#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC ena */ -#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC ena */ - -/* RQTC Bit Masks and Shifts */ -#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4) -#define IXGBE_RQTC_TC0_MASK (0x7 << 0) -#define IXGBE_RQTC_TC1_MASK (0x7 << 4) -#define IXGBE_RQTC_TC2_MASK (0x7 << 8) -#define IXGBE_RQTC_TC3_MASK (0x7 << 12) -#define IXGBE_RQTC_TC4_MASK (0x7 << 16) -#define IXGBE_RQTC_TC5_MASK (0x7 << 20) -#define IXGBE_RQTC_TC6_MASK (0x7 << 24) -#define IXGBE_RQTC_TC7_MASK (0x7 << 28) - -/* PSRTYPE.RQPL Bit masks and shift */ -#define IXGBE_PSRTYPE_RQPL_MASK 0x7 -#define IXGBE_PSRTYPE_RQPL_SHIFT 29 - -/* CTRL Bit Masks */ -#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */ -#define 
IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */ -#define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */ -#define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) - -/* FACTPS */ -#define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */ - -/* MHADD Bit Masks */ -#define IXGBE_MHADD_MFS_MASK 0xFFFF0000 -#define IXGBE_MHADD_MFS_SHIFT 16 - -/* Extended Device Control */ -#define IXGBE_CTRL_EXT_PFRSTD 0x00004000 /* Physical Function Reset Done */ -#define IXGBE_CTRL_EXT_NS_DIS 0x00010000 /* No Snoop disable */ -#define IXGBE_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ -#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ - -/* Direct Cache Access (DCA) definitions */ -#define IXGBE_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ -#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ - -#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ -#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ - -#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ -#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */ -#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */ -#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */ -#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */ -#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */ -#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */ -#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ -#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ - -#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ -#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */ -#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */ -#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ -#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ -#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ -#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ -#define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */ - -/* MSCA Bit Masks */ -#define IXGBE_MSCA_NP_ADDR_MASK 0x0000FFFF /* MDI Addr (new prot) */ -#define IXGBE_MSCA_NP_ADDR_SHIFT 0 -#define IXGBE_MSCA_DEV_TYPE_MASK 0x001F0000 /* Dev Type (new prot) */ -#define IXGBE_MSCA_DEV_TYPE_SHIFT 16 /* Register Address (old prot */ -#define IXGBE_MSCA_PHY_ADDR_MASK 0x03E00000 /* PHY Address mask */ -#define IXGBE_MSCA_PHY_ADDR_SHIFT 21 /* PHY Address shift*/ -#define IXGBE_MSCA_OP_CODE_MASK 0x0C000000 /* OP CODE mask */ -#define IXGBE_MSCA_OP_CODE_SHIFT 26 /* OP CODE shift */ -#define IXGBE_MSCA_ADDR_CYCLE 0x00000000 /* OP CODE 00 (addr cycle) */ -#define IXGBE_MSCA_WRITE 0x04000000 /* OP CODE 01 (wr) */ -#define IXGBE_MSCA_READ 0x0C000000 /* OP CODE 11 (rd) */ -#define IXGBE_MSCA_READ_AUTOINC 0x08000000 /* OP CODE 10 (rd auto inc)*/ -#define IXGBE_MSCA_ST_CODE_MASK 0x30000000 /* ST Code mask */ -#define IXGBE_MSCA_ST_CODE_SHIFT 28 /* ST Code shift */ -#define IXGBE_MSCA_NEW_PROTOCOL 0x00000000 /* ST CODE 00 (new prot) */ -#define IXGBE_MSCA_OLD_PROTOCOL 0x10000000 /* ST CODE 01 (old prot) */ -#define IXGBE_MSCA_MDI_COMMAND 0x40000000 /* Initiate MDI command */ -#define IXGBE_MSCA_MDI_IN_PROG_EN 0x80000000 /* MDI in progress ena */ - -/* MSRWD bit masks */ -#define IXGBE_MSRWD_WRITE_DATA_MASK 0x0000FFFF -#define IXGBE_MSRWD_WRITE_DATA_SHIFT 0 -#define 
IXGBE_MSRWD_READ_DATA_MASK 0xFFFF0000 -#define IXGBE_MSRWD_READ_DATA_SHIFT 16 - -/* Atlas registers */ -#define IXGBE_ATLAS_PDN_LPBK 0x24 -#define IXGBE_ATLAS_PDN_10G 0xB -#define IXGBE_ATLAS_PDN_1G 0xC -#define IXGBE_ATLAS_PDN_AN 0xD - -/* Atlas bit masks */ -#define IXGBE_ATLASCTL_WRITE_CMD 0x00010000 -#define IXGBE_ATLAS_PDN_TX_REG_EN 0x10 -#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL 0xF0 -#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL 0xF0 -#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL 0xF0 - -/* Omer bit masks */ -#define IXGBE_CORECTL_WRITE_CMD 0x00010000 - -/* Device Type definitions for new protocol MDIO commands */ -#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 -#define IXGBE_MDIO_PCS_DEV_TYPE 0x3 -#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4 -#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */ -#define IXGBE_TWINAX_DEV 1 - -#define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */ - -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Ctrl Reg */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018 -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010 - -#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */ -#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */ -#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */ -#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */ -#define IXGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */ -#define IXGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */ -#define IXGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/ -#define IXGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/ -#define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */ -#define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */ -#define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */ -#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */ -#define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */ -#define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */ -#define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */ -#define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */ -#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ - -#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */ -#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ -#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ -#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */ - -/* MII clause 22/28 definitions */ -#define IXGBE_MDIO_PHY_LOW_POWER_MODE 0x0800 - -#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */ -#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */ -#define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */ -#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */ -#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/ -#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/ -#define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/ -#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */ -#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */ -#define IXGBE_MII_RESTART 0x200 -#define 
IXGBE_MII_AUTONEG_COMPLETE 0x20 -#define IXGBE_MII_AUTONEG_LINK_UP 0x04 -#define IXGBE_MII_AUTONEG_REG 0x0 - -#define IXGBE_PHY_REVISION_MASK 0xFFFFFFF0 -#define IXGBE_MAX_PHY_ADDR 32 - -/* PHY IDs*/ -#define TN1010_PHY_ID 0x00A19410 -#define TNX_FW_REV 0xB -#define X540_PHY_ID 0x01540200 -#define AQ_FW_REV 0x20 -#define QT2022_PHY_ID 0x0043A400 -#define ATH_PHY_ID 0x03429050 - -/* PHY Types */ -#define IXGBE_M88E1145_E_PHY_ID 0x01410CD0 - -/* Special PHY Init Routine */ -#define IXGBE_PHY_INIT_OFFSET_NL 0x002B -#define IXGBE_PHY_INIT_END_NL 0xFFFF -#define IXGBE_CONTROL_MASK_NL 0xF000 -#define IXGBE_DATA_MASK_NL 0x0FFF -#define IXGBE_CONTROL_SHIFT_NL 12 -#define IXGBE_DELAY_NL 0 -#define IXGBE_DATA_NL 1 -#define IXGBE_CONTROL_NL 0x000F -#define IXGBE_CONTROL_EOL_NL 0x0FFF -#define IXGBE_CONTROL_SOL_NL 0x0000 - -/* General purpose Interrupt Enable */ -#define IXGBE_SDP0_GPIEN 0x00000001 /* SDP0 */ -#define IXGBE_SDP1_GPIEN 0x00000002 /* SDP1 */ -#define IXGBE_SDP2_GPIEN 0x00000004 /* SDP2 */ -#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */ -#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */ -#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */ -#define IXGBE_GPIE_EIAME 0x40000000 -#define IXGBE_GPIE_PBA_SUPPORT 0x80000000 -#define IXGBE_GPIE_RSC_DELAY_SHIFT 11 -#define IXGBE_GPIE_VTMODE_MASK 0x0000C000 /* VT Mode Mask */ -#define IXGBE_GPIE_VTMODE_16 0x00004000 /* 16 VFs 8 queues per VF */ -#define IXGBE_GPIE_VTMODE_32 0x00008000 /* 32 VFs 4 queues per VF */ -#define IXGBE_GPIE_VTMODE_64 0x0000C000 /* 64 VFs 2 queues per VF */ - -/* Packet Buffer Initialization */ -#define IXGBE_MAX_PACKET_BUFFERS 8 - -#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */ -#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */ -#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */ -#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */ -#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */ -#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */ -#define IXGBE_RXPBSIZE_MAX 0x00080000 /* 512KB Packet Buffer */ -#define IXGBE_TXPBSIZE_MAX 0x00028000 /* 160KB Packet Buffer */ - -#define IXGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */ -#define IXGBE_MAX_PB 8 - -/* Packet buffer allocation strategies */ -enum { - PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */ -#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL - PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */ -#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED -}; - -/* Transmit Flow Control status */ -#define IXGBE_TFCS_TXOFF 0x00000001 -#define IXGBE_TFCS_TXOFF0 0x00000100 -#define IXGBE_TFCS_TXOFF1 0x00000200 -#define IXGBE_TFCS_TXOFF2 0x00000400 -#define IXGBE_TFCS_TXOFF3 0x00000800 -#define IXGBE_TFCS_TXOFF4 0x00001000 -#define IXGBE_TFCS_TXOFF5 0x00002000 -#define IXGBE_TFCS_TXOFF6 0x00004000 -#define IXGBE_TFCS_TXOFF7 0x00008000 - -/* TCP Timer */ -#define IXGBE_TCPTIMER_KS 0x00000100 -#define IXGBE_TCPTIMER_COUNT_ENABLE 0x00000200 -#define IXGBE_TCPTIMER_COUNT_FINISH 0x00000400 -#define IXGBE_TCPTIMER_LOOP 0x00000800 -#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF - -/* HLREG0 Bit Masks */ -#define IXGBE_HLREG0_TXCRCEN 0x00000001 /* bit 0 */ -#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 /* bit 1 */ -#define IXGBE_HLREG0_JUMBOEN 0x00000004 /* bit 2 */ -#define IXGBE_HLREG0_TXPADEN 0x00000400 /* bit 10 */ -#define IXGBE_HLREG0_TXPAUSEEN 0x00001000 /* bit 12 */ -#define IXGBE_HLREG0_RXPAUSEEN 0x00004000 /* bit 14 */ -#define IXGBE_HLREG0_LPBK 
0x00008000 /* bit 15 */ -#define IXGBE_HLREG0_MDCSPD 0x00010000 /* bit 16 */ -#define IXGBE_HLREG0_CONTMDC 0x00020000 /* bit 17 */ -#define IXGBE_HLREG0_CTRLFLTR 0x00040000 /* bit 18 */ -#define IXGBE_HLREG0_PREPEND 0x00F00000 /* bits 20-23 */ -#define IXGBE_HLREG0_PRIPAUSEEN 0x01000000 /* bit 24 */ -#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000 /* bits 25-26 */ -#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000 /* bit 27 */ -#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000 /* bit 28 */ - -/* VMD_CTL bitmasks */ -#define IXGBE_VMD_CTL_VMDQ_EN 0x00000001 -#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002 - -/* VT_CTL bitmasks */ -#define IXGBE_VT_CTL_DIS_DEFPL 0x20000000 /* disable default pool */ -#define IXGBE_VT_CTL_REPLEN 0x40000000 /* replication enabled */ -#define IXGBE_VT_CTL_VT_ENABLE 0x00000001 /* Enable VT Mode */ -#define IXGBE_VT_CTL_POOL_SHIFT 7 -#define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT) - -/* VMOLR bitmasks */ -#define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */ -#define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */ -#define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */ -#define IXGBE_VMOLR_BAM 0x08000000 /* accept broadcast packets */ -#define IXGBE_VMOLR_MPE 0x10000000 /* multicast promiscuous */ - -/* VFRE bitmask */ -#define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF - -#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ - -/* RDHMPN and TDHMPN bitmasks */ -#define IXGBE_RDHMPN_RDICADDR 0x007FF800 -#define IXGBE_RDHMPN_RDICRDREQ 0x00800000 -#define IXGBE_RDHMPN_RDICADDR_SHIFT 11 -#define IXGBE_TDHMPN_TDICADDR 0x003FF800 -#define IXGBE_TDHMPN_TDICRDREQ 0x00800000 -#define IXGBE_TDHMPN_TDICADDR_SHIFT 11 - -#define IXGBE_RDMAM_MEM_SEL_SHIFT 13 -#define IXGBE_RDMAM_DWORD_SHIFT 9 -#define IXGBE_RDMAM_DESC_COMP_FIFO 1 -#define IXGBE_RDMAM_DFC_CMD_FIFO 2 -#define IXGBE_RDMAM_RSC_HEADER_ADDR 3 -#define IXGBE_RDMAM_TCN_STATUS_RAM 4 -#define IXGBE_RDMAM_WB_COLL_FIFO 5 -#define IXGBE_RDMAM_QSC_CNT_RAM 6 -#define IXGBE_RDMAM_QSC_FCOE_RAM 7 -#define IXGBE_RDMAM_QSC_QUEUE_CNT 8 -#define IXGBE_RDMAM_QSC_QUEUE_RAM 0xA -#define IXGBE_RDMAM_QSC_RSC_RAM 0xB -#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE 135 -#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT 4 -#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE 48 -#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT 7 -#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE 32 -#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT 4 -#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE 256 -#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT 9 -#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE 8 -#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT 4 -#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE 64 -#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT 4 -#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE 512 -#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT 5 -#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE 32 -#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT 4 -#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE 128 -#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT 8 -#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE 32 -#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT 8 - -#define IXGBE_TXDESCIC_READY 0x80000000 - -/* Receive Checksum Control */ -#define IXGBE_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ -#define IXGBE_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ - -/* FCRTL Bit Masks */ -#define IXGBE_FCRTL_XONE 0x80000000 /* XON enable */ -#define IXGBE_FCRTH_FCEN 0x80000000 /* Packet buffer fc enable */ - -/* PAP bit masks*/ -#define IXGBE_PAP_TXPAUSECNT_MASK 0x0000FFFF /* Pause counter mask */ - -/* RMCS Bit Masks */ -#define IXGBE_RMCS_RRM 0x00000002 /* Rx Recycle Mode 
enable */ -/* Receive Arbitration Control: 0 Round Robin, 1 DFP */ -#define IXGBE_RMCS_RAC 0x00000004 -/* Deficit Fixed Prio ena */ -#define IXGBE_RMCS_DFP IXGBE_RMCS_RAC -#define IXGBE_RMCS_TFCE_802_3X 0x00000008 /* Tx Priority FC ena */ -#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 /* Tx Priority FC ena */ -#define IXGBE_RMCS_ARBDIS 0x00000040 /* Arbitration disable bit */ - -/* FCCFG Bit Masks */ -#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 /* Tx link FC enable */ -#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 /* Tx priority FC enable */ - -/* Interrupt register bitmasks */ - -/* Extended Interrupt Cause Read */ -#define IXGBE_EICR_RTX_QUEUE 0x0000FFFF /* RTx Queue Interrupt */ -#define IXGBE_EICR_FLOW_DIR 0x00010000 /* FDir Exception */ -#define IXGBE_EICR_RX_MISS 0x00020000 /* Packet Buffer Overrun */ -#define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */ -#define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */ -#define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ -#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ -#define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ -#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ -#define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */ -#define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */ -#define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */ -#define IXGBE_EICR_ECC 0x10000000 /* ECC Error */ -#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */ -#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */ -#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ -#define IXGBE_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ - -/* Extended Interrupt Cause Set */ -#define IXGBE_EICS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ -#define IXGBE_EICS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ -#define IXGBE_EICS_RX_MISS IXGBE_EICR_RX_MISS /* Pkt Buffer Overrun */ -#define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */ -#define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ -#define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */ -#define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ -#define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ -#define IXGBE_EICS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ -#define IXGBE_EICS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ -#define IXGBE_EICS_ECC IXGBE_EICR_ECC /* ECC Error */ -#define IXGBE_EICS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ -#define IXGBE_EICS_DHER IXGBE_EICR_DHER /* Desc Handler Error */ -#define IXGBE_EICS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ -#define IXGBE_EICS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ - -/* Extended Interrupt Mask Set */ -#define IXGBE_EIMS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ -#define IXGBE_EIMS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ -#define IXGBE_EIMS_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ -#define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */ -#define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ -#define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ -#define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ -#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */ -#define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ -#define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ -#define 
IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ -#define IXGBE_EIMS_ECC IXGBE_EICR_ECC /* ECC Error */ -#define IXGBE_EIMS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ -#define IXGBE_EIMS_DHER IXGBE_EICR_DHER /* Descr Handler Error */ -#define IXGBE_EIMS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ -#define IXGBE_EIMS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ - -/* Extended Interrupt Mask Clear */ -#define IXGBE_EIMC_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ -#define IXGBE_EIMC_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ -#define IXGBE_EIMC_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ -#define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */ -#define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ -#define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */ -#define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ -#define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ -#define IXGBE_EIMC_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ -#define IXGBE_EIMC_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ -#define IXGBE_EIMC_ECC IXGBE_EICR_ECC /* ECC Error */ -#define IXGBE_EIMC_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ -#define IXGBE_EIMC_DHER IXGBE_EICR_DHER /* Desc Handler Err */ -#define IXGBE_EIMC_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ -#define IXGBE_EIMC_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ - -#define IXGBE_EIMS_ENABLE_MASK ( \ - IXGBE_EIMS_RTX_QUEUE | \ - IXGBE_EIMS_LSC | \ - IXGBE_EIMS_TCP_TIMER | \ - IXGBE_EIMS_OTHER) - -/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ -#define IXGBE_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ -#define IXGBE_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ -#define IXGBE_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ -#define IXGBE_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ -#define IXGBE_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ -#define IXGBE_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ -#define IXGBE_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ -#define IXGBE_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ -#define IXGBE_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ -#define IXGBE_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of control bits */ -#define IXGBE_IMIR_SIZE_BP_82599 0x00001000 /* Packet size bypass */ -#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */ -#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */ -#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */ -#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */ -#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */ -#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */ -#define IXGBE_IMIR_CTRL_BP_82599 0x00080000 /* Bypass chk of ctrl bits */ -#define IXGBE_IMIR_LLI_EN_82599 0x00100000 /* Enables low latency Int */ -#define IXGBE_IMIR_RX_QUEUE_MASK_82599 0x0000007F /* Rx Queue Mask */ -#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */ -#define IXGBE_IMIRVP_PRIORITY_MASK 0x00000007 /* VLAN priority mask */ -#define IXGBE_IMIRVP_PRIORITY_EN 0x00000008 /* VLAN priority enable */ - -#define IXGBE_MAX_FTQF_FILTERS 128 -#define IXGBE_FTQF_PROTOCOL_MASK 0x00000003 -#define IXGBE_FTQF_PROTOCOL_TCP 0x00000000 -#define IXGBE_FTQF_PROTOCOL_UDP 0x00000001 -#define IXGBE_FTQF_PROTOCOL_SCTP 2 -#define 
IXGBE_FTQF_PRIORITY_MASK 0x00000007 -#define IXGBE_FTQF_PRIORITY_SHIFT 2 -#define IXGBE_FTQF_POOL_MASK 0x0000003F -#define IXGBE_FTQF_POOL_SHIFT 8 -#define IXGBE_FTQF_5TUPLE_MASK_MASK 0x0000001F -#define IXGBE_FTQF_5TUPLE_MASK_SHIFT 25 -#define IXGBE_FTQF_SOURCE_ADDR_MASK 0x1E -#define IXGBE_FTQF_DEST_ADDR_MASK 0x1D -#define IXGBE_FTQF_SOURCE_PORT_MASK 0x1B -#define IXGBE_FTQF_DEST_PORT_MASK 0x17 -#define IXGBE_FTQF_PROTOCOL_COMP_MASK 0x0F -#define IXGBE_FTQF_POOL_MASK_EN 0x40000000 -#define IXGBE_FTQF_QUEUE_ENABLE 0x80000000 - -/* Interrupt clear mask */ -#define IXGBE_IRQ_CLEAR_MASK 0xFFFFFFFF - -/* Interrupt Vector Allocation Registers */ -#define IXGBE_IVAR_REG_NUM 25 -#define IXGBE_IVAR_REG_NUM_82599 64 -#define IXGBE_IVAR_TXRX_ENTRY 96 -#define IXGBE_IVAR_RX_ENTRY 64 -#define IXGBE_IVAR_RX_QUEUE(_i) (0 + (_i)) -#define IXGBE_IVAR_TX_QUEUE(_i) (64 + (_i)) -#define IXGBE_IVAR_TX_ENTRY 32 - -#define IXGBE_IVAR_TCP_TIMER_INDEX 96 /* 0 based index */ -#define IXGBE_IVAR_OTHER_CAUSES_INDEX 97 /* 0 based index */ - -#define IXGBE_MSIX_VECTOR(_i) (0 + (_i)) - -#define IXGBE_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */ - -/* ETYPE Queue Filter/Select Bit Masks */ -#define IXGBE_MAX_ETQF_FILTERS 8 -#define IXGBE_ETQF_FCOE 0x08000000 /* bit 27 */ -#define IXGBE_ETQF_BCN 0x10000000 /* bit 28 */ -#define IXGBE_ETQF_1588 0x40000000 /* bit 30 */ -#define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */ -#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */ - -#define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */ -#define IXGBE_ETQS_RX_QUEUE_SHIFT 16 -#define IXGBE_ETQS_LLI 0x20000000 /* bit 29 */ -#define IXGBE_ETQS_QUEUE_EN 0x80000000 /* bit 31 */ - -/* - * ETQF filter list: one static filter per filter consumer. This is - * to avoid filter collisions later. Add new filters - * here!! 
- * - * Current filters: - * EAPOL 802.1x (0x888e): Filter 0 - * FCoE (0x8906): Filter 2 - * 1588 (0x88f7): Filter 3 - * FIP (0x8914): Filter 4 - */ -#define IXGBE_ETQF_FILTER_EAPOL 0 -#define IXGBE_ETQF_FILTER_FCOE 2 -#define IXGBE_ETQF_FILTER_1588 3 -#define IXGBE_ETQF_FILTER_FIP 4 -/* VLAN Control Bit Masks */ -#define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */ -#define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */ -#define IXGBE_VLNCTRL_CFIEN 0x20000000 /* bit 29 */ -#define IXGBE_VLNCTRL_VFE 0x40000000 /* bit 30 */ -#define IXGBE_VLNCTRL_VME 0x80000000 /* bit 31 */ - -/* VLAN pool filtering masks */ -#define IXGBE_VLVF_VIEN 0x80000000 /* filter is valid */ -#define IXGBE_VLVF_ENTRIES 64 -#define IXGBE_VLVF_VLANID_MASK 0x00000FFF -/* Per VF Port VLAN insertion rules */ -#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ -#define IXGBE_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ - -#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */ - -/* STATUS Bit Masks */ -#define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */ -#define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/ -#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Ena Status */ - -#define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */ -#define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ - -/* ESDP Bit Masks */ -#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */ -#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */ -#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */ -#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */ -#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */ -#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */ -#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */ -#define IXGBE_ESDP_SDP0_DIR 0x00000100 /* SDP0 IO direction */ -#define IXGBE_ESDP_SDP1_DIR 0x00000200 /* SDP1 IO direction */ -#define IXGBE_ESDP_SDP4_DIR 0x00001000 /* SDP4 IO direction */ -#define IXGBE_ESDP_SDP5_DIR 0x00002000 /* SDP5 IO direction */ -#define IXGBE_ESDP_SDP0_NATIVE 0x00010000 /* SDP0 IO mode */ -#define IXGBE_ESDP_SDP1_NATIVE 0x00020000 /* SDP1 IO mode */ - - -/* LEDCTL Bit Masks */ -#define IXGBE_LED_IVRT_BASE 0x00000040 -#define IXGBE_LED_BLINK_BASE 0x00000080 -#define IXGBE_LED_MODE_MASK_BASE 0x0000000F -#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i))) -#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i)) -#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) -#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) -#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) - -/* LED modes */ -#define IXGBE_LED_LINK_UP 0x0 -#define IXGBE_LED_LINK_10G 0x1 -#define IXGBE_LED_MAC 0x2 -#define IXGBE_LED_FILTER 0x3 -#define IXGBE_LED_LINK_ACTIVE 0x4 -#define IXGBE_LED_LINK_1G 0x5 -#define IXGBE_LED_ON 0xE -#define IXGBE_LED_OFF 0xF - -/* AUTOC Bit Masks */ -#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000 -#define IXGBE_AUTOC_KX4_SUPP 0x80000000 -#define IXGBE_AUTOC_KX_SUPP 0x40000000 -#define IXGBE_AUTOC_PAUSE 0x30000000 -#define IXGBE_AUTOC_ASM_PAUSE 0x20000000 -#define IXGBE_AUTOC_SYM_PAUSE 0x10000000 -#define IXGBE_AUTOC_RF 0x08000000 -#define IXGBE_AUTOC_PD_TMR 0x06000000 -#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000 -#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000 -#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000 -#define IXGBE_AUTOC_FECA 0x00040000 -#define IXGBE_AUTOC_FECR 0x00020000 -#define IXGBE_AUTOC_KR_SUPP 0x00010000 -#define IXGBE_AUTOC_AN_RESTART 0x00001000 -#define IXGBE_AUTOC_FLU 0x00000001 -#define 
IXGBE_AUTOC_LMS_SHIFT 13 -#define IXGBE_AUTOC_LMS_10G_SERIAL (0x3 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_KX4_KX_KR (0x4 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_SGMII_1G_100M (0x5 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_MASK (0x7 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN (0x0 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN (0x1 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_1G_AN (0x2 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_KX4_AN (0x4 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) -#define IXGBE_AUTOC_LMS_ATTACH_TYPE (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) - -#define IXGBE_AUTOC_1G_PMA_PMD_MASK 0x00000200 -#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9 -#define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180 -#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7 -#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) -#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) - -#define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000 -#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000 -#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16 -#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) -#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) -#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) - -#define IXGBE_MACC_FLU 0x00000001 -#define IXGBE_MACC_FSV_10G 0x00030000 -#define IXGBE_MACC_FS 0x00040000 -#define IXGBE_MAC_RX2TX_LPBK 0x00000002 - -/* LINKS Bit Masks */ -#define IXGBE_LINKS_KX_AN_COMP 0x80000000 -#define IXGBE_LINKS_UP 0x40000000 -#define IXGBE_LINKS_SPEED 0x20000000 -#define IXGBE_LINKS_MODE 0x18000000 -#define IXGBE_LINKS_RX_MODE 0x06000000 -#define IXGBE_LINKS_TX_MODE 0x01800000 -#define IXGBE_LINKS_XGXS_EN 0x00400000 -#define IXGBE_LINKS_SGMII_EN 0x02000000 -#define IXGBE_LINKS_PCS_1G_EN 0x00200000 -#define IXGBE_LINKS_1G_AN_EN 0x00100000 -#define IXGBE_LINKS_KX_AN_IDLE 0x00080000 -#define IXGBE_LINKS_1G_SYNC 0x00040000 -#define IXGBE_LINKS_10G_ALIGN 0x00020000 -#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000 -#define IXGBE_LINKS_TL_FAULT 0x00001000 -#define IXGBE_LINKS_SIGNAL 0x00000F00 - -#define IXGBE_LINKS_SPEED_82599 0x30000000 -#define IXGBE_LINKS_SPEED_10G_82599 0x30000000 -#define IXGBE_LINKS_SPEED_1G_82599 0x20000000 -#define IXGBE_LINKS_SPEED_100_82599 0x10000000 -#define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */ -#define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */ - -#define IXGBE_LINKS2_AN_SUPPORTED 0x00000040 - -/* PCS1GLSTA Bit Masks */ -#define IXGBE_PCS1GLSTA_LINK_OK 1 -#define IXGBE_PCS1GLSTA_SYNK_OK 0x10 -#define IXGBE_PCS1GLSTA_AN_COMPLETE 0x10000 -#define IXGBE_PCS1GLSTA_AN_PAGE_RX 0x20000 -#define IXGBE_PCS1GLSTA_AN_TIMED_OUT 0x40000 -#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000 -#define IXGBE_PCS1GLSTA_AN_ERROR_RWS 0x100000 - -#define IXGBE_PCS1GANA_SYM_PAUSE 0x80 -#define IXGBE_PCS1GANA_ASM_PAUSE 0x100 - -/* PCS1GLCTL Bit Masks */ -#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to 
en */ -#define IXGBE_PCS1GLCTL_FLV_LINK_UP 1 -#define IXGBE_PCS1GLCTL_FORCE_LINK 0x20 -#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH 0x40 -#define IXGBE_PCS1GLCTL_AN_ENABLE 0x10000 -#define IXGBE_PCS1GLCTL_AN_RESTART 0x20000 - -/* ANLP1 Bit Masks */ -#define IXGBE_ANLP1_PAUSE 0x0C00 -#define IXGBE_ANLP1_SYM_PAUSE 0x0400 -#define IXGBE_ANLP1_ASM_PAUSE 0x0800 -#define IXGBE_ANLP1_AN_STATE_MASK 0x000f0000 - -/* SW Semaphore Register bitmasks */ -#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ -#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ -#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ -#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */ - -/* SW_FW_SYNC/GSSR definitions */ -#define IXGBE_GSSR_EEP_SM 0x0001 -#define IXGBE_GSSR_PHY0_SM 0x0002 -#define IXGBE_GSSR_PHY1_SM 0x0004 -#define IXGBE_GSSR_MAC_CSR_SM 0x0008 -#define IXGBE_GSSR_FLASH_SM 0x0010 -#define IXGBE_GSSR_SW_MNG_SM 0x0400 - -/* FW Status register bitmask */ -#define IXGBE_FWSTS_FWRI 0x00000200 /* Firmware Reset Indication */ - -/* EEC Register */ -#define IXGBE_EEC_SK 0x00000001 /* EEPROM Clock */ -#define IXGBE_EEC_CS 0x00000002 /* EEPROM Chip Select */ -#define IXGBE_EEC_DI 0x00000004 /* EEPROM Data In */ -#define IXGBE_EEC_DO 0x00000008 /* EEPROM Data Out */ -#define IXGBE_EEC_FWE_MASK 0x00000030 /* FLASH Write Enable */ -#define IXGBE_EEC_FWE_DIS 0x00000010 /* Disable FLASH writes */ -#define IXGBE_EEC_FWE_EN 0x00000020 /* Enable FLASH writes */ -#define IXGBE_EEC_FWE_SHIFT 4 -#define IXGBE_EEC_REQ 0x00000040 /* EEPROM Access Request */ -#define IXGBE_EEC_GNT 0x00000080 /* EEPROM Access Grant */ -#define IXGBE_EEC_PRES 0x00000100 /* EEPROM Present */ -#define IXGBE_EEC_ARD 0x00000200 /* EEPROM Auto Read Done */ -#define IXGBE_EEC_FLUP 0x00800000 /* Flash update command */ -#define IXGBE_EEC_SEC1VAL 0x02000000 /* Sector 1 Valid */ -#define IXGBE_EEC_FLUDONE 0x04000000 /* Flash update done */ -/* EEPROM Addressing bits based on type (0-small, 1-large) */ -#define IXGBE_EEC_ADDR_SIZE 0x00000400 -#define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */ -#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. 
*/ - -#define IXGBE_EEC_SIZE_SHIFT 11 -#define IXGBE_EEPROM_WORD_SIZE_SHIFT 6 -#define IXGBE_EEPROM_OPCODE_BITS 8 - -/* Part Number String Length */ -#define IXGBE_PBANUM_LENGTH 11 - -/* Checksum and EEPROM pointers */ -#define IXGBE_PBANUM_PTR_GUARD 0xFAFA -#define IXGBE_EEPROM_CHECKSUM 0x3F -#define IXGBE_EEPROM_SUM 0xBABA -#define IXGBE_PCIE_ANALOG_PTR 0x03 -#define IXGBE_ATLAS0_CONFIG_PTR 0x04 -#define IXGBE_PHY_PTR 0x04 -#define IXGBE_ATLAS1_CONFIG_PTR 0x05 -#define IXGBE_OPTION_ROM_PTR 0x05 -#define IXGBE_PCIE_GENERAL_PTR 0x06 -#define IXGBE_PCIE_CONFIG0_PTR 0x07 -#define IXGBE_PCIE_CONFIG1_PTR 0x08 -#define IXGBE_CORE0_PTR 0x09 -#define IXGBE_CORE1_PTR 0x0A -#define IXGBE_MAC0_PTR 0x0B -#define IXGBE_MAC1_PTR 0x0C -#define IXGBE_CSR0_CONFIG_PTR 0x0D -#define IXGBE_CSR1_CONFIG_PTR 0x0E -#define IXGBE_FW_PTR 0x0F -#define IXGBE_PBANUM0_PTR 0x15 -#define IXGBE_PBANUM1_PTR 0x16 -#define IXGBE_ALT_MAC_ADDR_PTR 0x37 -#define IXGBE_FREE_SPACE_PTR 0X3E - -/* External Thermal Sensor Config */ -#define IXGBE_ETS_CFG 0x26 -#define IXGBE_ETS_LTHRES_DELTA_MASK 0x07C0 -#define IXGBE_ETS_LTHRES_DELTA_SHIFT 6 -#define IXGBE_ETS_TYPE_MASK 0x0038 -#define IXGBE_ETS_TYPE_SHIFT 3 -#define IXGBE_ETS_TYPE_EMC 0x000 -#define IXGBE_ETS_NUM_SENSORS_MASK 0x0007 -#define IXGBE_ETS_DATA_LOC_MASK 0x3C00 -#define IXGBE_ETS_DATA_LOC_SHIFT 10 -#define IXGBE_ETS_DATA_INDEX_MASK 0x0300 -#define IXGBE_ETS_DATA_INDEX_SHIFT 8 -#define IXGBE_ETS_DATA_HTHRESH_MASK 0x00FF - -#define IXGBE_SAN_MAC_ADDR_PTR 0x28 -#define IXGBE_DEVICE_CAPS 0x2C -#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 -#define IXGBE_PCIE_MSIX_82599_CAPS 0x72 -#define IXGBE_MAX_MSIX_VECTORS_82599 0x40 -#define IXGBE_PCIE_MSIX_82598_CAPS 0x62 -#define IXGBE_MAX_MSIX_VECTORS_82598 0x13 - -/* MSI-X capability fields masks */ -#define IXGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF - -/* Legacy EEPROM word offsets */ -#define IXGBE_ISCSI_BOOT_CAPS 0x0033 -#define IXGBE_ISCSI_SETUP_PORT_0 0x0030 -#define IXGBE_ISCSI_SETUP_PORT_1 0x0034 - -/* EEPROM Commands - SPI */ -#define IXGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */ -#define IXGBE_EEPROM_STATUS_RDY_SPI 0x01 -#define IXGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */ -#define IXGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */ -#define IXGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */ -#define IXGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */ -/* EEPROM reset Write Enable latch */ -#define IXGBE_EEPROM_WRDI_OPCODE_SPI 0x04 -#define IXGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */ -#define IXGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */ -#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */ -#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */ -#define IXGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */ - -/* EEPROM Read Register */ -#define IXGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */ -#define IXGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */ -#define IXGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */ -#define IXGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ -#define IXGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */ -#define IXGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */ - -#define IXGBE_ETH_LENGTH_OF_ADDRESS 6 - -#define IXGBE_EEPROM_PAGE_SIZE_MAX 128 -#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* words rd in burst */ -#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */ - -#ifndef 
IXGBE_EEPROM_GRANT_ATTEMPTS -#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */ -#endif - -#ifndef IXGBE_EERD_EEWR_ATTEMPTS -/* Number of 5 microseconds we wait for EERD read and - * EERW write to complete */ -#define IXGBE_EERD_EEWR_ATTEMPTS 100000 -#endif - -#ifndef IXGBE_FLUDONE_ATTEMPTS -/* # attempts we wait for flush update to complete */ -#define IXGBE_FLUDONE_ATTEMPTS 20000 -#endif - -#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */ -#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */ -#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */ -#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */ - -#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0 -#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3 -#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1 -#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2 -#define IXGBE_FW_LESM_PARAMETERS_PTR 0x2 -#define IXGBE_FW_LESM_STATE_1 0x1 -#define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */ -#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4 -#define IXGBE_FW_PATCH_VERSION_4 0x7 -#define IXGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */ -#define IXGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */ -#define IXGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */ -#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */ -#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */ -#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x27 /* Alt. SAN MAC block */ -#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */ -#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */ -#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */ -#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */ -#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */ -#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */ -#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */ - -#define IXGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */ -#define IXGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */ -#define IXGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */ - -/* PCI Bus Info */ -#define IXGBE_PCI_DEVICE_STATUS 0xAA -#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 -#define IXGBE_PCI_LINK_STATUS 0xB2 -#define IXGBE_PCI_DEVICE_CONTROL2 0xC8 -#define IXGBE_PCI_LINK_WIDTH 0x3F0 -#define IXGBE_PCI_LINK_WIDTH_1 0x10 -#define IXGBE_PCI_LINK_WIDTH_2 0x20 -#define IXGBE_PCI_LINK_WIDTH_4 0x40 -#define IXGBE_PCI_LINK_WIDTH_8 0x80 -#define IXGBE_PCI_LINK_SPEED 0xF -#define IXGBE_PCI_LINK_SPEED_2500 0x1 -#define IXGBE_PCI_LINK_SPEED_5000 0x2 -#define IXGBE_PCI_LINK_SPEED_8000 0x3 -#define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E -#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 -#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 - -/* Number of 100 microseconds we wait for PCI Express master disable */ -#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 - -/* Check whether address is multicast. This is little-endian specific check.*/ -#define IXGBE_IS_MULTICAST(Address) \ - (bool)(((u8 *)(Address))[0] & ((u8)0x01)) - -/* Check whether an address is broadcast. 
*/ -#define IXGBE_IS_BROADCAST(Address) \ - ((((u8 *)(Address))[0] == ((u8)0xff)) && \ - (((u8 *)(Address))[1] == ((u8)0xff))) - -/* RAH */ -#define IXGBE_RAH_VIND_MASK 0x003C0000 -#define IXGBE_RAH_VIND_SHIFT 18 -#define IXGBE_RAH_AV 0x80000000 -#define IXGBE_CLEAR_VMDQ_ALL 0xFFFFFFFF - -/* Header split receive */ -#define IXGBE_RFCTL_ISCSI_DIS 0x00000001 -#define IXGBE_RFCTL_ISCSI_DWC_MASK 0x0000003E -#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1 -#define IXGBE_RFCTL_RSC_DIS 0x00000010 -#define IXGBE_RFCTL_NFSW_DIS 0x00000040 -#define IXGBE_RFCTL_NFSR_DIS 0x00000080 -#define IXGBE_RFCTL_NFS_VER_MASK 0x00000300 -#define IXGBE_RFCTL_NFS_VER_SHIFT 8 -#define IXGBE_RFCTL_NFS_VER_2 0 -#define IXGBE_RFCTL_NFS_VER_3 1 -#define IXGBE_RFCTL_NFS_VER_4 2 -#define IXGBE_RFCTL_IPV6_DIS 0x00000400 -#define IXGBE_RFCTL_IPV6_XSUM_DIS 0x00000800 -#define IXGBE_RFCTL_IPFRSP_DIS 0x00004000 -#define IXGBE_RFCTL_IPV6_EX_DIS 0x00010000 -#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 - -/* Transmit Config masks */ -#define IXGBE_TXDCTL_ENABLE 0x02000000 /* Ena specific Tx Queue */ -#define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */ -#define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */ -/* Enable short packet padding to 64 bytes */ -#define IXGBE_TX_PAD_ENABLE 0x00000400 -#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004 /* Allow jumbo frames */ -/* This allows for 16K packets + 4k for vlan */ -#define IXGBE_MAX_FRAME_SZ 0x40040000 - -#define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */ -#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */ - -/* Receive Config masks */ -#define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */ -#define IXGBE_RXCTRL_DMBYPS 0x00000002 /* Desc Monitor Bypass */ -#define IXGBE_RXDCTL_ENABLE 0x02000000 /* Ena specific Rx Queue */ -#define IXGBE_RXDCTL_SWFLSH 0x04000000 /* Rx Desc wr-bk flushing */ -#define IXGBE_RXDCTL_RLPMLMASK 0x00003FFF /* X540 supported only */ -#define IXGBE_RXDCTL_RLPML_EN 0x00008000 -#define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */ - -#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ -#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ - -#define IXGBE_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ -#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ -#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 -#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 -#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 -#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A -#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ - -#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF -#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 -#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG 0x01 -#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG 0x02 -#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG 0x03 -#define IXGBE_RXMTRL_V1_MGMT_MSG 0x04 - -#define IXGBE_RXMTRL_V2_MSGID_MASK 0x0000FF00 -#define IXGBE_RXMTRL_V2_SYNC_MSG 0x0000 -#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG 0x0100 -#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG 0x0200 -#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG 0x0300 -#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG 0x0800 -#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG 0x0900 -#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00 -#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG 0x0B00 -#define IXGBE_RXMTRL_V2_SIGNALLING_MSG 0x0C00 -#define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00 - -#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */ -#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/ -#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena 
*/ -#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */ -#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */ -#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */ -/* Receive Priority Flow Control Enable */ -#define IXGBE_FCTRL_RPFCE 0x00004000 -#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */ -#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */ -#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */ -#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */ -#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */ -#define IXGBE_MFLCN_RPFCE_MASK 0x00000FF4 /* Rx Priority FC bitmap mask */ -#define IXGBE_MFLCN_RPFCE_SHIFT 4 /* Rx Priority FC bitmap shift */ - -/* Multiple Receive Queue Control */ -#define IXGBE_MRQC_RSSEN 0x00000001 /* RSS Enable */ -#define IXGBE_MRQC_MRQE_MASK 0xF /* Bits 3:0 */ -#define IXGBE_MRQC_RT8TCEN 0x00000002 /* 8 TC no RSS */ -#define IXGBE_MRQC_RT4TCEN 0x00000003 /* 4 TC no RSS */ -#define IXGBE_MRQC_RTRSS8TCEN 0x00000004 /* 8 TC w/ RSS */ -#define IXGBE_MRQC_RTRSS4TCEN 0x00000005 /* 4 TC w/ RSS */ -#define IXGBE_MRQC_VMDQEN 0x00000008 /* VMDq2 64 pools no RSS */ -#define IXGBE_MRQC_VMDQRSS32EN 0x0000000A /* VMDq2 32 pools w/ RSS */ -#define IXGBE_MRQC_VMDQRSS64EN 0x0000000B /* VMDq2 64 pools w/ RSS */ -#define IXGBE_MRQC_VMDQRT8TCEN 0x0000000C /* VMDq2/RT 16 pool 8 TC */ -#define IXGBE_MRQC_VMDQRT4TCEN 0x0000000D /* VMDq2/RT 32 pool 4 TC */ -#define IXGBE_MRQC_RSS_FIELD_MASK 0xFFFF0000 -#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 -#define IXGBE_MRQC_RSS_FIELD_IPV4 0x00020000 -#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000 -#define IXGBE_MRQC_RSS_FIELD_IPV6_EX 0x00080000 -#define IXGBE_MRQC_RSS_FIELD_IPV6 0x00100000 -#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 -#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 -#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 -#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000 -#define IXGBE_MRQC_L3L4TXSWEN 0x00008000 - -/* Queue Drop Enable */ -#define IXGBE_QDE_ENABLE 0x00000001 -#define IXGBE_QDE_IDX_MASK 0x00007F00 -#define IXGBE_QDE_IDX_SHIFT 8 -#define IXGBE_QDE_WRITE 0x00010000 -#define IXGBE_QDE_READ 0x00020000 - -#define IXGBE_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ -#define IXGBE_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ -#define IXGBE_TXD_CMD_EOP 0x01000000 /* End of Packet */ -#define IXGBE_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ -#define IXGBE_TXD_CMD_IC 0x04000000 /* Insert Checksum */ -#define IXGBE_TXD_CMD_RS 0x08000000 /* Report Status */ -#define IXGBE_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ -#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ -#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */ - -#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000 -#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 -#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 -#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000 -#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 -/* Multiple Transmit Queue Command Register */ -#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */ -#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */ -#define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */ -#define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */ -#define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */ -#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */ -#define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */ - -/* Receive 
Descriptor bit definitions */ -#define IXGBE_RXD_STAT_DD 0x01 /* Descriptor Done */ -#define IXGBE_RXD_STAT_EOP 0x02 /* End of Packet */ -#define IXGBE_RXD_STAT_FLM 0x04 /* FDir Match */ -#define IXGBE_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ -#define IXGBE_RXDADV_NEXTP_MASK 0x000FFFF0 /* Next Descriptor Index */ -#define IXGBE_RXDADV_NEXTP_SHIFT 0x00000004 -#define IXGBE_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ -#define IXGBE_RXD_STAT_L4CS 0x20 /* L4 xsum calculated */ -#define IXGBE_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ -#define IXGBE_RXD_STAT_PIF 0x80 /* passed in-exact filter */ -#define IXGBE_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */ -#define IXGBE_RXD_STAT_VEXT 0x200 /* 1st VLAN found */ -#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ -#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ -#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ -#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ -#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ -#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ -#define IXGBE_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ -#define IXGBE_RXD_ERR_CE 0x01 /* CRC Error */ -#define IXGBE_RXD_ERR_LE 0x02 /* Length Error */ -#define IXGBE_RXD_ERR_PE 0x08 /* Packet Error */ -#define IXGBE_RXD_ERR_OSE 0x10 /* Oversize Error */ -#define IXGBE_RXD_ERR_USE 0x20 /* Undersize Error */ -#define IXGBE_RXD_ERR_TCPE 0x40 /* TCP/UDP Checksum Error */ -#define IXGBE_RXD_ERR_IPE 0x80 /* IP Checksum Error */ -#define IXGBE_RXDADV_ERR_MASK 0xfff00000 /* RDESC.ERRORS mask */ -#define IXGBE_RXDADV_ERR_SHIFT 20 /* RDESC.ERRORS shift */ -#define IXGBE_RXDADV_ERR_RXE 0x20000000 /* Any MAC Error */ -#define IXGBE_RXDADV_ERR_FCEOFE 0x80000000 /* FCoEFe/IPE */ -#define IXGBE_RXDADV_ERR_FCERR 0x00700000 /* FCERR/FDIRERR */ -#define IXGBE_RXDADV_ERR_FDIR_LEN 0x00100000 /* FDIR Length error */ -#define IXGBE_RXDADV_ERR_FDIR_DROP 0x00200000 /* FDIR Drop error */ -#define IXGBE_RXDADV_ERR_FDIR_COLL 0x00400000 /* FDIR Collision error */ -#define IXGBE_RXDADV_ERR_HBO 0x00800000 /*Header Buffer Overflow */ -#define IXGBE_RXDADV_ERR_CE 0x01000000 /* CRC Error */ -#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */ -#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */ -#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */ -#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */ -#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */ -#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */ -#define IXGBE_RXD_VLAN_ID_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ -#define IXGBE_RXD_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ -#define IXGBE_RXD_PRI_SHIFT 13 -#define IXGBE_RXD_CFI_MASK 0x1000 /* CFI is bit 12 */ -#define IXGBE_RXD_CFI_SHIFT 12 - -#define IXGBE_RXDADV_STAT_DD IXGBE_RXD_STAT_DD /* Done */ -#define IXGBE_RXDADV_STAT_EOP IXGBE_RXD_STAT_EOP /* End of Packet */ -#define IXGBE_RXDADV_STAT_FLM IXGBE_RXD_STAT_FLM /* FDir Match */ -#define IXGBE_RXDADV_STAT_VP IXGBE_RXD_STAT_VP /* IEEE VLAN Pkt */ -#define IXGBE_RXDADV_STAT_MASK 0x000fffff /* Stat/NEXTP: bit 0-19 */ -#define IXGBE_RXDADV_STAT_FCEOFS 0x00000040 /* FCoE EOF/SOF Stat */ -#define IXGBE_RXDADV_STAT_FCSTAT 0x00000030 /* FCoE Pkt Stat */ -#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */ -#define IXGBE_RXDADV_STAT_FCSTAT_NODDP 0x00000010 /* 01: Ctxt w/o DDP */ -#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. 
FCP_RSP */ -#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */ -#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE1588 Time Stamp */ - -/* PSRTYPE bit definitions */ -#define IXGBE_PSRTYPE_TCPHDR 0x00000010 -#define IXGBE_PSRTYPE_UDPHDR 0x00000020 -#define IXGBE_PSRTYPE_IPV4HDR 0x00000100 -#define IXGBE_PSRTYPE_IPV6HDR 0x00000200 -#define IXGBE_PSRTYPE_L2HDR 0x00001000 - -/* SRRCTL bit definitions */ -#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ -#define IXGBE_SRRCTL_RDMTS_SHIFT 22 -#define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 -#define IXGBE_SRRCTL_DROP_EN 0x10000000 -#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F -#define IXGBE_SRRCTL_BSIZEHDR_MASK 0x00003F00 -#define IXGBE_SRRCTL_DESCTYPE_LEGACY 0x00000000 -#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 -#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 -#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 -#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 -#define IXGBE_SRRCTL_DESCTYPE_MASK 0x0E000000 - -#define IXGBE_RXDPS_HDRSTAT_HDRSP 0x00008000 -#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF - -#define IXGBE_RXDADV_RSSTYPE_MASK 0x0000000F -#define IXGBE_RXDADV_PKTTYPE_MASK 0x0000FFF0 -#define IXGBE_RXDADV_PKTTYPE_MASK_EX 0x0001FFF0 -#define IXGBE_RXDADV_HDRBUFLEN_MASK 0x00007FE0 -#define IXGBE_RXDADV_RSCCNT_MASK 0x001E0000 -#define IXGBE_RXDADV_RSCCNT_SHIFT 17 -#define IXGBE_RXDADV_HDRBUFLEN_SHIFT 5 -#define IXGBE_RXDADV_SPLITHEADER_EN 0x00001000 -#define IXGBE_RXDADV_SPH 0x8000 - -/* RSS Hash results */ -#define IXGBE_RXDADV_RSSTYPE_NONE 0x00000000 -#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 -#define IXGBE_RXDADV_RSSTYPE_IPV4 0x00000002 -#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 -#define IXGBE_RXDADV_RSSTYPE_IPV6_EX 0x00000004 -#define IXGBE_RXDADV_RSSTYPE_IPV6 0x00000005 -#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 -#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 -#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 -#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 - -/* RSS Packet Types as indicated in the receive descriptor. 
*/ -#define IXGBE_RXDADV_PKTTYPE_NONE 0x00000000 -#define IXGBE_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPv4 hdr present */ -#define IXGBE_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPv4 hdr + extensions */ -#define IXGBE_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPv6 hdr present */ -#define IXGBE_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPv6 hdr + extensions */ -#define IXGBE_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ -#define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ -#define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ -#define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ -#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ -#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ -#define IXGBE_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ -#define IXGBE_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ -#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ -#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ - -/* Security Processing bit Indication */ -#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000 -#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 - -/* Masks to determine if packets should be dropped due to frame errors */ -#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \ - IXGBE_RXD_ERR_CE | \ - IXGBE_RXD_ERR_LE | \ - IXGBE_RXD_ERR_PE | \ - IXGBE_RXD_ERR_OSE | \ - IXGBE_RXD_ERR_USE) - -#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \ - IXGBE_RXDADV_ERR_CE | \ - IXGBE_RXDADV_ERR_LE | \ - IXGBE_RXDADV_ERR_PE | \ - IXGBE_RXDADV_ERR_OSE | \ - IXGBE_RXDADV_ERR_USE) - -#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599 IXGBE_RXDADV_ERR_RXE - -/* Multicast bit mask */ -#define IXGBE_MCSTCTRL_MFE 0x4 - -/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ -#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8 -#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8 -#define IXGBE_REQ_TX_BUFFER_GRANULARITY 1024 - -/* Vlan-specific macros */ -#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */ -#define IXGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */ -#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */ -#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT - -/* SR-IOV specific macros */ -#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4) -#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4)) -#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600)) -#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4)) -/* Translated register #defines */ -#define IXGBE_PVFCTRL(P) (0x00300 + (4 * (P))) -#define IXGBE_PVFSTATUS(P) (0x00008 + (0 * (P))) -#define IXGBE_PVFLINKS(P) (0x042A4 + (0 * (P))) -#define IXGBE_PVFRTIMER(P) (0x00048 + (0 * (P))) -#define IXGBE_PVFMAILBOX(P) (0x04C00 + (4 * (P))) -#define IXGBE_PVFRXMEMWRAP(P) (0x03190 + (0 * (P))) -#define IXGBE_PVTEICR(P) (0x00B00 + (4 * (P))) -#define IXGBE_PVTEICS(P) (0x00C00 + (4 * (P))) -#define IXGBE_PVTEIMS(P) (0x00D00 + (4 * (P))) -#define IXGBE_PVTEIMC(P) (0x00E00 + (4 * (P))) -#define IXGBE_PVTEIAC(P) (0x00F00 + (4 * (P))) -#define IXGBE_PVTEIAM(P) (0x04D00 + (4 * (P))) -#define IXGBE_PVTEITR(P) (((P) < 24) ? 
(0x00820 + ((P) * 4)) : \ - (0x012300 + (((P) - 24) * 4))) -#define IXGBE_PVTIVAR(P) (0x12500 + (4 * (P))) -#define IXGBE_PVTIVAR_MISC(P) (0x04E00 + (4 * (P))) -#define IXGBE_PVTRSCINT(P) (0x12000 + (4 * (P))) -#define IXGBE_VFPBACL(P) (0x110C8 + (4 * (P))) -#define IXGBE_PVFRDBAL(P) ((P < 64) ? (0x01000 + (0x40 * (P))) \ - : (0x0D000 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFRDBAH(P) ((P < 64) ? (0x01004 + (0x40 * (P))) \ - : (0x0D004 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFRDLEN(P) ((P < 64) ? (0x01008 + (0x40 * (P))) \ - : (0x0D008 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFRDH(P) ((P < 64) ? (0x01010 + (0x40 * (P))) \ - : (0x0D010 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFRDT(P) ((P < 64) ? (0x01018 + (0x40 * (P))) \ - : (0x0D018 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFRXDCTL(P) ((P < 64) ? (0x01028 + (0x40 * (P))) \ - : (0x0D028 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFSRRCTL(P) ((P < 64) ? (0x01014 + (0x40 * (P))) \ - : (0x0D014 + (0x40 * ((P) - 64)))) -#define IXGBE_PVFPSRTYPE(P) (0x0EA00 + (4 * (P))) -#define IXGBE_PVFTDBAL(P) (0x06000 + (0x40 * (P))) -#define IXGBE_PVFTDBAH(P) (0x06004 + (0x40 * (P))) -#define IXGBE_PVFTTDLEN(P) (0x06008 + (0x40 * (P))) -#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) -#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) -#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) -#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) -#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) -#define IXGBE_PVFDCA_RXCTRL(P) (((P) < 64) ? (0x0100C + (0x40 * (P))) \ - : (0x0D00C + (0x40 * ((P) - 64)))) -#define IXGBE_PVFDCA_TXCTRL(P) (0x0600C + (0x40 * (P))) -#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x))) -#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x))) -#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x))) -#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x))) -#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x))) -#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x))) -#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x))) - -#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \ - (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index))) -#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \ - (IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index))) - -/* Little Endian defines */ -#ifndef __le16 -#define __le16 u16 -#endif -#ifndef __le32 -#define __le32 u32 -#endif -#ifndef __le64 -#define __le64 u64 - -#endif -#ifndef __be16 -/* Big Endian defines */ -#define __be16 u16 -#define __be32 u32 -#define __be64 u64 - -#endif -enum ixgbe_fdir_pballoc_type { - IXGBE_FDIR_PBALLOC_NONE = 0, - IXGBE_FDIR_PBALLOC_64K = 1, - IXGBE_FDIR_PBALLOC_128K = 2, - IXGBE_FDIR_PBALLOC_256K = 3, -}; - -/* Flow Director register values */ -#define IXGBE_FDIRCTRL_PBALLOC_64K 0x00000001 -#define IXGBE_FDIRCTRL_PBALLOC_128K 0x00000002 -#define IXGBE_FDIRCTRL_PBALLOC_256K 0x00000003 -#define IXGBE_FDIRCTRL_INIT_DONE 0x00000008 -#define IXGBE_FDIRCTRL_PERFECT_MATCH 0x00000010 -#define IXGBE_FDIRCTRL_REPORT_STATUS 0x00000020 -#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080 -#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8 -#define IXGBE_FDIRCTRL_FLEX_SHIFT 16 -#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000 -#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24 -#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000 -#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT 28 - -#define IXGBE_FDIRTCPM_DPORTM_SHIFT 16 -#define IXGBE_FDIRUDPM_DPORTM_SHIFT 16 -#define IXGBE_FDIRIP6M_DIPM_SHIFT 16 -#define IXGBE_FDIRM_VLANID 0x00000001 -#define IXGBE_FDIRM_VLANP 0x00000002 -#define IXGBE_FDIRM_POOL 0x00000004 
-#define IXGBE_FDIRM_L4P 0x00000008 -#define IXGBE_FDIRM_FLEX 0x00000010 -#define IXGBE_FDIRM_DIPv6 0x00000020 - -#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF -#define IXGBE_FDIRFREE_FREE_SHIFT 0 -#define IXGBE_FDIRFREE_COLL_MASK 0x7FFF0000 -#define IXGBE_FDIRFREE_COLL_SHIFT 16 -#define IXGBE_FDIRLEN_MAXLEN_MASK 0x3F -#define IXGBE_FDIRLEN_MAXLEN_SHIFT 0 -#define IXGBE_FDIRLEN_MAXHASH_MASK 0x7FFF0000 -#define IXGBE_FDIRLEN_MAXHASH_SHIFT 16 -#define IXGBE_FDIRUSTAT_ADD_MASK 0xFFFF -#define IXGBE_FDIRUSTAT_ADD_SHIFT 0 -#define IXGBE_FDIRUSTAT_REMOVE_MASK 0xFFFF0000 -#define IXGBE_FDIRUSTAT_REMOVE_SHIFT 16 -#define IXGBE_FDIRFSTAT_FADD_MASK 0x00FF -#define IXGBE_FDIRFSTAT_FADD_SHIFT 0 -#define IXGBE_FDIRFSTAT_FREMOVE_MASK 0xFF00 -#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT 8 -#define IXGBE_FDIRPORT_DESTINATION_SHIFT 16 -#define IXGBE_FDIRVLAN_FLEX_SHIFT 16 -#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15 -#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16 - -#define IXGBE_FDIRCMD_CMD_MASK 0x00000003 -#define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001 -#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002 -#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003 -#define IXGBE_FDIRCMD_FILTER_VALID 0x00000004 -#define IXGBE_FDIRCMD_FILTER_UPDATE 0x00000008 -#define IXGBE_FDIRCMD_IPv6DMATCH 0x00000010 -#define IXGBE_FDIRCMD_L4TYPE_UDP 0x00000020 -#define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040 -#define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060 -#define IXGBE_FDIRCMD_IPV6 0x00000080 -#define IXGBE_FDIRCMD_CLEARHT 0x00000100 -#define IXGBE_FDIRCMD_DROP 0x00000200 -#define IXGBE_FDIRCMD_INT 0x00000400 -#define IXGBE_FDIRCMD_LAST 0x00000800 -#define IXGBE_FDIRCMD_COLLISION 0x00001000 -#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000 -#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5 -#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16 -#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24 -#define IXGBE_FDIR_INIT_DONE_POLL 10 -#define IXGBE_FDIRCMD_CMD_POLL 10 - -#define IXGBE_FDIR_DROP_QUEUE 127 - -#define IXGBE_STATUS_OVERHEATING_BIT 20 /* STATUS overtemp bit num */ - -/* Manageablility Host Interface defines */ -#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ -#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ -#define IXGBE_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */ - -/* CEM Support */ -#define FW_CEM_HDR_LEN 0x4 -#define FW_CEM_CMD_DRIVER_INFO 0xDD -#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5 -#define FW_CEM_CMD_RESERVED 0X0 -#define FW_CEM_UNUSED_VER 0x0 -#define FW_CEM_MAX_RETRIES 3 -#define FW_CEM_RESP_STATUS_SUCCESS 0x1 - -/* Host Interface Command Structures */ - -struct ixgbe_hic_hdr { - u8 cmd; - u8 buf_len; - union { - u8 cmd_resv; - u8 ret_status; - } cmd_or_resp; - u8 checksum; -}; - -struct ixgbe_hic_drv_info { - struct ixgbe_hic_hdr hdr; - u8 port_num; - u8 ver_sub; - u8 ver_build; - u8 ver_min; - u8 ver_maj; - u8 pad; /* end spacing to ensure length is mult. of dword */ - u16 pad2; /* end spacing to ensure length is mult. 
of dword2 */ -}; - -/* Transmit Descriptor - Legacy */ -struct ixgbe_legacy_tx_desc { - u64 buffer_addr; /* Address of the descriptor's data buffer */ - union { - __le32 data; - struct { - __le16 length; /* Data buffer length */ - u8 cso; /* Checksum offset */ - u8 cmd; /* Descriptor control */ - } flags; - } lower; - union { - __le32 data; - struct { - u8 status; /* Descriptor status */ - u8 css; /* Checksum start */ - __le16 vlan; - } fields; - } upper; -}; - -/* Transmit Descriptor - Advanced */ -union ixgbe_adv_tx_desc { - struct { - __le64 buffer_addr; /* Address of descriptor's data buf */ - __le32 cmd_type_len; - __le32 olinfo_status; - } read; - struct { - __le64 rsvd; /* Reserved */ - __le32 nxtseq_seed; - __le32 status; - } wb; -}; - -/* Receive Descriptor - Legacy */ -struct ixgbe_legacy_rx_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - __le16 length; /* Length of data DMAed into data buffer */ - __le16 csum; /* Packet checksum */ - u8 status; /* Descriptor status */ - u8 errors; /* Descriptor Errors */ - __le16 vlan; -}; - -/* Receive Descriptor - Advanced */ -union ixgbe_adv_rx_desc { - struct { - __le64 pkt_addr; /* Packet buffer address */ - __le64 hdr_addr; /* Header buffer address */ - } read; - struct { - struct { - union { - __le32 data; - struct { - __le16 pkt_info; /* RSS, Pkt type */ - __le16 hdr_info; /* Splithdr, hdrlen */ - } hs_rss; - } lo_dword; - union { - __le32 rss; /* RSS Hash */ - struct { - __le16 ip_id; /* IP id */ - __le16 csum; /* Packet Checksum */ - } csum_ip; - } hi_dword; - } lower; - struct { - __le32 status_error; /* ext status/error */ - __le16 length; /* Packet length */ - __le16 vlan; /* VLAN tag */ - } upper; - } wb; /* writeback */ -}; - -/* Context descriptors */ -struct ixgbe_adv_tx_context_desc { - __le32 vlan_macip_lens; - __le32 seqnum_seed; - __le32 type_tucmd_mlhl; - __le32 mss_l4len_idx; -}; - -/* Adv Transmit Descriptor Config Masks */ -#define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */ -#define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */ -#define IXGBE_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 time stamp */ -#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */ -#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */ -#define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */ -#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Adv Context Desc */ -#define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Adv Data Descriptor */ -#define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */ -#define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */ -#define IXGBE_ADVTXD_DCMD_RS IXGBE_TXD_CMD_RS /* Report Status */ -#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ -#define IXGBE_ADVTXD_DCMD_DEXT IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */ -#define IXGBE_ADVTXD_DCMD_VLE IXGBE_TXD_CMD_VLE /* VLAN pkt enable */ -#define IXGBE_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ -#define IXGBE_ADVTXD_STAT_DD IXGBE_TXD_STAT_DD /* Descriptor Done */ -#define IXGBE_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED pres in WB */ -#define IXGBE_ADVTXD_STAT_RSV 0x0000000C /* STA Reserved */ -#define IXGBE_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ -#define IXGBE_ADVTXD_CC 0x00000080 /* Check Context */ -#define IXGBE_ADVTXD_POPTS_SHIFT 8 /* Adv desc POPTS shift */ -#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \ - IXGBE_ADVTXD_POPTS_SHIFT) -#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \ - 
IXGBE_ADVTXD_POPTS_SHIFT) -#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ -#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ -#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ -/* 1st&Last TSO-full iSCSI PDU */ -#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 -#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */ -#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ -#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ -#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ -#define IXGBE_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ -#define IXGBE_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ -#define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ -#define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ -#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ -#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */ -#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ -#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ -#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */ -#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */ -#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */ -#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */ -#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */ -#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */ -#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */ -#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */ -#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */ -#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */ -#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */ -#define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ -#define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ - -/* Autonegotiation advertised speeds */ -typedef u32 ixgbe_autoneg_advertised; -/* Link speed */ -typedef u32 ixgbe_link_speed; -#define IXGBE_LINK_SPEED_UNKNOWN 0 -#define IXGBE_LINK_SPEED_100_FULL 0x0008 -#define IXGBE_LINK_SPEED_1GB_FULL 0x0020 -#define IXGBE_LINK_SPEED_10GB_FULL 0x0080 -#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \ - IXGBE_LINK_SPEED_10GB_FULL) -#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \ - IXGBE_LINK_SPEED_1GB_FULL | \ - IXGBE_LINK_SPEED_10GB_FULL) - - -/* Physical layer type */ -typedef u32 ixgbe_physical_layer; -#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0 -#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x0001 -#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x0002 -#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x0004 -#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008 -#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x0010 -#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x0020 -#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x0040 -#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x0080 -#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x0100 -#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200 -#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400 -#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800 -#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 -#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 -#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000 - -/* Flow Control Data Sheet defined values - * Calculation and defines taken from 802.1bb Annex O - */ - -/* 
BitTimes (BT) conversion */ -#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024)) -#define IXGBE_B2BT(BT) (BT * 8) - -/* Calculate Delay to respond to PFC */ -#define IXGBE_PFC_D 672 - -/* Calculate Cable Delay */ -#define IXGBE_CABLE_DC 5556 /* Delay Copper */ -#define IXGBE_CABLE_DO 5000 /* Delay Optical */ - -/* Calculate Interface Delay X540 */ -#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */ -#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */ -#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */ - -#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC) - -/* Calculate Interface Delay 82598, 82599 */ -#define IXGBE_PHY_D 12800 -#define IXGBE_MAC_D 4096 -#define IXGBE_XAUI_D (2 * 1024) - -#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D) - -/* Calculate Delay incurred from higher layer */ -#define IXGBE_HD 6144 - -/* Calculate PCI Bus delay for low thresholds */ -#define IXGBE_PCI_DELAY 10000 - -/* Calculate X540 delay value in bit times */ -#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \ - ((36 * \ - (IXGBE_B2BT(_max_frame_link) + \ - IXGBE_PFC_D + \ - (2 * IXGBE_CABLE_DC) + \ - (2 * IXGBE_ID_X540) + \ - IXGBE_HD) / 25 + 1) + \ - 2 * IXGBE_B2BT(_max_frame_tc)) - -/* Calculate 82599, 82598 delay value in bit times */ -#define IXGBE_DV(_max_frame_link, _max_frame_tc) \ - ((36 * \ - (IXGBE_B2BT(_max_frame_link) + \ - IXGBE_PFC_D + \ - (2 * IXGBE_CABLE_DC) + \ - (2 * IXGBE_ID) + \ - IXGBE_HD) / 25 + 1) + \ - 2 * IXGBE_B2BT(_max_frame_tc)) - -/* Calculate low threshold delay values */ -#define IXGBE_LOW_DV_X540(_max_frame_tc) \ - (2 * IXGBE_B2BT(_max_frame_tc) + \ - (36 * IXGBE_PCI_DELAY / 25) + 1) -#define IXGBE_LOW_DV(_max_frame_tc) \ - (2 * IXGBE_LOW_DV_X540(_max_frame_tc)) - -/* Software ATR hash keys */ -#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 -#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614 - -/* Software ATR input stream values and masks */ -#define IXGBE_ATR_HASH_MASK 0x7fff -#define IXGBE_ATR_L4TYPE_MASK 0x3 -#define IXGBE_ATR_L4TYPE_UDP 0x1 -#define IXGBE_ATR_L4TYPE_TCP 0x2 -#define IXGBE_ATR_L4TYPE_SCTP 0x3 -#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4 -enum ixgbe_atr_flow_type { - IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0, - IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1, - IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2, - IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3, - IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4, - IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5, - IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6, - IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7, -}; - -/* Flow Director ATR input struct. */ -union ixgbe_atr_input { - /* - * Byte layout in order, all values with MSB first: - * - * vm_pool - 1 byte - * flow_type - 1 byte - * vlan_id - 2 bytes - * src_ip - 16 bytes - * dst_ip - 16 bytes - * src_port - 2 bytes - * dst_port - 2 bytes - * flex_bytes - 2 bytes - * bkt_hash - 2 bytes - */ - struct { - u8 vm_pool; - u8 flow_type; - __be16 vlan_id; - __be32 dst_ip[4]; - __be32 src_ip[4]; - __be16 src_port; - __be16 dst_port; - __be16 flex_bytes; - __be16 bkt_hash; - } formatted; - __be32 dword_stream[11]; -}; - -/* Flow Director compressed ATR hash input struct */ -union ixgbe_atr_hash_dword { - struct { - u8 vm_pool; - u8 flow_type; - __be16 vlan_id; - } formatted; - __be32 ip; - struct { - __be16 src; - __be16 dst; - } port; - __be16 flex_bytes; - __be32 dword; -}; - - -/* - * Unavailable: The FCoE Boot Option ROM is not present in the flash. - * Disabled: Present; boot order is not set for any targets on the port. - * Enabled: Present; boot order is set for at least one target on the port. 
- */ -enum ixgbe_fcoe_boot_status { - ixgbe_fcoe_bootstatus_disabled = 0, - ixgbe_fcoe_bootstatus_enabled = 1, - ixgbe_fcoe_bootstatus_unavailable = 0xFFFF -}; - -enum ixgbe_eeprom_type { - ixgbe_eeprom_uninitialized = 0, - ixgbe_eeprom_spi, - ixgbe_flash, - ixgbe_eeprom_none /* No NVM support */ -}; - -enum ixgbe_mac_type { - ixgbe_mac_unknown = 0, - ixgbe_mac_82598EB, - ixgbe_mac_82599EB, - ixgbe_mac_X540, - ixgbe_num_macs -}; - -enum ixgbe_phy_type { - ixgbe_phy_unknown = 0, - ixgbe_phy_none, - ixgbe_phy_tn, - ixgbe_phy_aq, - ixgbe_phy_cu_unknown, - ixgbe_phy_qt, - ixgbe_phy_xaui, - ixgbe_phy_nl, - ixgbe_phy_sfp_passive_tyco, - ixgbe_phy_sfp_passive_unknown, - ixgbe_phy_sfp_active_unknown, - ixgbe_phy_sfp_avago, - ixgbe_phy_sfp_ftl, - ixgbe_phy_sfp_ftl_active, - ixgbe_phy_sfp_unknown, - ixgbe_phy_sfp_intel, - ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ - ixgbe_phy_generic -}; - -/* - * SFP+ module type IDs: - * - * ID Module Type - * ============= - * 0 SFP_DA_CU - * 1 SFP_SR - * 2 SFP_LR - * 3 SFP_DA_CU_CORE0 - 82599-specific - * 4 SFP_DA_CU_CORE1 - 82599-specific - * 5 SFP_SR/LR_CORE0 - 82599-specific - * 6 SFP_SR/LR_CORE1 - 82599-specific - */ -enum ixgbe_sfp_type { - ixgbe_sfp_type_da_cu = 0, - ixgbe_sfp_type_sr = 1, - ixgbe_sfp_type_lr = 2, - ixgbe_sfp_type_da_cu_core0 = 3, - ixgbe_sfp_type_da_cu_core1 = 4, - ixgbe_sfp_type_srlr_core0 = 5, - ixgbe_sfp_type_srlr_core1 = 6, - ixgbe_sfp_type_da_act_lmt_core0 = 7, - ixgbe_sfp_type_da_act_lmt_core1 = 8, - ixgbe_sfp_type_1g_cu_core0 = 9, - ixgbe_sfp_type_1g_cu_core1 = 10, - ixgbe_sfp_type_1g_sx_core0 = 11, - ixgbe_sfp_type_1g_sx_core1 = 12, - ixgbe_sfp_type_not_present = 0xFFFE, - ixgbe_sfp_type_unknown = 0xFFFF -}; - -enum ixgbe_media_type { - ixgbe_media_type_unknown = 0, - ixgbe_media_type_fiber, - ixgbe_media_type_fiber_qsfp, - ixgbe_media_type_fiber_lco, - ixgbe_media_type_copper, - ixgbe_media_type_backplane, - ixgbe_media_type_cx4, - ixgbe_media_type_virtual -}; - -/* Flow Control Settings */ -enum ixgbe_fc_mode { - ixgbe_fc_none = 0, - ixgbe_fc_rx_pause, - ixgbe_fc_tx_pause, - ixgbe_fc_full, - ixgbe_fc_default -}; - -/* Smart Speed Settings */ -#define IXGBE_SMARTSPEED_MAX_RETRIES 3 -enum ixgbe_smart_speed { - ixgbe_smart_speed_auto = 0, - ixgbe_smart_speed_on, - ixgbe_smart_speed_off -}; - -/* PCI bus types */ -enum ixgbe_bus_type { - ixgbe_bus_type_unknown = 0, - ixgbe_bus_type_pci, - ixgbe_bus_type_pcix, - ixgbe_bus_type_pci_express, - ixgbe_bus_type_reserved -}; - -/* PCI bus speeds */ -enum ixgbe_bus_speed { - ixgbe_bus_speed_unknown = 0, - ixgbe_bus_speed_33 = 33, - ixgbe_bus_speed_66 = 66, - ixgbe_bus_speed_100 = 100, - ixgbe_bus_speed_120 = 120, - ixgbe_bus_speed_133 = 133, - ixgbe_bus_speed_2500 = 2500, - ixgbe_bus_speed_5000 = 5000, - ixgbe_bus_speed_8000 = 8000, - ixgbe_bus_speed_reserved -}; - -/* PCI bus widths */ -enum ixgbe_bus_width { - ixgbe_bus_width_unknown = 0, - ixgbe_bus_width_pcie_x1 = 1, - ixgbe_bus_width_pcie_x2 = 2, - ixgbe_bus_width_pcie_x4 = 4, - ixgbe_bus_width_pcie_x8 = 8, - ixgbe_bus_width_32 = 32, - ixgbe_bus_width_64 = 64, - ixgbe_bus_width_reserved -}; - -struct ixgbe_addr_filter_info { - u32 num_mc_addrs; - u32 rar_used_count; - u32 mta_in_use; - u32 overflow_promisc; - bool user_set_promisc; -}; - -/* Bus parameters */ -struct ixgbe_bus_info { - enum ixgbe_bus_speed speed; - enum ixgbe_bus_width width; - enum ixgbe_bus_type type; - - u16 func; - u16 lan_id; -}; - -/* Flow control parameters */ -struct ixgbe_fc_info { - u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* 
Flow Ctrl High-water */ - u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */ - u16 pause_time; /* Flow Control Pause timer */ - bool send_xon; /* Flow control send XON */ - bool strict_ieee; /* Strict IEEE mode */ - bool disable_fc_autoneg; /* Do not autonegotiate FC */ - bool fc_was_autonegged; /* Is current_mode the result of autonegging? */ - enum ixgbe_fc_mode current_mode; /* FC mode in effect */ - enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */ -}; - -/* Statistics counters collected by the MAC */ -struct ixgbe_hw_stats { - u64 crcerrs; - u64 illerrc; - u64 errbc; - u64 mspdc; - u64 mpctotal; - u64 mpc[8]; - u64 mlfc; - u64 mrfc; - u64 rlec; - u64 lxontxc; - u64 lxonrxc; - u64 lxofftxc; - u64 lxoffrxc; - u64 pxontxc[8]; - u64 pxonrxc[8]; - u64 pxofftxc[8]; - u64 pxoffrxc[8]; - u64 prc64; - u64 prc127; - u64 prc255; - u64 prc511; - u64 prc1023; - u64 prc1522; - u64 gprc; - u64 bprc; - u64 mprc; - u64 gptc; - u64 gorc; - u64 gotc; - u64 rnbc[8]; - u64 ruc; - u64 rfc; - u64 roc; - u64 rjc; - u64 mngprc; - u64 mngpdc; - u64 mngptc; - u64 tor; - u64 tpr; - u64 tpt; - u64 ptc64; - u64 ptc127; - u64 ptc255; - u64 ptc511; - u64 ptc1023; - u64 ptc1522; - u64 mptc; - u64 bptc; - u64 xec; - u64 qprc[16]; - u64 qptc[16]; - u64 qbrc[16]; - u64 qbtc[16]; - u64 qprdc[16]; - u64 pxon2offc[8]; - u64 fdirustat_add; - u64 fdirustat_remove; - u64 fdirfstat_fadd; - u64 fdirfstat_fremove; - u64 fdirmatch; - u64 fdirmiss; - u64 fccrc; - u64 fclast; - u64 fcoerpdc; - u64 fcoeprc; - u64 fcoeptc; - u64 fcoedwrc; - u64 fcoedwtc; - u64 fcoe_noddp; - u64 fcoe_noddp_ext_buff; - u64 ldpcec; - u64 pcrc8ec; - u64 b2ospc; - u64 b2ogprc; - u64 o2bgptc; - u64 o2bspc; -}; - -/* forward declaration */ -struct ixgbe_hw; - -/* iterator type for walking multicast address lists */ -typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, - u32 *vmdq); - -/* Function pointer table */ -struct ixgbe_eeprom_operations { - s32 (*init_params)(struct ixgbe_hw *); - s32 (*read)(struct ixgbe_hw *, u16, u16 *); - s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *); - s32 (*write)(struct ixgbe_hw *, u16, u16); - s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *); - s32 (*validate_checksum)(struct ixgbe_hw *, u16 *); - s32 (*update_checksum)(struct ixgbe_hw *); - u16 (*calc_checksum)(struct ixgbe_hw *); -}; - -struct ixgbe_mac_operations { - s32 (*init_hw)(struct ixgbe_hw *); - s32 (*reset_hw)(struct ixgbe_hw *); - s32 (*start_hw)(struct ixgbe_hw *); - s32 (*clear_hw_cntrs)(struct ixgbe_hw *); - enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *); - u32 (*get_supported_physical_layer)(struct ixgbe_hw *); - s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *); - s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *); - s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *); - s32 (*get_device_caps)(struct ixgbe_hw *, u16 *); - s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *); - s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *); - s32 (*stop_adapter)(struct ixgbe_hw *); - s32 (*get_bus_info)(struct ixgbe_hw *); - void (*set_lan_id)(struct ixgbe_hw *); - s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*); - s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8); - s32 (*setup_sfp)(struct ixgbe_hw *); - s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); - s32 (*disable_sec_rx_path)(struct ixgbe_hw *); - s32 (*enable_sec_rx_path)(struct ixgbe_hw *); - s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16); - void (*release_swfw_sync)(struct ixgbe_hw *, u16); - - /* Link */ - void 
(*disable_tx_laser)(struct ixgbe_hw *); - void (*enable_tx_laser)(struct ixgbe_hw *); - void (*flap_tx_laser)(struct ixgbe_hw *); - s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); - s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); - s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, - bool *); - - /* Packet Buffer manipulation */ - void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int); - - /* LED */ - s32 (*led_on)(struct ixgbe_hw *, u32); - s32 (*led_off)(struct ixgbe_hw *, u32); - s32 (*blink_led_start)(struct ixgbe_hw *, u32); - s32 (*blink_led_stop)(struct ixgbe_hw *, u32); - - /* RAR, Multicast, VLAN */ - s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); - s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *); - s32 (*clear_rar)(struct ixgbe_hw *, u32); - s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32); - s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); - s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32); - s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); - s32 (*init_rx_addrs)(struct ixgbe_hw *); - s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32, - ixgbe_mc_addr_itr); - s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, - ixgbe_mc_addr_itr, bool clear); - s32 (*enable_mc)(struct ixgbe_hw *); - s32 (*disable_mc)(struct ixgbe_hw *); - s32 (*clear_vfta)(struct ixgbe_hw *); - s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool); - s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *); - s32 (*init_uta_tables)(struct ixgbe_hw *); - void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); - void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); - - /* Flow Control */ - s32 (*fc_enable)(struct ixgbe_hw *); - - /* Manageability interface */ - s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); - s32 (*get_thermal_sensor_data)(struct ixgbe_hw *); - s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); -}; - -struct ixgbe_phy_operations { - s32 (*identify)(struct ixgbe_hw *); - s32 (*identify_sfp)(struct ixgbe_hw *); - s32 (*init)(struct ixgbe_hw *); - s32 (*reset)(struct ixgbe_hw *); - s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *); - s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16); - s32 (*setup_link)(struct ixgbe_hw *); - s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool, - bool); - s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *); - s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *); - s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *); - s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8); - s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); - s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - void (*i2c_bus_clear)(struct ixgbe_hw *); - s32 (*check_overtemp)(struct ixgbe_hw *); -}; - -struct ixgbe_eeprom_info { - struct ixgbe_eeprom_operations ops; - enum ixgbe_eeprom_type type; - u32 semaphore_delay; - u16 word_size; - u16 address_bits; - u16 word_page_size; -}; - -#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01 -struct ixgbe_mac_info { - struct ixgbe_mac_operations ops; - enum ixgbe_mac_type type; - u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; - u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; - u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; - /* prefix for World Wide Node Name (WWNN) */ - u16 wwnn_prefix; - /* prefix for World Wide Port Name (WWPN) */ - u16 wwpn_prefix; -#define IXGBE_MAX_MTA 128 - u32 mta_shadow[IXGBE_MAX_MTA]; - s32 mc_filter_type; - u32 mcft_size; - u32 vft_size; - u32 num_rar_entries; - u32 rar_highwater; - u32 rx_pb_size; - u32 
max_tx_queues; - u32 max_rx_queues; - u32 orig_autoc; - u8 san_mac_rar_index; - u32 orig_autoc2; - u16 max_msix_vectors; - bool arc_subsystem_valid; - bool orig_link_settings_stored; - bool autotry_restart; - u8 flags; - struct ixgbe_thermal_sensor_data thermal_sensor_data; -}; - -struct ixgbe_phy_info { - struct ixgbe_phy_operations ops; - enum ixgbe_phy_type type; - u32 addr; - u32 id; - enum ixgbe_sfp_type sfp_type; - bool sfp_setup_needed; - u32 revision; - enum ixgbe_media_type media_type; - bool reset_disable; - ixgbe_autoneg_advertised autoneg_advertised; - enum ixgbe_smart_speed smart_speed; - bool smart_speed_active; - bool multispeed_fiber; - bool reset_if_overtemp; - bool qsfp_shared_i2c_bus; -}; - -#include "ixgbe_mbx.h" - -struct ixgbe_mbx_operations { - void (*init_params)(struct ixgbe_hw *hw); - s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16); - s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16); - s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16); - s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16); - s32 (*check_for_msg)(struct ixgbe_hw *, u16); - s32 (*check_for_ack)(struct ixgbe_hw *, u16); - s32 (*check_for_rst)(struct ixgbe_hw *, u16); -}; - -struct ixgbe_mbx_stats { - u32 msgs_tx; - u32 msgs_rx; - - u32 acks; - u32 reqs; - u32 rsts; -}; - -struct ixgbe_mbx_info { - struct ixgbe_mbx_operations ops; - struct ixgbe_mbx_stats stats; - u32 timeout; - u32 udelay; - u32 v2p_mailbox; - u16 size; -}; - -struct ixgbe_hw { - u8 __iomem *hw_addr; - void *back; - struct ixgbe_mac_info mac; - struct ixgbe_addr_filter_info addr_ctrl; - struct ixgbe_fc_info fc; - struct ixgbe_phy_info phy; - struct ixgbe_eeprom_info eeprom; - struct ixgbe_bus_info bus; - struct ixgbe_mbx_info mbx; - u16 device_id; - u16 vendor_id; - u16 subsystem_device_id; - u16 subsystem_vendor_id; - u8 revision_id; - bool adapter_stopped; - bool force_full_reset; - bool allow_unsupported_sfp; -}; - -#define ixgbe_call_func(hw, func, params, error) \ - (func != NULL) ? 
func params : error - - -/* Error Codes */ -#define IXGBE_ERR_EEPROM -1 -#define IXGBE_ERR_EEPROM_CHECKSUM -2 -#define IXGBE_ERR_PHY -3 -#define IXGBE_ERR_CONFIG -4 -#define IXGBE_ERR_PARAM -5 -#define IXGBE_ERR_MAC_TYPE -6 -#define IXGBE_ERR_UNKNOWN_PHY -7 -#define IXGBE_ERR_LINK_SETUP -8 -#define IXGBE_ERR_ADAPTER_STOPPED -9 -#define IXGBE_ERR_INVALID_MAC_ADDR -10 -#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12 -#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 -#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 -#define IXGBE_ERR_RESET_FAILED -15 -#define IXGBE_ERR_SWFW_SYNC -16 -#define IXGBE_ERR_PHY_ADDR_INVALID -17 -#define IXGBE_ERR_I2C -18 -#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 -#define IXGBE_ERR_SFP_NOT_PRESENT -20 -#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 -#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 -#define IXGBE_ERR_FDIR_REINIT_FAILED -23 -#define IXGBE_ERR_EEPROM_VERSION -24 -#define IXGBE_ERR_NO_SPACE -25 -#define IXGBE_ERR_OVERTEMP -26 -#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 -#define IXGBE_ERR_FC_NOT_SUPPORTED -28 -#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 -#define IXGBE_ERR_PBA_SECTION -31 -#define IXGBE_ERR_INVALID_ARGUMENT -32 -#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 -#define IXGBE_ERR_OUT_OF_MEM -34 - -#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF - -#define UNREFERENCED_XPARAMETER - -#endif /* _IXGBE_TYPE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c deleted file mode 100644 index 07b219a1..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c +++ /dev/null @@ -1,922 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe_x540.h" -#include "ixgbe_type.h" -#include "ixgbe_api.h" -#include "ixgbe_common.h" -#include "ixgbe_phy.h" - -static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw); -static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw); -static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw); -static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw); - -/** - * ixgbe_init_ops_X540 - Inits func ptrs and MAC type - * @hw: pointer to hardware structure - * - * Initialize the function pointers and assign the MAC type for X540. - * Does not touch the hardware. 
- **/ -s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw) -{ - struct ixgbe_mac_info *mac = &hw->mac; - struct ixgbe_phy_info *phy = &hw->phy; - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - s32 ret_val; - - ret_val = ixgbe_init_phy_ops_generic(hw); - ret_val = ixgbe_init_ops_generic(hw); - - - /* EEPROM */ - eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540; - eeprom->ops.read = &ixgbe_read_eerd_X540; - eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540; - eeprom->ops.write = &ixgbe_write_eewr_X540; - eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540; - eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540; - eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540; - eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540; - - /* PHY */ - phy->ops.init = &ixgbe_init_phy_ops_generic; - phy->ops.reset = NULL; - - /* MAC */ - mac->ops.reset_hw = &ixgbe_reset_hw_X540; - mac->ops.get_media_type = &ixgbe_get_media_type_X540; - mac->ops.get_supported_physical_layer = - &ixgbe_get_supported_physical_layer_X540; - mac->ops.read_analog_reg8 = NULL; - mac->ops.write_analog_reg8 = NULL; - mac->ops.start_hw = &ixgbe_start_hw_X540; - mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; - mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; - mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; - mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; - mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; - mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540; - mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540; - mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; - mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; - - /* RAR, Multicast, VLAN */ - mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; - mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; - mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; - mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; - mac->rar_highwater = 1; - mac->ops.set_vfta = &ixgbe_set_vfta_generic; - mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; - mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; - mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; - mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; - mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; - - /* Link */ - mac->ops.get_link_capabilities = - &ixgbe_get_copper_link_capabilities_generic; - mac->ops.setup_link = &ixgbe_setup_mac_link_X540; - mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; - mac->ops.check_link = &ixgbe_check_mac_link_generic; - - mac->mcft_size = 128; - mac->vft_size = 128; - mac->num_rar_entries = 128; - mac->rx_pb_size = 384; - mac->max_tx_queues = 128; - mac->max_rx_queues = 128; - mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); - - /* - * FWSM register - * ARC supported; valid only if manageability features are - * enabled. - */ - mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & - IXGBE_FWSM_MODE_MASK) ? 
true : false; - - //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; - - /* LEDs */ - mac->ops.blink_led_start = ixgbe_blink_led_start_X540; - mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540; - - /* Manageability interface */ - mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; - - return ret_val; -} - -/** - * ixgbe_get_link_capabilities_X540 - Determines link capabilities - * @hw: pointer to hardware structure - * @speed: pointer to link speed - * @autoneg: true when autoneg or autotry is enabled - * - * Determines the link capabilities by reading the AUTOC register. - **/ -s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, - bool *autoneg) -{ - ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg); - - return 0; -} - -/** - * ixgbe_get_media_type_X540 - Get media type - * @hw: pointer to hardware structure - * - * Returns the media type (fiber, copper, backplane) - **/ -enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw) -{ - return ixgbe_media_type_copper; -} - -/** - * ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities - * @hw: pointer to hardware structure - * @speed: new link speed - * @autoneg: true if autonegotiation enabled - * @autoneg_wait_to_complete: true when waiting for completion is needed - **/ -s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, - ixgbe_link_speed speed, bool autoneg, - bool autoneg_wait_to_complete) -{ - return hw->phy.ops.setup_link_speed(hw, speed, autoneg, - autoneg_wait_to_complete); -} - -/** - * ixgbe_reset_hw_X540 - Perform hardware reset - * @hw: pointer to hardware structure - * - * Resets the hardware by resetting the transmit and receive units, masks - * and clears all interrupts, and perform a reset. - **/ -s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw) -{ - s32 status = 0; - - /* - * Userland DPDK takes the ownershiop of device - * Kernel driver here used as the simple path for ethtool only - * Won't real reset device anyway - */ -#if 0 - u32 ctrl, i; - - /* Call adapter stop to disable tx/rx and clear interrupts */ - status = hw->mac.ops.stop_adapter(hw); - if (status != 0) - goto reset_hw_out; - - /* flush pending Tx transactions */ - ixgbe_clear_tx_pending(hw); - -mac_reset_top: - ctrl = IXGBE_CTRL_RST; - ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); - IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); - IXGBE_WRITE_FLUSH(hw); - - /* Poll for reset bit to self-clear indicating reset is complete */ - for (i = 0; i < 10; i++) { - udelay(1); - ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); - if (!(ctrl & IXGBE_CTRL_RST_MASK)) - break; - } - - if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; - hw_dbg(hw, "Reset polling failed to complete.\n"); - } - msleep(100); - - /* - * Double resets are required for recovery from certain error - * conditions. Between resets, it is necessary to stall to allow time - * for any pending HW events to complete. - */ - if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { - hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; - goto mac_reset_top; - } - - /* Set the Rx packet buffer size. */ - IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT); - -#endif - - /* Store the permanent mac address */ - hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); - - /* - * Store MAC address from RAR0, clear receive address registers, and - * clear the multicast table. Also reset num_rar_entries to 128, - * since we modify this value when programming the SAN MAC address. 
- */ - hw->mac.num_rar_entries = 128; - hw->mac.ops.init_rx_addrs(hw); - - /* Store the permanent SAN mac address */ - hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); - - /* Add the SAN MAC address to the RAR only if it's a valid address */ - if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { - hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, - hw->mac.san_addr, 0, IXGBE_RAH_AV); - - /* Save the SAN MAC RAR index */ - hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; - - /* Reserve the last RAR for the SAN MAC address */ - hw->mac.num_rar_entries--; - } - - /* Store the alternative WWNN/WWPN prefix */ - hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, - &hw->mac.wwpn_prefix); - -//reset_hw_out: - return status; -} - -/** - * ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx - * @hw: pointer to hardware structure - * - * Starts the hardware using the generic start_hw function - * and the generation start_hw function. - * Then performs revision-specific operations, if any. - **/ -s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) -{ - s32 ret_val = 0; - - ret_val = ixgbe_start_hw_generic(hw); - if (ret_val != 0) - goto out; - - ret_val = ixgbe_start_hw_gen2(hw); - -out: - return ret_val; -} - -/** - * ixgbe_get_supported_physical_layer_X540 - Returns physical layer type - * @hw: pointer to hardware structure - * - * Determines physical layer capabilities of the current configuration. - **/ -u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw) -{ - u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; - u16 ext_ability = 0; - - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); - if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; - if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; - if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) - physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; - - return physical_layer; -} - -/** - * ixgbe_init_eeprom_params_X540 - Initialize EEPROM params - * @hw: pointer to hardware structure - * - * Initializes the EEPROM parameters ixgbe_eeprom_info within the - * ixgbe_hw struct in order to set up EEPROM access. - **/ -s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) -{ - struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - u32 eec; - u16 eeprom_size; - - if (eeprom->type == ixgbe_eeprom_uninitialized) { - eeprom->semaphore_delay = 10; - eeprom->type = ixgbe_flash; - - eec = IXGBE_READ_REG(hw, IXGBE_EEC); - eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> - IXGBE_EEC_SIZE_SHIFT); - eeprom->word_size = 1 << (eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); - - hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", - eeprom->type, eeprom->word_size); - } - - return 0; -} - -/** - * ixgbe_read_eerd_X540- Read EEPROM word using EERD - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM using the EERD register. 
- **/ -s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) -{ - s32 status = 0; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) - status = ixgbe_read_eerd_generic(hw, offset, data); - else - status = IXGBE_ERR_SWFW_SYNC; - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return status; -} - -/** - * ixgbe_read_eerd_buffer_X540- Read EEPROM word(s) using EERD - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @words: number of words - * @data: word(s) read from the EEPROM - * - * Reads a 16 bit word(s) from the EEPROM using the EERD register. - **/ -s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, - u16 offset, u16 words, u16 *data) -{ - s32 status = 0; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) - status = ixgbe_read_eerd_buffer_generic(hw, offset, - words, data); - else - status = IXGBE_ERR_SWFW_SYNC; - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return status; -} - -/** - * ixgbe_write_eewr_X540 - Write EEPROM word using EEWR - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to write - * @data: word write to the EEPROM - * - * Write a 16 bit word to the EEPROM using the EEWR register. - **/ -s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) -{ - s32 status = 0; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) - status = ixgbe_write_eewr_generic(hw, offset, data); - else - status = IXGBE_ERR_SWFW_SYNC; - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return status; -} - -/** - * ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to write - * @words: number of words - * @data: word(s) write to the EEPROM - * - * Write a 16 bit word(s) to the EEPROM using the EEWR register. - **/ -s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, - u16 offset, u16 words, u16 *data) -{ - s32 status = 0; - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) - status = ixgbe_write_eewr_buffer_generic(hw, offset, - words, data); - else - status = IXGBE_ERR_SWFW_SYNC; - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return status; -} - -/** - * ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum - * - * This function does not use synchronization for EERD and EEWR. It can - * be used internally by function which utilize ixgbe_acquire_swfw_sync_X540. - * - * @hw: pointer to hardware structure - **/ -u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) -{ - u16 i; - u16 j; - u16 checksum = 0; - u16 length = 0; - u16 pointer = 0; - u16 word = 0; - - /* - * Do not use hw->eeprom.ops.read because we do not want to take - * the synchronization semaphores here. Instead use - * ixgbe_read_eerd_generic - */ - - /* Include 0x0-0x3F in the checksum */ - for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { - if (ixgbe_read_eerd_generic(hw, i, &word) != 0) { - hw_dbg(hw, "EEPROM read failed\n"); - break; - } - checksum += word; - } - - /* - * Include all data from pointers 0x3, 0x6-0xE. This excludes the - * FW, PHY module, and PCIe Expansion/Option ROM pointers. - */ - for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { - if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR) - continue; - - if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) { - hw_dbg(hw, "EEPROM read failed\n"); - break; - } - - /* Skip pointer section if the pointer is invalid. 
*/ - if (pointer == 0xFFFF || pointer == 0 || - pointer >= hw->eeprom.word_size) - continue; - - if (ixgbe_read_eerd_generic(hw, pointer, &length) != - 0) { - hw_dbg(hw, "EEPROM read failed\n"); - break; - } - - /* Skip pointer section if length is invalid. */ - if (length == 0xFFFF || length == 0 || - (pointer + length) >= hw->eeprom.word_size) - continue; - - for (j = pointer+1; j <= pointer+length; j++) { - if (ixgbe_read_eerd_generic(hw, j, &word) != - 0) { - hw_dbg(hw, "EEPROM read failed\n"); - break; - } - checksum += word; - } - } - - checksum = (u16)IXGBE_EEPROM_SUM - checksum; - - return checksum; -} - -/** - * ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum - * @hw: pointer to hardware structure - * @checksum_val: calculated checksum - * - * Performs checksum calculation and validates the EEPROM checksum. If the - * caller does not need checksum_val, the value can be NULL. - **/ -s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, - u16 *checksum_val) -{ - s32 status; - u16 checksum; - u16 read_checksum = 0; - - /* - * Read the first word from the EEPROM. If this times out or fails, do - * not continue or we could be in for a very long wait while every - * EEPROM read fails - */ - status = hw->eeprom.ops.read(hw, 0, &checksum); - - if (status != 0) { - hw_dbg(hw, "EEPROM read failed\n"); - goto out; - } - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) { - checksum = hw->eeprom.ops.calc_checksum(hw); - - /* - * Do not use hw->eeprom.ops.read because we do not want to take - * the synchronization semaphores twice here. - */ - ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM, - &read_checksum); - - /* - * Verify read checksum from EEPROM is the same as - * calculated checksum - */ - if (read_checksum != checksum) - status = IXGBE_ERR_EEPROM_CHECKSUM; - - /* If the user cares, return the calculated checksum */ - if (checksum_val) - *checksum_val = checksum; - } else { - status = IXGBE_ERR_SWFW_SYNC; - } - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); -out: - return status; -} - -/** - * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash - * @hw: pointer to hardware structure - * - * After writing EEPROM to shadow RAM using EEWR register, software calculates - * checksum and updates the EEPROM and instructs the hardware to update - * the flash. - **/ -s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) -{ - s32 status; - u16 checksum; - - /* - * Read the first word from the EEPROM. If this times out or fails, do - * not continue or we could be in for a very long wait while every - * EEPROM read fails - */ - status = hw->eeprom.ops.read(hw, 0, &checksum); - - if (status != 0) - hw_dbg(hw, "EEPROM read failed\n"); - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - 0) { - checksum = hw->eeprom.ops.calc_checksum(hw); - - /* - * Do not use hw->eeprom.ops.write because we do not want to - * take the synchronization semaphores twice here. - */ - status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM, - checksum); - - if (status == 0) - status = ixgbe_update_flash_X540(hw); - else - status = IXGBE_ERR_SWFW_SYNC; - } - - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - - return status; -} - -/** - * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device - * @hw: pointer to hardware structure - * - * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy - * EEPROM from shadow RAM to the flash device. 
- **/ -static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) -{ - u32 flup; - s32 status = IXGBE_ERR_EEPROM; - - status = ixgbe_poll_flash_update_done_X540(hw); - if (status == IXGBE_ERR_EEPROM) { - hw_dbg(hw, "Flash update time out\n"); - goto out; - } - - flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP; - IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); - - status = ixgbe_poll_flash_update_done_X540(hw); - if (status == 0) - hw_dbg(hw, "Flash update complete\n"); - else - hw_dbg(hw, "Flash update time out\n"); - - if (hw->revision_id == 0) { - flup = IXGBE_READ_REG(hw, IXGBE_EEC); - - if (flup & IXGBE_EEC_SEC1VAL) { - flup |= IXGBE_EEC_FLUP; - IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); - } - - status = ixgbe_poll_flash_update_done_X540(hw); - if (status == 0) - hw_dbg(hw, "Flash update complete\n"); - else - hw_dbg(hw, "Flash update time out\n"); - } -out: - return status; -} - -/** - * ixgbe_poll_flash_update_done_X540 - Poll flash update status - * @hw: pointer to hardware structure - * - * Polls the FLUDONE (bit 26) of the EEC Register to determine when the - * flash update is done. - **/ -static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) -{ - u32 i; - u32 reg; - s32 status = IXGBE_ERR_EEPROM; - - for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) { - reg = IXGBE_READ_REG(hw, IXGBE_EEC); - if (reg & IXGBE_EEC_FLUDONE) { - status = 0; - break; - } - udelay(5); - } - return status; -} - -/** - * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to acquire - * - * Acquires the SWFW semaphore thought the SW_FW_SYNC register for - * the specified function (CSR, PHY0, PHY1, NVM, Flash) - **/ -s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 5; - u32 hwmask = 0; - u32 timeout = 200; - u32 i; - s32 ret_val = 0; - - if (swmask == IXGBE_GSSR_EEP_SM) - hwmask = IXGBE_GSSR_FLASH_SM; - - /* SW only mask doesn't have FW bit pair */ - if (swmask == IXGBE_GSSR_SW_MNG_SM) - fwmask = 0; - - for (i = 0; i < timeout; i++) { - /* - * SW NVM semaphore bit is used for access to all - * SW_FW_SYNC bits (not just NVM) - */ - if (ixgbe_get_swfw_sync_semaphore(hw)) { - ret_val = IXGBE_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); - if (!(swfw_sync & (fwmask | swmask | hwmask))) { - swfw_sync |= swmask; - IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); - ixgbe_release_swfw_sync_semaphore(hw); - msleep(5); - goto out; - } else { - /* - * Firmware currently using resource (fwmask), hardware - * currently using resource (hwmask), or other software - * thread currently using resource (swmask) - */ - ixgbe_release_swfw_sync_semaphore(hw); - msleep(5); - } - } - - /* Failed to get SW only semaphore */ - if (swmask == IXGBE_GSSR_SW_MNG_SM) { - ret_val = IXGBE_ERR_SWFW_SYNC; - goto out; - } - - /* If the resource is not released by the FW/HW the SW can assume that - * the FW/HW malfunctions. In that case the SW should sets the SW bit(s) - * of the requested resource(s) while ignoring the corresponding FW/HW - * bits in the SW_FW_SYNC register. 
- */ - swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); - if (swfw_sync & (fwmask | hwmask)) { - if (ixgbe_get_swfw_sync_semaphore(hw)) { - ret_val = IXGBE_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); - ixgbe_release_swfw_sync_semaphore(hw); - msleep(5); - } - -out: - return ret_val; -} - -/** - * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore - * @hw: pointer to hardware structure - * @mask: Mask to specify which semaphore to release - * - * Releases the SWFW semaphore through the SW_FW_SYNC register - * for the specified function (CSR, PHY0, PHY1, EVM, Flash) - **/ -void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - - ixgbe_get_swfw_sync_semaphore(hw); - - swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); - swfw_sync &= ~swmask; - IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); - - ixgbe_release_swfw_sync_semaphore(hw); - msleep(5); -} - -/** - * ixgbe_get_nvm_semaphore - Get hardware semaphore - * @hw: pointer to hardware structure - * - * Sets the hardware semaphores so SW/FW can gain control of shared resources - **/ -static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) -{ - s32 status = IXGBE_ERR_EEPROM; - u32 timeout = 2000; - u32 i; - u32 swsm; - - /* Get SMBI software semaphore between device drivers first */ - for (i = 0; i < timeout; i++) { - /* - * If the SMBI bit is 0 when we read it, then the bit will be - * set and we have the semaphore - */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - if (!(swsm & IXGBE_SWSM_SMBI)) { - status = 0; - break; - } - udelay(50); - } - - /* Now get the semaphore between SW/FW through the REGSMP bit */ - if (status == 0) { - for (i = 0; i < timeout; i++) { - swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); - if (!(swsm & IXGBE_SWFW_REGSMP)) - break; - - udelay(50); - } - - /* - * Release semaphores and return error if SW NVM semaphore - * was not granted because we don't have access to the EEPROM - */ - if (i >= timeout) { - hw_dbg(hw, "REGSMP Software NVM semaphore not " - "granted.\n"); - ixgbe_release_swfw_sync_semaphore(hw); - status = IXGBE_ERR_EEPROM; - } - } else { - hw_dbg(hw, "Software semaphore SMBI between device drivers " - "not granted.\n"); - } - - return status; -} - -/** - * ixgbe_release_nvm_semaphore - Release hardware semaphore - * @hw: pointer to hardware structure - * - * This function clears hardware semaphore bits. - **/ -static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) -{ - u32 swsm; - - /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */ - - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - swsm &= ~IXGBE_SWSM_SMBI; - IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); - - swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); - swsm &= ~IXGBE_SWFW_REGSMP; - IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm); - - IXGBE_WRITE_FLUSH(hw); -} - -/** - * ixgbe_blink_led_start_X540 - Blink LED based on index. - * @hw: pointer to hardware structure - * @index: led number to blink - * - * Devices that implement the version 2 interface: - * X540 - **/ -s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) -{ - u32 macc_reg; - u32 ledctl_reg; - ixgbe_link_speed speed; - bool link_up; - - /* - * Link should be up in order for the blink bit in the LED control - * register to work. Force link and speed in the MAC if link is down. - * This will be reversed when we stop the blinking. 
- */ - hw->mac.ops.check_link(hw, &speed, &link_up, false); - if (link_up == false) { - macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); - macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS; - IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); - } - /* Set the LED to LINK_UP + BLINK. */ - ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); - ledctl_reg |= IXGBE_LED_BLINK(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} - -/** - * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index. - * @hw: pointer to hardware structure - * @index: led number to stop blinking - * - * Devices that implement the version 2 interface: - * X540 - **/ -s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) -{ - u32 macc_reg; - u32 ledctl_reg; - - /* Restore the LED to its default value. */ - ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); - ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); - ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); - ledctl_reg &= ~IXGBE_LED_BLINK(index); - IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); - - /* Unforce link and speed in the MAC. */ - macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); - macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS); - IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); - IXGBE_WRITE_FLUSH(hw); - - return 0; -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h deleted file mode 100644 index 96020911..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _IXGBE_X540_H_ -#define _IXGBE_X540_H_ - -#include "ixgbe_type.h" - -s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, - ixgbe_link_speed *speed, bool *autoneg); -enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw); -s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg, bool link_up_wait_to_complete); -s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw); -s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw); -u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw); - -s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw); -s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data); -s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, - u16 *data); -s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, - u16 *data); -s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw); -s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val); -u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw); - -s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); -void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); - -s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index); -s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index); -#endif /* _IXGBE_X540_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c deleted file mode 100644 index 6c994576..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c +++ /dev/null @@ -1,1231 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "ixgbe.h" -#include "kcompat.h" - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -/* From lib/vsprintf.c */ -#include - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define _kc_ZEROPAD 1 /* pad with zero */ -#define _kc_SIGN 2 /* unsigned/signed long */ -#define _kc_PLUS 4 /* show plus */ -#define _kc_SPACE 8 /* space if plus */ -#define _kc_LEFT 16 /* left justified */ -#define _kc_SPECIAL 32 /* 0x */ -#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits; - const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - int i; - - digits = (type & _kc_LARGE) ? large_digits : small_digits; - if (type & _kc_LEFT) - type &= ~_kc_ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & _kc_ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (type & _kc_SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & _kc_PLUS) { - sign = '+'; - size--; - } else if (type & _kc_SPACE) { - sign = ' '; - size--; - } - } - if (type & _kc_SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { - while(size-->0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - } - if (sign) { - if (buf <= end) - *buf = sign; - ++buf; - } - if (type & _kc_SPECIAL) { - if (base==8) { - if (buf <= end) - *buf = '0'; - ++buf; - } else if (base==16) { - if (buf <= end) - *buf = '0'; - ++buf; - if (buf <= end) - *buf = digits[33]; - ++buf; - } - } - if (!(type & _kc_LEFT)) { - while (size-- > 0) { - if (buf <= end) - *buf = c; - ++buf; - } - } - while (i < precision--) { - if (buf <= end) - *buf = '0'; - ++buf; - } - while (i-- > 0) { - if (buf <= end) - *buf = tmp[i]; - ++buf; - } - while (size-- > 0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - return buf; -} - -int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char *str, *end, c; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - str = buf; - end = buf + size - 1; - - if (end < buf - 1) { - end = ((void *) -1); - size = end - buf + 1; - } - - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str <= end) - *str = *fmt; - ++str; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= _kc_LEFT; goto repeat; - case '+': flags |= _kc_PLUS; goto repeat; - case ' ': flags |= _kc_SPACE; goto repeat; - case '#': flags |= _kc_SPECIAL; goto repeat; - case '0': flags |= _kc_ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= _kc_LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & _kc_LEFT)) { - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - } - c = (unsigned char) va_arg(args, int); - if (str <= end) - *str = c; - ++str; - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = ""; - - len = strnlen(s, precision); - - if (!(flags & _kc_LEFT)) { - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - } - for (i = 0; i < len; ++i) { - if (str <= end) - *str = 
*s; - ++str; ++s; - } - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= _kc_ZEROPAD; - } - str = number(str, end, - (unsigned long) va_arg(args, void *), - 16, field_width, precision, flags); - continue; - - - case 'n': - /* FIXME: - * What does C99 say about the overflow case here? */ - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - if (str <= end) - *str = '%'; - ++str; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= _kc_LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= _kc_SIGN; - case 'u': - break; - - default: - if (str <= end) - *str = '%'; - ++str; - if (*fmt) { - if (str <= end) - *str = *fmt; - ++str; - } else { - --fmt; - } - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & _kc_SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & _kc_SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & _kc_SIGN) - num = (signed int) num; - } - str = number(str, end, num, base, - field_width, precision, flags); - } - if (str <= end) - *str = '\0'; - else if (size > 0) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; - /* the trailing null byte doesn't count towards the total - * ++str; - */ - return str-buf; -} - -int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) 
-{ - va_list args; - int i; - - va_start(args, fmt); - i = _kc_vsnprintf(buf,size,fmt,args); - va_end(args); - return i; -} -#endif /* < 2.4.8 */ - - - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - unsigned int vfs_assigned = 0; -#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED - int pos; - struct pci_dev *vfdev; - unsigned short dev_id; - - /* only search if we are a PF */ - if (!dev->is_physfn) - return 0; - - /* find SR-IOV capability */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return 0; - - /* - * * determine the device ID for the VFs, the vendor ID will be the - * * same as the PF so there is no need to check for that one - * */ - pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); - - /* loop through all the VFs to see if we own any that are assigned */ - vfdev = pci_get_device(dev->vendor, dev_id, NULL); - while (vfdev) { - /* - * * It is considered assigned if it is a virtual function with - * * our dev as the physical function and the assigned bit is set - * */ - if (vfdev->is_virtfn && (vfdev->physfn == dev) && - (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) - vfs_assigned++; - - vfdev = pci_get_device(dev->vendor, dev_id, vfdev); - } - -#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ - return vfs_assigned; -} - -#endif /* CONFIG_PCI_IOV */ -#endif /* 3.10.0 */ - - - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#if defined(CONFIG_HIGHMEM) - -#ifndef PCI_DRAM_OFFSET -#define PCI_DRAM_OFFSET 0 -#endif - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return ((u64) (page - mem_map) << PAGE_SHIFT) + offset + - PCI_DRAM_OFFSET; -} - -#else /* CONFIG_HIGHMEM */ - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return pci_map_single(dev, (void *)page_address(page) + offset, size, - direction); -} - -#endif /* CONFIG_HIGHMEM */ - -void -_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, - int direction) -{ - return pci_unmap_single(dev, dma_addr, size, direction); -} - -#endif /* 2.4.13 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - -/**************************************/ -/* PCI DRIVER API */ - -int -_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) -{ - if (!pci_dma_supported(dev, mask)) - return -EIO; - dev->dma_mask = mask; - return 0; -} - -int -_kc_pci_request_regions(struct pci_dev *dev, char *res_name) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) { - if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } - } - return 0; -} - -void -_kc_pci_release_regions(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) - release_region(pci_resource_start(dev, i), 
pci_resource_len(dev, i)); - - else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) - release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - } -} - -/**************************************/ -/* NETWORK DRIVER API */ - -struct net_device * -_kc_alloc_etherdev(int sizeof_priv) -{ - struct net_device *dev; - int alloc_size; - - alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; - dev = kzalloc(alloc_size, GFP_KERNEL); - if (!dev) - return NULL; - - if (sizeof_priv) - dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); - dev->name[0] = '\0'; - ether_setup(dev); - - return dev; -} - -int -_kc_is_valid_ether_addr(u8 *addr) -{ - const char zaddr[6] = { 0, }; - - return !(addr[0] & 1) && memcmp(addr, zaddr, 6); -} - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -int -_kc_pci_set_power_state(struct pci_dev *dev, int state) -{ - return 0; -} - -int -_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) -{ - return 0; -} - -#endif /* 2.4.6 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, - int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -/* - * Original Copyright: - * find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + ffs(tmp); -} - -size_t _kc_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - -#endif /* 2.6.0 => 2.4.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i = vsnprintf(buf, size, fmt, args); - va_end(args); - return (i >= size) ? 
(size - 1) : i; -} -#endif /* < 2.6.4 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) -DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; -#endif /* < 2.6.10 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) -char *_kc_kstrdup(const char *s, unsigned int gfp) -{ - size_t len; - char *buf; - - if (!s) - return NULL; - - len = strlen(s) + 1; - buf = kmalloc(len, gfp); - if (buf) - memcpy(buf, s, len); - return buf; -} -#endif /* < 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) -void *_kc_kzalloc(size_t size, int flags) -{ - void *ret = kmalloc(size, flags); - if (ret) - memset(ret, 0, size); - return ret; -} -#endif /* <= 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) -int _kc_skb_pad(struct sk_buff *skb, int pad) -{ - int ntail; - - /* If the skbuff is non linear tailroom is always zero.. */ - if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return 0; - } - - ntail = skb->data_len + pad - (skb->end - skb->tail); - if (likely(skb_cloned(skb) || ntail > 0)) { - if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); - goto free_skb; - } - -#ifdef MAX_SKB_FRAGS - if (skb_is_nonlinear(skb) && - !__pskb_pull_tail(skb, skb->data_len)) - goto free_skb; - -#endif - memset(skb->data + skb->len, 0, pad); - return 0; - -free_skb: - kfree_skb(skb); - return -ENOMEM; -} - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) -int _kc_pci_save_state(struct pci_dev *pdev) -{ - struct adapter_struct *adapter = pci_get_drvdata(pdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset, pcie_link_status; - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) - /* no ->dev for 2.4 kernels */ - WARN_ON(pdev->dev.driver_data == NULL); -#endif - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset) { - if (!pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - } - pci_config_space_ich8lan(); -#ifdef HAVE_PCI_ERS - if (adapter->config_space == NULL) -#else - WARN_ON(adapter->config_space != NULL); -#endif - adapter->config_space = kmalloc(size, GFP_KERNEL); - if (!adapter->config_space) { - printk(KERN_ERR "Out of memory in pci_save_state\n"); - return -ENOMEM; - } - for (i = 0; i < (size / 4); i++) - pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); - return 0; -} - -void _kc_pci_restore_state(struct pci_dev *pdev) -{ - struct adapter_struct *adapter = pci_get_drvdata(pdev); - int size = PCI_CONFIG_SPACE_LEN, i; - u16 pcie_cap_offset; - u16 pcie_link_status; - - if (adapter->config_space != NULL) { - pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (pcie_cap_offset && - !pci_read_config_word(pdev, - pcie_cap_offset + PCIE_LINK_STATUS, - &pcie_link_status)) - size = PCIE_CONFIG_SPACE_LEN; - - pci_config_space_ich8lan(); - for (i = 0; i < (size / 4); i++) - pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); -#ifndef HAVE_PCI_ERS - kfree(adapter->config_space); - adapter->config_space = NULL; -#endif - } -} -#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -void _kc_free_netdev(struct net_device *netdev) -{ - 
struct adapter_struct *adapter = netdev_priv(netdev); - - if (adapter->config_space != NULL) - kfree(adapter->config_space); -#ifdef CONFIG_SYSFS - if (netdev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)netdev - netdev->padded); - } else { - BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); - netdev->reg_state = NETREG_RELEASED; - class_device_put(&netdev->class_dev); - } -#else - kfree((char *)netdev - netdev->padded); -#endif -} -#endif - -void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) -{ - void *p; - - p = kzalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; -} -#endif /* <= 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -/* hexdump code taken from lib/hexdump.c */ -static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, unsigned char *linebuf, - size_t linebuflen, bool ascii) -{ - const u8 *ptr = buf; - u8 ch; - int j, lx = 0; - int ascii_column; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - if (!len) - goto nil; - if (len > rowsize) /* limit to one line at a time */ - len = rowsize; - if ((len % groupsize) != 0) /* no mixed size output */ - groupsize = 1; - - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } - - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } - - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } - - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { - ch = ptr[j]; - linebuf[lx++] = hex_asc(ch >> 4); - linebuf[lx++] = hex_asc(ch & 0x0f); - linebuf[lx++] = ' '; - } - if (j) - lx--; - - ascii_column = 3 * rowsize + 2; - break; - } - if (!ascii) - goto nil; - - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) - linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? 
ptr[j] - : '.'; -nil: - linebuf[lx++] = '\0'; -} - -void _kc_print_hex_dump(const char *level, - const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) -{ - const u8 *ptr = buf; - int i, linelen, remaining = len; - unsigned char linebuf[200]; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - for (i = 0; i < len; i += rowsize) { - linelen = min(remaining, rowsize); - remaining -= rowsize; - _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); - - switch (prefix_type) { - case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); - break; - case DUMP_PREFIX_OFFSET: - printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); - break; - default: - printk("%s%s%s\n", level, prefix_str, linebuf); - break; - } - } -} -#endif /* < 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) -int ixgbe_dcb_netlink_register(void) -{ - return 0; -} - -int ixgbe_dcb_netlink_unregister(void) -{ - return 0; -} - -int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max) -{ - return 0; -} -#endif /* < 2.6.23 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifdef NAPI -struct net_device *napi_to_poll_dev(struct napi_struct *napi) -{ - struct adapter_q_vector *q_vector = container_of(napi, - struct adapter_q_vector, - napi); - return &q_vector->poll_dev; -} - -int __kc_adapter_clean(struct net_device *netdev, int *budget) -{ - int work_done; - int work_to_do = min(*budget, netdev->quota); - /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ - struct napi_struct *napi = netdev->priv; - work_done = napi->poll(napi, work_to_do); - *budget -= work_done; - netdev->quota -= work_done; - return (work_done >= work_to_do) ? 
1 : 0; -} -#endif /* NAPI */ -#endif /* <= 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) -{ - struct pci_dev *parent = pdev->bus->self; - u16 link_state; - int pos; - - if (!parent) - return; - - pos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); - link_state &= ~state; - pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); - } -} -#endif /* < 2.6.26 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -#ifdef HAVE_TX_MQ -void _kc_netif_tx_stop_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_stop_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_stop_subqueue(netdev, i); -} -void _kc_netif_tx_wake_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_wake_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_wake_subqueue(netdev, i); -} -void _kc_netif_tx_start_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_start_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_start_subqueue(netdev, i); -} -#endif /* HAVE_TX_MQ */ - -#ifndef __WARN_printf -void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) -{ - va_list args; - - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - - dump_stack(); -} -#endif /* __WARN_printf */ -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) - -int -_kc_pci_prepare_to_sleep(struct pci_dev *dev) -{ - pci_power_t target_state; - int error; - - target_state = pci_choose_state(dev, PMSG_SUSPEND); - - pci_enable_wake(dev, target_state, true); - - error = pci_set_power_state(dev, target_state); - - if (error) - pci_enable_wake(dev, target_state, false); - - return error; -} - -int -_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) -{ - int err; - - err = pci_enable_wake(dev, PCI_D3cold, enable); - if (err) - goto out; - - err = pci_enable_wake(dev, PCI_D3hot, enable); - -out: - return err; -} -#endif /* < 2.6.28 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size) -{ - skb_fill_page_desc(skb, i, page, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += size; -} -#endif /* < 3.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) -#ifdef HAVE_NETDEV_SELECT_QUEUE -#include -static u32 _kc_simple_tx_hashrnd; -static u32 _kc_simple_tx_hashrnd_initialized; - -u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb) -{ - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!_kc_simple_tx_hashrnd_initialized)) { - get_random_bytes(&_kc_simple_tx_hashrnd, 4); - 
_kc_simple_tx_hashrnd_initialized = 1; - } - - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; -#endif - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } - - hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd); - - return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); -} -#endif /* HAVE_NETDEV_SELECT_QUEUE */ -#endif /* < 2.6.30 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) -#ifdef HAVE_TX_MQ -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) -{ - unsigned int real_num = dev->real_num_tx_queues; - struct Qdisc *qdisc; - int i; - - if (unlikely(txq > dev->num_tx_queues)) - ; - else if (txq > real_num) - dev->real_num_tx_queues = txq; - else if ( txq < real_num) { - dev->real_num_tx_queues = txq; - for (i = txq; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; - if (qdisc) { - spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); - spin_unlock_bh(qdisc_lock(qdisc)); - } - } - } -} -#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ -#endif /* HAVE_TX_MQ */ -#endif /* < 2.6.35 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -static const u32 _kc_flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); - -u32 _kc_ethtool_op_get_flags(struct net_device *dev) -{ - return dev->features & _kc_flags_dup_features; -} - -int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) -{ - if (data & ~supported) - return -EINVAL; - - dev->features = ((dev->features & ~_kc_flags_dup_features) | - (data & _kc_flags_dup_features)); - return 0; -} -#endif /* < 2.6.36 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) -u8 _kc_netdev_get_num_tc(struct net_device *dev) -{ - struct adapter_struct *kc_adapter = netdev_priv(dev); - if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED) - return kc_adapter->tc; - else - return 0; -} - -u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up) -{ - struct adapter_struct *kc_adapter = netdev_priv(dev); - int tc; - u8 map; - - for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) { - map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap; - - if (map & (1 << up)) - return tc; - } - - return 0; -} -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ -#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h deleted file mode 100644 index 7c7d6c31..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h +++ /dev/null @@ -1,3140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
-/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#ifndef _KCOMPAT_H_ -#define _KCOMPAT_H_ - -#ifndef LINUX_VERSION_CODE -#include -#else -#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* NAPI enable/disable flags here */ -/* enable NAPI for ixgbe by default */ -#undef CONFIG_IXGBE_NAPI -#define CONFIG_IXGBE_NAPI -#define NAPI -#ifdef CONFIG_IXGBE_NAPI -#undef NAPI -#define NAPI -#endif /* CONFIG_IXGBE_NAPI */ -#ifdef IXGBE_NAPI -#undef NAPI -#define NAPI -#endif /* IXGBE_NAPI */ -#ifdef IXGBE_NO_NAPI -#undef NAPI -#endif /* IXGBE_NO_NAPI */ - -#define adapter_struct ixgbe_adapter -#define adapter_q_vector ixgbe_q_vector - -/* and finally set defines so that the code sees the changes */ -#ifdef NAPI -#ifndef CONFIG_IXGBE_NAPI -#define CONFIG_IXGBE_NAPI -#endif -#else -#undef CONFIG_IXGBE_NAPI -#endif /* NAPI */ - -/* packet split disable/enable */ -#ifdef DISABLE_PACKET_SPLIT -#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT -#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT -#endif -#endif /* DISABLE_PACKET_SPLIT */ - -/* MSI compatibility code for all kernels and drivers */ -#ifdef DISABLE_PCI_MSI -#undef CONFIG_PCI_MSI -#endif -#ifndef CONFIG_PCI_MSI -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) -struct msix_entry { - u16 vector; /* kernel uses to write allocated vector */ - u16 entry; /* driver uses to specify entry, OS writes */ -}; -#endif -#undef pci_enable_msi -#define pci_enable_msi(a) -ENOTSUPP -#undef pci_disable_msi -#define pci_disable_msi(a) do {} while (0) -#undef pci_enable_msix -#define pci_enable_msix(a, b, c) -ENOTSUPP -#undef pci_disable_msix -#define pci_disable_msix(a) do {} while (0) -#define msi_remove_pci_irq_vectors(a) do {} while (0) -#endif /* CONFIG_PCI_MSI */ -#ifdef DISABLE_PM -#undef CONFIG_PM -#endif - -#ifdef DISABLE_NET_POLL_CONTROLLER -#undef CONFIG_NET_POLL_CONTROLLER -#endif - -#ifndef PMSG_SUSPEND -#define PMSG_SUSPEND 3 -#endif - -/* generic boolean compatibility */ -#undef TRUE -#undef FALSE -#define TRUE true -#define FALSE false -#ifdef GCC_VERSION -#if ( GCC_VERSION < 3000 ) -#define _Bool char -#endif -#else -#define _Bool char -#endif - -/* kernels less than 2.4.14 don't have this */ -#ifndef ETH_P_8021Q -#define ETH_P_8021Q 0x8100 -#endif - -#ifndef module_param -#define module_param(v,t,p) MODULE_PARM(v, "i"); -#endif - -#ifndef DMA_64BIT_MASK -#define DMA_64BIT_MASK 0xffffffffffffffffULL -#endif - -#ifndef DMA_32BIT_MASK -#define DMA_32BIT_MASK 0x00000000ffffffffULL -#endif - -#ifndef PCI_CAP_ID_EXP -#define PCI_CAP_ID_EXP 0x10 -#endif - -#ifndef PCIE_LINK_STATE_L0S -#define PCIE_LINK_STATE_L0S 1 -#endif -#ifndef PCIE_LINK_STATE_L1 -#define PCIE_LINK_STATE_L1 2 -#endif - -#ifndef mmiowb -#ifdef CONFIG_IA64 -#define mmiowb() asm volatile ("mf.a" ::: "memory") -#else -#define mmiowb() -#endif -#endif - -#ifndef SET_NETDEV_DEV -#define SET_NETDEV_DEV(net, pdev) -#endif - -#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) -#define free_netdev(x) kfree(x) 
-#endif - -#ifdef HAVE_POLL_CONTROLLER -#define CONFIG_NET_POLL_CONTROLLER -#endif - -#ifndef SKB_DATAREF_SHIFT -/* if we do not have the infrastructure to detect if skb_header is cloned - just return false in all cases */ -#define skb_header_cloned(x) 0 -#endif - -#ifndef NETIF_F_GSO -#define gso_size tso_size -#define gso_segs tso_segs -#endif - -#ifndef NETIF_F_GRO -#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ - vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) -#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) -#endif - -#ifndef NETIF_F_SCTP_CSUM -#define NETIF_F_SCTP_CSUM 0 -#endif - -#ifndef NETIF_F_LRO -#define NETIF_F_LRO (1 << 15) -#endif - -#ifndef NETIF_F_NTUPLE -#define NETIF_F_NTUPLE (1 << 27) -#endif - -#ifndef IPPROTO_SCTP -#define IPPROTO_SCTP 132 -#endif - -#ifndef CHECKSUM_PARTIAL -#define CHECKSUM_PARTIAL CHECKSUM_HW -#define CHECKSUM_COMPLETE CHECKSUM_HW -#endif - -#ifndef __read_mostly -#define __read_mostly -#endif - -#ifndef MII_RESV1 -#define MII_RESV1 0x17 /* Reserved... */ -#endif - -#ifndef unlikely -#define unlikely(_x) _x -#define likely(_x) _x -#endif - -#ifndef WARN_ON -#define WARN_ON(x) -#endif - -#ifndef PCI_DEVICE -#define PCI_DEVICE(vend,dev) \ - .vendor = (vend), .device = (dev), \ - .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID -#endif - -#ifndef node_online -#define node_online(node) ((node) == 0) -#endif - -#ifndef num_online_cpus -#define num_online_cpus() smp_num_cpus -#endif - -#ifndef cpu_online -#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) -#endif - -#ifndef _LINUX_RANDOM_H -#include -#endif - -#ifndef DECLARE_BITMAP -#ifndef BITS_TO_LONGS -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) -#endif -#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] -#endif - -#ifndef VLAN_HLEN -#define VLAN_HLEN 4 -#endif - -#ifndef VLAN_ETH_HLEN -#define VLAN_ETH_HLEN 18 -#endif - -#ifndef VLAN_ETH_FRAME_LEN -#define VLAN_ETH_FRAME_LEN 1518 -#endif - -#if !defined(IXGBE_DCA) && !defined(IGB_DCA) -#define dca_get_tag(b) 0 -#define dca_add_requester(a) -1 -#define dca_remove_requester(b) do { } while(0) -#define DCA_PROVIDER_ADD 0x0001 -#define DCA_PROVIDER_REMOVE 0x0002 -#endif - -#ifndef DCA_GET_TAG_TWO_ARGS -#define dca3_get_tag(a,b) dca_get_tag(b) -#endif - -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#if defined(__i386__) || defined(__x86_64__) -#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#endif -#endif - -/* taken from 2.6.24 definition in linux/kernel.h */ -#ifndef IS_ALIGNED -#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) -#endif - -#ifndef NETIF_F_HW_VLAN_TX -struct _kc_vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; -#define vlan_ethhdr _kc_vlan_ethhdr -struct _kc_vlan_hdr { - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; -#define vlan_hdr _kc_vlan_hdr -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#define vlan_tx_tag_present(_skb) 0 -#define vlan_tx_tag_get(_skb) 0 -#endif -#endif - -#ifndef VLAN_PRIO_SHIFT -#define VLAN_PRIO_SHIFT 13 -#endif - - -#ifndef __GFP_COLD -#define __GFP_COLD 0 -#endif - -/*****************************************************************************/ -/* Installations with ethtool version without eeprom, adapter id, or statistics - * support */ - -#ifndef ETH_GSTRING_LEN -#define ETH_GSTRING_LEN 32 -#endif - -#ifndef ETHTOOL_GSTATS -#define ETHTOOL_GSTATS 0x1d -#undef ethtool_drvinfo -#define 
ethtool_drvinfo k_ethtool_drvinfo -struct k_ethtool_drvinfo { - u32 cmd; - char driver[32]; - char version[32]; - char fw_version[32]; - char bus_info[32]; - char reserved1[32]; - char reserved2[16]; - u32 n_stats; - u32 testinfo_len; - u32 eedump_len; - u32 regdump_len; -}; - -struct ethtool_stats { - u32 cmd; - u32 n_stats; - u64 data[0]; -}; -#endif /* ETHTOOL_GSTATS */ - -#ifndef ETHTOOL_PHYS_ID -#define ETHTOOL_PHYS_ID 0x1c -#endif /* ETHTOOL_PHYS_ID */ - -#ifndef ETHTOOL_GSTRINGS -#define ETHTOOL_GSTRINGS 0x1b -enum ethtool_stringset { - ETH_SS_TEST = 0, - ETH_SS_STATS, -}; -struct ethtool_gstrings { - u32 cmd; /* ETHTOOL_GSTRINGS */ - u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ - u32 len; /* number of strings in the string set */ - u8 data[0]; -}; -#endif /* ETHTOOL_GSTRINGS */ - -#ifndef ETHTOOL_TEST -#define ETHTOOL_TEST 0x1a -enum ethtool_test_flags { - ETH_TEST_FL_OFFLINE = (1 << 0), - ETH_TEST_FL_FAILED = (1 << 1), -}; -struct ethtool_test { - u32 cmd; - u32 flags; - u32 reserved; - u32 len; - u64 data[0]; -}; -#endif /* ETHTOOL_TEST */ - -#ifndef ETHTOOL_GEEPROM -#define ETHTOOL_GEEPROM 0xb -#undef ETHTOOL_GREGS -struct ethtool_eeprom { - u32 cmd; - u32 magic; - u32 offset; - u32 len; - u8 data[0]; -}; - -struct ethtool_value { - u32 cmd; - u32 data; -}; -#endif /* ETHTOOL_GEEPROM */ - -#ifndef ETHTOOL_GLINK -#define ETHTOOL_GLINK 0xa -#endif /* ETHTOOL_GLINK */ - -#ifndef ETHTOOL_GWOL -#define ETHTOOL_GWOL 0x5 -#define ETHTOOL_SWOL 0x6 -#define SOPASS_MAX 6 -struct ethtool_wolinfo { - u32 cmd; - u32 supported; - u32 wolopts; - u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ -}; -#endif /* ETHTOOL_GWOL */ - -#ifndef ETHTOOL_GREGS -#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ -#define ethtool_regs _kc_ethtool_regs -/* for passing big chunks of data */ -struct _kc_ethtool_regs { - u32 cmd; - u32 version; /* driver-specific, indicates different chips/revs */ - u32 len; /* bytes */ - u8 data[0]; -}; -#endif /* ETHTOOL_GREGS */ - -#ifndef ETHTOOL_GMSGLVL -#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ -#endif -#ifndef ETHTOOL_SMSGLVL -#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ -#endif -#ifndef ETHTOOL_NWAY_RST -#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ -#endif -#ifndef ETHTOOL_GLINK -#define ETHTOOL_GLINK 0x0000000a /* Get link status */ -#endif -#ifndef ETHTOOL_GEEPROM -#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ -#endif -#ifndef ETHTOOL_SEEPROM -#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ -#endif -#ifndef ETHTOOL_GCOALESCE -#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ -/* for configuring coalescing parameters of chip */ -#define ethtool_coalesce _kc_ethtool_coalesce -struct _kc_ethtool_coalesce { - u32 cmd; /* ETHTOOL_{G,S}COALESCE */ - - /* How many usecs to delay an RX interrupt after - * a packet arrives. If 0, only rx_max_coalesced_frames - * is used. - */ - u32 rx_coalesce_usecs; - - /* How many packets to delay an RX interrupt after - * a packet arrives. If 0, only rx_coalesce_usecs is - * used. It is illegal to set both usecs and max frames - * to zero as this would cause RX interrupts to never be - * generated. - */ - u32 rx_max_coalesced_frames; - - /* Same as above two parameters, except that these values - * apply while an IRQ is being serviced by the host. Not - * all cards support this feature and the values are ignored - * in that case. 
- */ - u32 rx_coalesce_usecs_irq; - u32 rx_max_coalesced_frames_irq; - - /* How many usecs to delay a TX interrupt after - * a packet is sent. If 0, only tx_max_coalesced_frames - * is used. - */ - u32 tx_coalesce_usecs; - - /* How many packets to delay a TX interrupt after - * a packet is sent. If 0, only tx_coalesce_usecs is - * used. It is illegal to set both usecs and max frames - * to zero as this would cause TX interrupts to never be - * generated. - */ - u32 tx_max_coalesced_frames; - - /* Same as above two parameters, except that these values - * apply while an IRQ is being serviced by the host. Not - * all cards support this feature and the values are ignored - * in that case. - */ - u32 tx_coalesce_usecs_irq; - u32 tx_max_coalesced_frames_irq; - - /* How many usecs to delay in-memory statistics - * block updates. Some drivers do not have an in-memory - * statistic block, and in such cases this value is ignored. - * This value must not be zero. - */ - u32 stats_block_coalesce_usecs; - - /* Adaptive RX/TX coalescing is an algorithm implemented by - * some drivers to improve latency under low packet rates and - * improve throughput under high packet rates. Some drivers - * only implement one of RX or TX adaptive coalescing. Anything - * not implemented by the driver causes these values to be - * silently ignored. - */ - u32 use_adaptive_rx_coalesce; - u32 use_adaptive_tx_coalesce; - - /* When the packet rate (measured in packets per second) - * is below pkt_rate_low, the {rx,tx}_*_low parameters are - * used. - */ - u32 pkt_rate_low; - u32 rx_coalesce_usecs_low; - u32 rx_max_coalesced_frames_low; - u32 tx_coalesce_usecs_low; - u32 tx_max_coalesced_frames_low; - - /* When the packet rate is below pkt_rate_high but above - * pkt_rate_low (both measured in packets per second) the - * normal {rx,tx}_* coalescing parameters are used. - */ - - /* When the packet rate is (measured in packets per second) - * is above pkt_rate_high, the {rx,tx}_*_high parameters are - * used. - */ - u32 pkt_rate_high; - u32 rx_coalesce_usecs_high; - u32 rx_max_coalesced_frames_high; - u32 tx_coalesce_usecs_high; - u32 tx_max_coalesced_frames_high; - - /* How often to do adaptive coalescing packet rate sampling, - * measured in seconds. Must not be zero. - */ - u32 rate_sample_interval; -}; -#endif /* ETHTOOL_GCOALESCE */ - -#ifndef ETHTOOL_SCOALESCE -#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ -#endif -#ifndef ETHTOOL_GRINGPARAM -#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ -/* for configuring RX/TX ring parameters */ -#define ethtool_ringparam _kc_ethtool_ringparam -struct _kc_ethtool_ringparam { - u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ - - /* Read only attributes. These indicate the maximum number - * of pending RX/TX ring entries the driver will allow the - * user to set. - */ - u32 rx_max_pending; - u32 rx_mini_max_pending; - u32 rx_jumbo_max_pending; - u32 tx_max_pending; - - /* Values changeable by the user. The valid values are - * in the range 1 to the "*_max_pending" counterpart above. - */ - u32 rx_pending; - u32 rx_mini_pending; - u32 rx_jumbo_pending; - u32 tx_pending; -}; -#endif /* ETHTOOL_GRINGPARAM */ - -#ifndef ETHTOOL_SRINGPARAM -#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. 
*/ -#endif -#ifndef ETHTOOL_GPAUSEPARAM -#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ -/* for configuring link flow control parameters */ -#define ethtool_pauseparam _kc_ethtool_pauseparam -struct _kc_ethtool_pauseparam { - u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ - - /* If the link is being auto-negotiated (via ethtool_cmd.autoneg - * being true) the user may set 'autoneg' here non-zero to have the - * pause parameters be auto-negotiated too. In such a case, the - * {rx,tx}_pause values below determine what capabilities are - * advertised. - * - * If 'autoneg' is zero or the link is not being auto-negotiated, - * then {rx,tx}_pause force the driver to use/not-use pause - * flow control. - */ - u32 autoneg; - u32 rx_pause; - u32 tx_pause; -}; -#endif /* ETHTOOL_GPAUSEPARAM */ - -#ifndef ETHTOOL_SPAUSEPARAM -#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ -#endif -#ifndef ETHTOOL_GRXCSUM -#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_SRXCSUM -#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_GTXCSUM -#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_STXCSUM -#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_GSG -#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable - * (ethtool_value) */ -#endif -#ifndef ETHTOOL_SSG -#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable - * (ethtool_value). */ -#endif -#ifndef ETHTOOL_TEST -#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ -#endif -#ifndef ETHTOOL_GSTRINGS -#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ -#endif -#ifndef ETHTOOL_PHYS_ID -#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ -#endif -#ifndef ETHTOOL_GSTATS -#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ -#endif -#ifndef ETHTOOL_GTSO -#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ -#endif -#ifndef ETHTOOL_STSO -#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ -#endif - -#ifndef ETHTOOL_BUSINFO_LEN -#define ETHTOOL_BUSINFO_LEN 32 -#endif - -#ifndef RHEL_RELEASE_CODE -/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ -#define RHEL_RELEASE_CODE 0 -#endif -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) -#endif -#ifndef AX_RELEASE_CODE -#define AX_RELEASE_CODE 0 -#endif -#ifndef AX_RELEASE_VERSION -#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) -#endif - -/* SuSE version macro is the same as Linux kernel version */ -#ifndef SLE_VERSION -#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) -#endif -#ifndef SLE_VERSION_CODE -#ifdef CONFIG_SUSE_KERNEL -/* SLES11 GA is 2.6.27 based */ -#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) -#define SLE_VERSION_CODE SLE_VERSION(11,0,0) -#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) -/* SLES11 SP1 is 2.6.32 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,1,0) -#else -#define SLE_VERSION_CODE 0 -#endif -#else /* CONFIG_SUSE_KERNEL */ -#define SLE_VERSION_CODE 0 -#endif /* CONFIG_SUSE_KERNEL */ -#endif /* SLE_VERSION_CODE */ - -#ifdef __KLOCWORK__ -#ifdef ARRAY_SIZE -#undef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif -#endif /* __KLOCWORK__ */ - -/*****************************************************************************/ -/* 2.4.3 => 2.4.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - 
-/**************************************/ -/* PCI DRIVER API */ - -#ifndef pci_set_dma_mask -#define pci_set_dma_mask _kc_pci_set_dma_mask -extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); -#endif - -#ifndef pci_request_regions -#define pci_request_regions _kc_pci_request_regions -extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name); -#endif - -#ifndef pci_release_regions -#define pci_release_regions _kc_pci_release_regions -extern void _kc_pci_release_regions(struct pci_dev *pdev); -#endif - -/**************************************/ -/* NETWORK DRIVER API */ - -#ifndef alloc_etherdev -#define alloc_etherdev _kc_alloc_etherdev -extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); -#endif - -#ifndef is_valid_ether_addr -#define is_valid_ether_addr _kc_is_valid_ether_addr -extern int _kc_is_valid_ether_addr(u8 *addr); -#endif - -/**************************************/ -/* MISCELLANEOUS */ - -#ifndef INIT_TQUEUE -#define INIT_TQUEUE(_tq, _routine, _data) \ - do { \ - INIT_LIST_HEAD(&(_tq)->list); \ - (_tq)->sync = 0; \ - (_tq)->routine = _routine; \ - (_tq)->data = _data; \ - } while (0) -#endif - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) -/* Generic MII registers. */ -#define MII_BMCR 0x00 /* Basic mode control register */ -#define MII_BMSR 0x01 /* Basic mode status register */ -#define MII_PHYSID1 0x02 /* PHYS ID 1 */ -#define MII_PHYSID2 0x03 /* PHYS ID 2 */ -#define MII_ADVERTISE 0x04 /* Advertisement control reg */ -#define MII_LPA 0x05 /* Link partner ability reg */ -#define MII_EXPANSION 0x06 /* Expansion register */ -/* Basic mode control register. */ -#define BMCR_FULLDPLX 0x0100 /* Full duplex */ -#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ -/* Basic mode status register. */ -#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ -#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ -#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ -#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ -#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ -#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ -/* Advertisement control register. */ -#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ -#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ -#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ -#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ -#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ -#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ - ADVERTISE_100HALF | ADVERTISE_100FULL) -/* Expansion register for auto-negotiation. 
*/ -#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ -#endif - -/*****************************************************************************/ -/* 2.4.6 => 2.4.3 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -#ifndef pci_set_power_state -#define pci_set_power_state _kc_pci_set_power_state -extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); -#endif - -#ifndef pci_enable_wake -#define pci_enable_wake _kc_pci_enable_wake -extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); -#endif - -#ifndef pci_disable_device -#define pci_disable_device _kc_pci_disable_device -extern void _kc_pci_disable_device(struct pci_dev *pdev); -#endif - -/* PCI PM entry point syntax changed, so don't support suspend/resume */ -#undef CONFIG_PM - -#endif /* 2.4.6 => 2.4.3 */ - -#ifndef HAVE_PCI_SET_MWI -#define pci_set_mwi(X) pci_write_config_word(X, \ - PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ - PCI_COMMAND_INVALIDATE); -#define pci_clear_mwi(X) pci_write_config_word(X, \ - PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ - ~PCI_COMMAND_INVALIDATE); -#endif - -/*****************************************************************************/ -/* 2.4.10 => 2.4.9 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) - -/**************************************/ -/* MODULE API */ - -#ifndef MODULE_LICENSE - #define MODULE_LICENSE(X) -#endif - -/**************************************/ -/* OTHER */ - -#undef min -#define min(x,y) ({ \ - const typeof(x) _x = (x); \ - const typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x < _y ? _x : _y; }) - -#undef max -#define max(x,y) ({ \ - const typeof(x) _x = (x); \ - const typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x > _y ? _x : _y; }) - -#define min_t(type,x,y) ({ \ - type _x = (x); \ - type _y = (y); \ - _x < _y ? _x : _y; }) - -#define max_t(type,x,y) ({ \ - type _x = (x); \ - type _y = (y); \ - _x > _y ? _x : _y; }) - -#ifndef list_for_each_safe -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) -#endif - -#ifndef ____cacheline_aligned_in_smp -#ifdef CONFIG_SMP -#define ____cacheline_aligned_in_smp ____cacheline_aligned -#else -#define ____cacheline_aligned_in_smp -#endif /* CONFIG_SMP */ -#endif - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); -#define snprintf(buf, size, fmt, args...) 
_kc_snprintf(buf, size, fmt, ##args) -extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) -#else /* 2.4.8 => 2.4.9 */ -extern int snprintf(char * buf, size_t size, const char *fmt, ...); -extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -#endif -#endif /* 2.4.10 -> 2.4.6 */ - - -/*****************************************************************************/ -/* 2.4.12 => 2.4.10 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) -#ifndef HAVE_NETIF_MSG -#define HAVE_NETIF_MSG 1 -enum { - NETIF_MSG_DRV = 0x0001, - NETIF_MSG_PROBE = 0x0002, - NETIF_MSG_LINK = 0x0004, - NETIF_MSG_TIMER = 0x0008, - NETIF_MSG_IFDOWN = 0x0010, - NETIF_MSG_IFUP = 0x0020, - NETIF_MSG_RX_ERR = 0x0040, - NETIF_MSG_TX_ERR = 0x0080, - NETIF_MSG_TX_QUEUED = 0x0100, - NETIF_MSG_INTR = 0x0200, - NETIF_MSG_TX_DONE = 0x0400, - NETIF_MSG_RX_STATUS = 0x0800, - NETIF_MSG_PKTDATA = 0x1000, - NETIF_MSG_HW = 0x2000, - NETIF_MSG_WOL = 0x4000, -}; - -#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) -#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) -#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) -#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) -#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) -#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) -#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) -#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) -#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) -#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) -#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) -#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) -#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) -#endif /* !HAVE_NETIF_MSG */ -#endif /* 2.4.12 => 2.4.10 */ - -/*****************************************************************************/ -/* 2.4.13 => 2.4.12 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#ifndef virt_to_page - #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) -#endif - -#ifndef pci_map_page -#define pci_map_page _kc_pci_map_page -extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); -#endif - -#ifndef pci_unmap_page -#define pci_unmap_page _kc_pci_unmap_page -extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); -#endif - -/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ - -#undef DMA_32BIT_MASK -#define DMA_32BIT_MASK 0xffffffff -#undef DMA_64BIT_MASK -#define DMA_64BIT_MASK 0xffffffff - -/**************************************/ -/* OTHER */ - -#ifndef cpu_relax -#define cpu_relax() rep_nop() -#endif - -struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_vlan_proto; - unsigned short h_vlan_TCI; - unsigned short h_vlan_encapsulated_proto; -}; -#endif /* 2.4.13 => 2.4.12 */ - -/*****************************************************************************/ -/* 2.4.17 => 2.4.12 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) - -#ifndef __devexit_p - #define __devexit_p(x) &(x) -#endif - -#endif /* 2.4.17 => 2.4.13 */ - -/*****************************************************************************/ 
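/*
 * Illustrative sketch, not part of the removed kcompat.h hunk: the
 * NETIF_MSG_* flags and netif_msg_<type>() helpers re-created in the
 * 2.4.12 compat block above gate each message class behind a single
 * bitmask test on a driver-private msg_enable field.  Standalone
 * userspace stand-in; the struct name and values chosen here are
 * assumptions for the example only.
 */
#include <stdio.h>

enum {
	NETIF_MSG_DRV  = 0x0001,
	NETIF_MSG_LINK = 0x0004,
	NETIF_MSG_IFUP = 0x0020,
};

struct dummy_priv {
	unsigned int msg_enable;	/* bitmask of enabled message classes */
};

#define netif_msg_link(p)	((p)->msg_enable & NETIF_MSG_LINK)
#define netif_msg_ifup(p)	((p)->msg_enable & NETIF_MSG_IFUP)

int main(void)
{
	struct dummy_priv priv = { .msg_enable = NETIF_MSG_DRV | NETIF_MSG_LINK };

	if (netif_msg_link(&priv))
		printf("link messages enabled\n");
	if (!netif_msg_ifup(&priv))
		printf("ifup messages suppressed\n");
	return 0;
}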
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) -#define NETIF_MSG_HW 0x2000 -#define NETIF_MSG_WOL 0x4000 - -#ifndef netif_msg_hw -#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) -#endif -#ifndef netif_msg_wol -#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) -#endif -#endif /* 2.4.18 */ - -/*****************************************************************************/ - -/*****************************************************************************/ -/* 2.4.20 => 2.4.19 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) - -/* we won't support NAPI on less than 2.4.20 */ -#ifdef NAPI -#undef NAPI -#undef CONFIG_IXGBE_NAPI -#endif - -#endif /* 2.4.20 => 2.4.19 */ - -/*****************************************************************************/ -/* 2.4.22 => 2.4.17 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) -#define pci_name(x) ((x)->slot_name) -#endif - -/*****************************************************************************/ -/* 2.4.22 => 2.4.17 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) -#ifndef IXGBE_NO_LRO -/* Don't enable LRO for these legacy kernels */ -#define IXGBE_NO_LRO -#endif -#endif - -/*****************************************************************************/ -/*****************************************************************************/ -/* 2.4.23 => 2.4.22 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) -/*****************************************************************************/ -#ifdef NAPI -#ifndef netif_poll_disable -#define netif_poll_disable(x) _kc_netif_poll_disable(x) -static inline void _kc_netif_poll_disable(struct net_device *netdev) -{ - while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { - /* No hurry */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } -} -#endif -#ifndef netif_poll_enable -#define netif_poll_enable(x) _kc_netif_poll_enable(x) -static inline void _kc_netif_poll_enable(struct net_device *netdev) -{ - clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); -} -#endif -#endif /* NAPI */ -#ifndef netif_tx_disable -#define netif_tx_disable(x) _kc_netif_tx_disable(x) -static inline void _kc_netif_tx_disable(struct net_device *dev) -{ - spin_lock_bh(&dev->xmit_lock); - netif_stop_queue(dev); - spin_unlock_bh(&dev->xmit_lock); -} -#endif -#else /* 2.4.23 => 2.4.22 */ -#define HAVE_SCTP -#endif /* 2.4.23 => 2.4.22 */ - -/*****************************************************************************/ -/* 2.6.4 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ - ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) -#define ETHTOOL_OPS_COMPAT -#endif /* 2.6.4 => 2.6.0 */ - -/*****************************************************************************/ -/* 2.5.71 => 2.4.x */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) -#define sk_protocol protocol -#define pci_get_device pci_find_device -#endif /* 2.5.70 => 2.4.x */ - -/*****************************************************************************/ -/* < 2.4.27 or 2.6.0 <= 2.6.5 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ - ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) - -#ifndef netif_msg_init -#define netif_msg_init _kc_netif_msg_init -static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) -{ - /* use default */ - if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) - return default_msg_enable_bits; - if (debug_value == 0) /* no 
output */ - return 0; - /* set low N bits */ - return (1 << debug_value) -1; -} -#endif - -#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ -/*****************************************************************************/ -#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ - (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ - ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) -#define netdev_priv(x) x->priv -#endif - -/*****************************************************************************/ -/* <= 2.5.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) -#include -#undef pci_register_driver -#define pci_register_driver pci_module_init - -/* - * Most of the dma compat code is copied/modified from the 2.4.37 - * /include/linux/libata-compat.h header file - */ -/* These definitions mirror those in pci.h, so they can be used - * interchangeably with their PCI_ counterparts */ -enum dma_data_direction { - DMA_BIDIRECTIONAL = 0, - DMA_TO_DEVICE = 1, - DMA_FROM_DEVICE = 2, - DMA_NONE = 3, -}; - -struct device { - struct pci_dev pdev; -}; - -static inline struct pci_dev *to_pci_dev (struct device *dev) -{ - return (struct pci_dev *) dev; -} -static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) -{ - return (struct device *) pdev; -} - -#define pdev_printk(lvl, pdev, fmt, args...) \ - printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) -#define dev_err(dev, fmt, args...) \ - pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) -#define dev_info(dev, fmt, args...) \ - pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) -#define dev_warn(dev, fmt, args...) \ - pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) - -/* NOTE: dangerous! we ignore the 'gfp' argument */ -#define dma_alloc_coherent(dev,sz,dma,gfp) \ - pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) -#define dma_free_coherent(dev,sz,addr,dma_addr) \ - pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) - -#define dma_map_page(dev,a,b,c,d) \ - pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) -#define dma_unmap_page(dev,a,b,c) \ - pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) - -#define dma_map_single(dev,a,b,c) \ - pci_map_single(to_pci_dev(dev),(a),(b),(c)) -#define dma_unmap_single(dev,a,b,c) \ - pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) - -#define dma_sync_single(dev,a,b,c) \ - pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) - -/* for range just sync everything, that's all the pci API can do */ -#define dma_sync_single_range(dev,addr,off,sz,dir) \ - pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) - -#define dma_set_mask(dev,mask) \ - pci_set_dma_mask(to_pci_dev(dev),(mask)) - -/* hlist_* code - double linked lists */ -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); - n->next = NULL; - n->pprev = NULL; -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} -#define HLIST_HEAD_INIT { .first = NULL } -#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } -#define INIT_HLIST_HEAD(ptr) ((ptr)->first 
= NULL) -static inline void INIT_HLIST_NODE(struct hlist_node *h) -{ - h->next = NULL; - h->pprev = NULL; -} -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#ifndef might_sleep -#define might_sleep() -#endif -#else -static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) -{ - return &pdev->dev; -} -#endif /* <= 2.5.0 */ - -/*****************************************************************************/ -/* 2.5.28 => 2.4.23 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) - -static inline void _kc_synchronize_irq(void) -{ - synchronize_irq(); -} -#undef synchronize_irq -#define synchronize_irq(X) _kc_synchronize_irq() - -#include -#define work_struct tq_struct -#undef INIT_WORK -#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) -#undef container_of -#define container_of list_entry -#define schedule_work schedule_task -#define flush_scheduled_work flush_scheduled_tasks -#define cancel_work_sync(x) flush_scheduled_work() - -#endif /* 2.5.28 => 2.4.17 */ - -/*****************************************************************************/ -/* 2.6.0 => 2.5.28 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -#undef get_cpu -#define get_cpu() smp_processor_id() -#undef put_cpu -#define put_cpu() do { } while(0) -#define MODULE_INFO(version, _version) -#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT -#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 -#endif -#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 - -#define dma_set_coherent_mask(dev,mask) 1 - -#undef dev_put -#define dev_put(dev) __dev_put(dev) - -#ifndef skb_fill_page_desc -#define skb_fill_page_desc _kc_skb_fill_page_desc -extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); -#endif - -#undef ALIGN -#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) - -#ifndef page_count -#define page_count(p) atomic_read(&(p)->count) -#endif - -#ifdef MAX_NUMNODES -#undef MAX_NUMNODES -#endif -#define MAX_NUMNODES 1 - -/* find_first_bit and find_next bit are not defined for most - * 2.4 kernels (except for the redhat 2.4.21 kernels - */ -#include -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#undef find_next_bit -#define find_next_bit _kc_find_next_bit -extern unsigned long _kc_find_next_bit(const unsigned long *addr, - unsigned long size, - unsigned long offset); -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) - - -#ifndef netdev_name -static inline const char *_kc_netdev_name(const struct net_device *dev) -{ - if (strchr(dev->name, '%')) - return "(unregistered net_device)"; - return dev->name; -} -#define netdev_name(netdev) _kc_netdev_name(netdev) -#endif /* netdev_name */ - -#ifndef strlcpy -#define strlcpy _kc_strlcpy -extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); -#endif /* strlcpy */ - -#endif /* 2.6.0 => 2.5.28 */ - -/*****************************************************************************/ -/* 2.6.4 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -#define MODULE_VERSION(_version) MODULE_INFO(version, _version) -#endif /* 2.6.4 => 2.6.0 */ - 
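/*
 * Illustrative sketch, not part of the patch: the pprev-based hash
 * list ("hlist") that the <= 2.5.0 compat block above re-implements.
 * Storing a pointer-to-pointer in pprev lets a node unlink itself
 * without knowing whether its predecessor is the head or another
 * node.  Standalone userspace code; container_of is a local stand-in.
 */
#include <stdio.h>
#include <stddef.h>

struct hlist_head { struct hlist_node *first; };
struct hlist_node { struct hlist_node *next, **pprev; };

static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	struct hlist_node *first = h->first;

	n->next = first;
	if (first)
		first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;	/* pprev points at whatever points at us */
}

static void hlist_del(struct hlist_node *n)
{
	*n->pprev = n->next;	/* unlink without walking from the head */
	if (n->next)
		n->next->pprev = n->pprev;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct item {
	int val;
	struct hlist_node node;
};

int main(void)
{
	struct hlist_head head = { .first = NULL };
	struct item a = { .val = 1 }, b = { .val = 2 };
	struct hlist_node *pos;

	hlist_add_head(&a.node, &head);
	hlist_add_head(&b.node, &head);
	hlist_del(&a.node);

	for (pos = head.first; pos; pos = pos->next)
		printf("%d\n", container_of(pos, struct item, node)->val);
	return 0;
}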
-/*****************************************************************************/ -/* 2.6.5 => 2.6.0 */ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) -#define dma_sync_single_for_cpu dma_sync_single -#define dma_sync_single_for_device dma_sync_single -#define dma_sync_single_range_for_cpu dma_sync_single_range -#define dma_sync_single_range_for_device dma_sync_single_range -#ifndef pci_dma_mapping_error -#define pci_dma_mapping_error _kc_pci_dma_mapping_error -static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) -{ - return dma_addr == 0; -} -#endif -#endif /* 2.6.5 => 2.6.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); -#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args) -#endif /* < 2.6.4 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) -/* taken from 2.6 include/linux/bitmap.h */ -#undef bitmap_zero -#define bitmap_zero _kc_bitmap_zero -static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) -{ - if (nbits <= BITS_PER_LONG) - *dst = 0UL; - else { - int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); - } -} -#define random_ether_addr _kc_random_ether_addr -static inline void _kc_random_ether_addr(u8 *addr) -{ - get_random_bytes(addr, ETH_ALEN); - addr[0] &= 0xfe; /* clear multicast */ - addr[0] |= 0x02; /* set local assignment */ -} -#define page_to_nid(x) 0 - -#endif /* < 2.6.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) -#undef if_mii -#define if_mii _kc_if_mii -static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) -{ - return (struct mii_ioctl_data *) &rq->ifr_ifru; -} - -#ifndef __force -#define __force -#endif -#endif /* < 2.6.7 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) -#ifndef PCI_EXP_DEVCTL -#define PCI_EXP_DEVCTL 8 -#endif -#ifndef PCI_EXP_DEVCTL_CERE -#define PCI_EXP_DEVCTL_CERE 0x0001 -#endif -#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout((x * HZ)/1000 + 2); \ - } while (0) - -#endif /* < 2.6.8 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) -#include -#define __iomem - -#ifndef kcalloc -#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) -extern void *_kc_kzalloc(size_t size, int flags); -#endif -#define MSEC_PER_SEC 1000L -static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) -{ -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (MSEC_PER_SEC / HZ) * j; -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) - return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); -#else - return (j * MSEC_PER_SEC) / HZ; -#endif -} -static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) -{ - if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) - return m * (HZ / MSEC_PER_SEC); -#else - return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; -#endif -} - -#define msleep_interruptible _kc_msleep_interruptible -static 
inline unsigned long _kc_msleep_interruptible(unsigned int msecs) -{ - unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; - - while (timeout && !signal_pending(current)) { - __set_current_state(TASK_INTERRUPTIBLE); - timeout = schedule_timeout(timeout); - } - return _kc_jiffies_to_msecs(timeout); -} - -/* Basic mode control register. */ -#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ - -#ifndef __le16 -#define __le16 u16 -#endif -#ifndef __le32 -#define __le32 u32 -#endif -#ifndef __le64 -#define __le64 u64 -#endif -#ifndef __be16 -#define __be16 u16 -#endif -#ifndef __be32 -#define __be32 u32 -#endif -#ifndef __be64 -#define __be64 u64 -#endif - -static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) -{ - return (struct vlan_ethhdr *)skb->mac.raw; -} - -/* Wake-On-Lan options. */ -#define WAKE_PHY (1 << 0) -#define WAKE_UCAST (1 << 1) -#define WAKE_MCAST (1 << 2) -#define WAKE_BCAST (1 << 3) -#define WAKE_ARP (1 << 4) -#define WAKE_MAGIC (1 << 5) -#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ - -#define skb_header_pointer _kc_skb_header_pointer -static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, - int offset, int len, void *buffer) -{ - int hlen = skb_headlen(skb); - - if (hlen - offset >= len) - return skb->data + offset; - -#ifdef MAX_SKB_FRAGS - if (skb_copy_bits(skb, offset, buffer, len) < 0) - return NULL; - - return buffer; -#else - return NULL; -#endif - -#ifndef NETDEV_TX_OK -#define NETDEV_TX_OK 0 -#endif -#ifndef NETDEV_TX_BUSY -#define NETDEV_TX_BUSY 1 -#endif -#ifndef NETDEV_TX_LOCKED -#define NETDEV_TX_LOCKED -1 -#endif -} - -#ifndef __bitwise -#define __bitwise -#endif -#endif /* < 2.6.9 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) -#ifdef module_param_array_named -#undef module_param_array_named -#define module_param_array_named(name, array, type, nump, perm) \ - static struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ - sizeof(array[0]), array }; \ - module_param_call(name, param_array_set, param_array_get, \ - &__param_arr_##name, perm) -#endif /* module_param_array_named */ -/* - * num_online is broken for all < 2.6.10 kernels. This is needed to support - * Node module parameter of ixgbe. 
- */ -#undef num_online_nodes -#define num_online_nodes(n) 1 -extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); -#undef node_online_map -#define node_online_map _kcompat_node_online_map -#endif /* < 2.6.10 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) -#define PCI_D0 0 -#define PCI_D1 1 -#define PCI_D2 2 -#define PCI_D3hot 3 -#define PCI_D3cold 4 -typedef int pci_power_t; -#define pci_choose_state(pdev,state) state -#define PMSG_SUSPEND 3 -#define PCI_EXP_LNKCTL 16 - -#undef NETIF_F_LLTX - -#ifndef ARCH_HAS_PREFETCH -#define prefetch(X) -#endif - -#ifndef NET_IP_ALIGN -#define NET_IP_ALIGN 2 -#endif - -#define KC_USEC_PER_SEC 1000000L -#define usecs_to_jiffies _kc_usecs_to_jiffies -static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) -{ -#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) - return (KC_USEC_PER_SEC / HZ) * j; -#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) - return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); -#else - return (j * KC_USEC_PER_SEC) / HZ; -#endif -} -static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) -{ - if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; -#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) - return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); -#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) - return m * (HZ / KC_USEC_PER_SEC); -#else - return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; -#endif -} -#endif /* < 2.6.11 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) -#include -#define USE_REBOOT_NOTIFIER - -/* Generic MII registers. */ -#define MII_CTRL1000 0x09 /* 1000BASE-T control */ -#define MII_STAT1000 0x0a /* 1000BASE-T status */ -/* Advertisement control register. */ -#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ -#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ -/* 1000BASE-T Control register */ -#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ -#ifndef is_zero_ether_addr -#define is_zero_ether_addr _kc_is_zero_ether_addr -static inline int _kc_is_zero_ether_addr(const u8 *addr) -{ - return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); -} -#endif /* is_zero_ether_addr */ -#ifndef is_multicast_ether_addr -#define is_multicast_ether_addr _kc_is_multicast_ether_addr -static inline int _kc_is_multicast_ether_addr(const u8 *addr) -{ - return addr[0] & 0x01; -} -#endif /* is_multicast_ether_addr */ -#endif /* < 2.6.12 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) -#ifndef kstrdup -#define kstrdup _kc_kstrdup -extern char *_kc_kstrdup(const char *s, unsigned int gfp); -#endif -#endif /* < 2.6.13 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) -#define pm_message_t u32 -#ifndef kzalloc -#define kzalloc _kc_kzalloc -extern void *_kc_kzalloc(size_t size, int flags); -#endif - -/* Generic MII registers. */ -#define MII_ESTATUS 0x0f /* Extended Status */ -/* Basic mode status register. */ -#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ -/* Extended status register. 
*/ -#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ -#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ - -#define ADVERTISED_Pause (1 << 13) -#define ADVERTISED_Asym_Pause (1 << 14) - -#if (!(RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) -#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) -#define gfp_t unsigned -#else -typedef unsigned gfp_t; -#endif -#endif /* !RHEL4.3->RHEL5.0 */ - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) -#ifdef CONFIG_X86_64 -#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \ - dma_sync_single_for_cpu(dev, dma_handle, size, dir) -#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ - dma_sync_single_for_device(dev, dma_handle, size, dir) -#endif -#endif -#endif /* < 2.6.14 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) -#ifndef vmalloc_node -#define vmalloc_node(a,b) vmalloc(a) -#endif /* vmalloc_node*/ - -#define setup_timer(_timer, _function, _data) \ -do { \ - (_timer)->function = _function; \ - (_timer)->data = _data; \ - init_timer(_timer); \ -} while (0) -#ifndef device_can_wakeup -#define device_can_wakeup(dev) (1) -#endif -#ifndef device_set_wakeup_enable -#define device_set_wakeup_enable(dev, val) do{}while(0) -#endif -#ifndef device_init_wakeup -#define device_init_wakeup(dev,val) do {} while (0) -#endif -static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) -{ - const u16 *a = (const u16 *) addr1; - const u16 *b = (const u16 *) addr2; - - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; -} -#undef compare_ether_addr -#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) -#endif /* < 2.6.15 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) -#undef DEFINE_MUTEX -#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) -#define mutex_lock(x) down_interruptible(x) -#define mutex_unlock(x) up(x) - -#ifndef ____cacheline_internodealigned_in_smp -#ifdef CONFIG_SMP -#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp -#else -#define ____cacheline_internodealigned_in_smp -#endif /* CONFIG_SMP */ -#endif /* ____cacheline_internodealigned_in_smp */ -#undef HAVE_PCI_ERS -#else /* 2.6.16 and above */ -#undef HAVE_PCI_ERS -#define HAVE_PCI_ERS -#endif /* < 2.6.16 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) -#ifndef first_online_node -#define first_online_node 0 -#endif -#ifndef NET_SKB_PAD -#define NET_SKB_PAD 16 -#endif -#endif /* < 2.6.17 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) - -#ifndef IRQ_HANDLED -#define irqreturn_t void -#define IRQ_HANDLED -#define IRQ_NONE -#endif - -#ifndef IRQF_PROBE_SHARED -#ifdef SA_PROBEIRQ -#define IRQF_PROBE_SHARED SA_PROBEIRQ -#else -#define IRQF_PROBE_SHARED 0 -#endif -#endif - -#ifndef IRQF_SHARED -#define IRQF_SHARED SA_SHIRQ -#endif - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -#ifndef FIELD_SIZEOF -#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) -#endif - -#ifndef skb_is_gso -#ifdef NETIF_F_TSO -#define skb_is_gso _kc_skb_is_gso -static inline int _kc_skb_is_gso(const struct 
sk_buff *skb) -{ - return skb_shinfo(skb)->gso_size; -} -#else -#define skb_is_gso(a) 0 -#endif -#endif - -#ifndef resource_size_t -#define resource_size_t unsigned long -#endif - -#ifdef skb_pad -#undef skb_pad -#endif -#define skb_pad(x,y) _kc_skb_pad(x, y) -int _kc_skb_pad(struct sk_buff *skb, int pad); -#ifdef skb_padto -#undef skb_padto -#endif -#define skb_padto(x,y) _kc_skb_padto(x, y) -static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if(likely(size >= len)) - return 0; - return _kc_skb_pad(skb, len - size); -} - -#ifndef DECLARE_PCI_UNMAP_ADDR -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ - dma_addr_t ADDR_NAME -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ - u32 LEN_NAME -#define pci_unmap_addr(PTR, ADDR_NAME) \ - ((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - (((PTR)->ADDR_NAME) = (VAL)) -#define pci_unmap_len(PTR, LEN_NAME) \ - ((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - (((PTR)->LEN_NAME) = (VAL)) -#endif /* DECLARE_PCI_UNMAP_ADDR */ -#endif /* < 2.6.18 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) - -#ifndef DIV_ROUND_UP -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#endif -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) -#if (!((RHEL_RELEASE_CODE && \ - ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ - RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ - (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \ - (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0)))) -typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); -#endif -#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -#undef CONFIG_INET_LRO -#undef CONFIG_INET_LRO_MODULE -#undef CONFIG_FCOE -#undef CONFIG_FCOE_MODULE -#endif -typedef irqreturn_t (*new_handler_t)(int, void*); -static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) -#else /* 2.4.x */ -typedef void (*irq_handler_t)(int, void*, struct pt_regs *); -typedef void (*new_handler_t)(int, void*); -static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) -#endif /* >= 2.5.x */ -{ - irq_handler_t new_handler = (irq_handler_t) handler; - return request_irq(irq, new_handler, flags, devname, dev_id); -} - -#undef request_irq -#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) - -#define irq_handler_t new_handler_t -/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) -#define PCIE_CONFIG_SPACE_LEN 256 -#define PCI_CONFIG_SPACE_LEN 64 -#define PCIE_LINK_STATUS 0x12 -#define pci_config_space_ich8lan() do {} while(0) -#undef pci_save_state -extern int _kc_pci_save_state(struct pci_dev *); -#define pci_save_state(pdev) _kc_pci_save_state(pdev) -#undef pci_restore_state -extern void _kc_pci_restore_state(struct pci_dev *); -#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) -#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -#undef free_netdev -extern void _kc_free_netdev(struct net_device *); -#define free_netdev(netdev) _kc_free_netdev(netdev) -#endif -static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - return 0; -} -#define 
pci_disable_pcie_error_reporting(dev) do {} while (0) -#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) - -extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); -#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) -#ifndef bool -#define bool _Bool -#define true 1 -#define false 0 -#endif -#else /* 2.6.19 */ -#include -#include -#endif /* < 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) -#undef INIT_WORK -#define INIT_WORK(_work, _func) \ -do { \ - INIT_LIST_HEAD(&(_work)->entry); \ - (_work)->pending = 0; \ - (_work)->func = (void (*)(void *))_func; \ - (_work)->data = _work; \ - init_timer(&(_work)->timer); \ -} while (0) -#endif - -#ifndef PCI_VDEVICE -#define PCI_VDEVICE(ven, dev) \ - PCI_VENDOR_ID_##ven, (dev), \ - PCI_ANY_ID, PCI_ANY_ID, 0, 0 -#endif - -#ifndef round_jiffies -#define round_jiffies(x) x -#endif - -#define csum_offset csum - -#define HAVE_EARLY_VMALLOC_NODE -#define dev_to_node(dev) -1 -#undef set_dev_node -/* remove compiler warning with b=b, for unused variable */ -#define set_dev_node(a, b) do { (b) = (b); } while(0) - -#if (!(RHEL_RELEASE_CODE && \ - (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ - !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) -typedef __u16 __bitwise __sum16; -typedef __u32 __bitwise __wsum; -#endif - -#if (!(RHEL_RELEASE_CODE && \ - (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ - !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) -static inline __wsum csum_unfold(__sum16 n) -{ - return (__force __wsum)n; -} -#endif - -#else /* < 2.6.20 */ -#define HAVE_DEVICE_NUMA_NODE -#endif /* < 2.6.20 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -#define to_net_dev(class) container_of(class, struct net_device, class_dev) -#define NETDEV_CLASS_DEV -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) -#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) -#define vlan_group_set_device(vg, id, dev) \ - do { \ - if (vg) vg->vlan_devices[id] = dev; \ - } while (0) -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ -#define pci_channel_offline(pdev) (pdev->error_state && \ - pdev->error_state != pci_channel_io_normal) -#define pci_request_selected_regions(pdev, bars, name) \ - pci_request_regions(pdev, name) -#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); -#endif /* < 2.6.21 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -#define tcp_hdr(skb) (skb->h.th) -#define tcp_hdrlen(skb) (skb->h.th->doff << 2) -#define skb_transport_offset(skb) (skb->h.raw - skb->data) -#define skb_transport_header(skb) (skb->h.raw) -#define ipv6_hdr(skb) (skb->nh.ipv6h) -#define ip_hdr(skb) (skb->nh.iph) -#define skb_network_offset(skb) (skb->nh.raw - skb->data) -#define skb_network_header(skb) (skb->nh.raw) -#define skb_tail_pointer(skb) skb->tail -#define skb_reset_tail_pointer(skb) \ - do { \ - skb->tail = skb->data; \ - } while (0) -#define skb_copy_to_linear_data(skb, 
from, len) \ - memcpy(skb->data, from, len) -#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ - memcpy(skb->data + offset, from, len) -#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) -#define pci_register_driver pci_module_init -#define skb_mac_header(skb) skb->mac.raw - -#ifdef NETIF_F_MULTI_QUEUE -#ifndef alloc_etherdev_mq -#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) -#endif -#endif /* NETIF_F_MULTI_QUEUE */ - -#ifndef ETH_FCS_LEN -#define ETH_FCS_LEN 4 -#endif -#define cancel_work_sync(x) flush_scheduled_work() -#ifndef udp_hdr -#define udp_hdr _udp_hdr -static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_transport_header(skb); -} -#endif - -#ifdef cpu_to_be16 -#undef cpu_to_be16 -#endif -#define cpu_to_be16(x) __constant_htons(x) - -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) -enum { - DUMP_PREFIX_NONE, - DUMP_PREFIX_ADDRESS, - DUMP_PREFIX_OFFSET -}; -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ -#ifndef hex_asc -#define hex_asc(x) "0123456789abcdef"[x] -#endif -#include -extern void _kc_print_hex_dump(const char *level, const char *prefix_str, - int prefix_type, int rowsize, int groupsize, - const void *buf, size_t len, bool ascii); -#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ - _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) -#else /* 2.6.22 */ -#define ETH_TYPE_TRANS_SETS_DEV -#define HAVE_NETDEV_STATS_IN_NETDEV -#endif /* < 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) -#endif /* > 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) -#define netif_subqueue_stopped(_a, _b) 0 -#ifndef PTR_ALIGN -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#endif - -#ifndef CONFIG_PM_SLEEP -#define CONFIG_PM_SLEEP CONFIG_PM -#endif - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) -#define HAVE_ETHTOOL_GET_PERM_ADDR -#endif /* 2.6.14 through 2.6.22 */ -#endif /* < 2.6.23 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifndef ETH_FLAG_LRO -#define ETH_FLAG_LRO NETIF_F_LRO -#endif - -/* if GRO is supported then the napi struct must already exist */ -#ifndef NETIF_F_GRO -/* NAPI API changes in 2.6.24 break everything */ -struct napi_struct { - /* used to look up the real NAPI polling routine */ - int (*poll)(struct napi_struct *, int); - struct net_device *dev; - int weight; -}; -#endif - -#ifdef NAPI -extern int __kc_adapter_clean(struct net_device *, int *); -extern struct net_device *napi_to_poll_dev(struct napi_struct *napi); -#define netif_napi_add(_netdev, _napi, _poll, _weight) \ - do { \ - struct napi_struct *__napi = (_napi); \ - struct net_device *poll_dev = napi_to_poll_dev(__napi); \ - poll_dev->poll = &(__kc_adapter_clean); \ - poll_dev->priv = (_napi); \ - poll_dev->weight = (_weight); \ - set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ - set_bit(__LINK_STATE_START, &poll_dev->state);\ - dev_hold(poll_dev); \ - __napi->poll = &(_poll); \ - __napi->weight = (_weight); \ - __napi->dev = (_netdev); \ - } while (0) -#define netif_napi_del(_napi) \ - do { \ - struct net_device *poll_dev = napi_to_poll_dev(_napi); \ - WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ - dev_put(poll_dev); \ - memset(poll_dev, 0, sizeof(struct 
net_device));\ - } while (0) -#define napi_schedule_prep(_napi) \ - (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) -#define napi_schedule(_napi) \ - do { \ - if (napi_schedule_prep(_napi)) \ - __netif_rx_schedule(napi_to_poll_dev(_napi)); \ - } while (0) -#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) -#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) -#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) -#ifndef NETIF_F_GRO -#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) -#else -#define napi_complete(_napi) \ - do { \ - napi_gro_flush(_napi); \ - netif_rx_complete(napi_to_poll_dev(_napi)); \ - } while (0) -#endif /* NETIF_F_GRO */ -#else /* NAPI */ -#define netif_napi_add(_netdev, _napi, _poll, _weight) \ - do { \ - struct napi_struct *__napi = _napi; \ - _netdev->poll = &(_poll); \ - _netdev->weight = (_weight); \ - __napi->poll = &(_poll); \ - __napi->weight = (_weight); \ - __napi->dev = (_netdev); \ - } while (0) -#define netif_napi_del(_a) do {} while (0) -#endif /* NAPI */ - -#undef dev_get_by_name -#define dev_get_by_name(_a, _b) dev_get_by_name(_b) -#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) -#ifndef DMA_BIT_MASK -#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) -#endif - -#ifdef NETIF_F_TSO6 -#define skb_is_gso_v6 _kc_skb_is_gso_v6 -static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; -} -#endif /* NETIF_F_TSO6 */ - -#ifndef KERN_CONT -#define KERN_CONT "" -#endif -#else /* < 2.6.24 */ -#define HAVE_ETHTOOL_GET_SSET_COUNT -#define HAVE_NETDEV_NAPI_LIST -#endif /* < 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) -#include -#else /* >= 3.2.0 */ -#include -#endif /* else >= 3.2.0 */ -#endif /* > 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) -#define PM_QOS_CPU_DMA_LATENCY 1 - -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) -#include -#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY -#define pm_qos_add_requirement(pm_qos_class, name, value) \ - set_acceptable_latency(name, value) -#define pm_qos_remove_requirement(pm_qos_class, name) \ - remove_acceptable_latency(name) -#define pm_qos_update_requirement(pm_qos_class, name, value) \ - modify_acceptable_latency(name, value) -#else -#define PM_QOS_DEFAULT_VALUE -1 -#define pm_qos_add_requirement(pm_qos_class, name, value) -#define pm_qos_remove_requirement(pm_qos_class, name) -#define pm_qos_update_requirement(pm_qos_class, name, value) { \ - if (value != PM_QOS_DEFAULT_VALUE) { \ - printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ - pci_name(adapter->pdev)); \ - } \ -} - -#endif /* > 2.6.18 */ - -#define pci_enable_device_mem(pdev) pci_enable_device(pdev) - -#ifndef DEFINE_PCI_DEVICE_TABLE -#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] -#endif /* DEFINE_PCI_DEVICE_TABLE */ - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) -#ifndef IXGBE_PROCFS -#define IXGBE_PROCFS -#endif /* IXGBE_PROCFS */ -#endif /* >= 2.6.0 */ - - -#else /* < 2.6.25 */ - -#ifndef IXGBE_SYSFS -#define IXGBE_SYSFS -#endif /* IXGBE_SYSFS */ - - -#endif /* < 2.6.25 */ - 
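/*
 * Illustrative sketch, not part of the patch: why the compat
 * DMA_BIT_MASK() shown above special-cases n == 64.  Shifting a
 * 64-bit value by 64 is undefined behaviour in C, so the full-width
 * mask cannot be produced by (1ULL << 64) - 1 and must be spelled out.
 * Standalone userspace code.
 */
#include <stdio.h>

#define DMA_64BIT_MASK	0xffffffffffffffffULL
#define DMA_BIT_MASK(n)	(((n) == 64) ? DMA_64BIT_MASK : ((1ULL << (n)) - 1))

int main(void)
{
	printf("32-bit mask: 0x%llx\n", (unsigned long long)DMA_BIT_MASK(32));
	printf("40-bit mask: 0x%llx\n", (unsigned long long)DMA_BIT_MASK(40));
	printf("64-bit mask: 0x%llx\n", (unsigned long long)DMA_BIT_MASK(64));
	return 0;
}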
-/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -#ifndef clamp_t -#define clamp_t(type, val, min, max) ({ \ - type __val = (val); \ - type __min = (min); \ - type __max = (max); \ - __val = __val < __min ? __min : __val; \ - __val > __max ? __max : __val; }) -#endif /* clamp_t */ -#ifdef NETIF_F_TSO -#ifdef NETIF_F_TSO6 -#define netif_set_gso_max_size(_netdev, size) \ - do { \ - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \ - _netdev->features &= ~NETIF_F_TSO; \ - _netdev->features &= ~NETIF_F_TSO6; \ - } else { \ - _netdev->features |= NETIF_F_TSO; \ - _netdev->features |= NETIF_F_TSO6; \ - } \ - } while (0) -#else /* NETIF_F_TSO6 */ -#define netif_set_gso_max_size(_netdev, size) \ - do { \ - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ - _netdev->features &= ~NETIF_F_TSO; \ - else \ - _netdev->features |= NETIF_F_TSO; \ - } while (0) -#endif /* NETIF_F_TSO6 */ -#else -#define netif_set_gso_max_size(_netdev, size) do {} while (0) -#endif /* NETIF_F_TSO */ -#undef kzalloc_node -#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) - -extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); -#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) -#else /* < 2.6.26 */ -#include -#define HAVE_NETDEV_VLAN_FEATURES -#endif /* < 2.6.26 */ -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, - __u32 speed) -{ - ep->speed = (__u16)speed; - /* ep->speed_hi = (__u16)(speed >> 16); */ -} -#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set - -static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) -{ - /* no speed_hi before 2.6.27, and probably no need for it yet */ - return (__u32)ep->speed; -} -#define ethtool_cmd_speed _kc_ethtool_cmd_speed - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) -#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) -#define ANCIENT_PM 1 -#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ - defined(CONFIG_PM_SLEEP)) -#define NEWER_PM 1 -#endif -#if defined(ANCIENT_PM) || defined(NEWER_PM) -#undef device_set_wakeup_enable -#define device_set_wakeup_enable(dev, val) \ - do { \ - u16 pmc = 0; \ - int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ - if (pm) { \ - pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ - &pmc); \ - } \ - (dev)->power.can_wakeup = !!(pmc >> 11); \ - (dev)->power.should_wakeup = (val && (pmc >> 11)); \ - } while (0) -#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ -#endif /* 2.6.15 through 2.6.27 */ -#ifndef netif_napi_del -#define netif_napi_del(_a) do {} while (0) -#ifdef NAPI -#ifdef CONFIG_NETPOLL -#undef netif_napi_del -#define netif_napi_del(_a) list_del(&(_a)->dev_list); -#endif -#endif -#endif /* netif_napi_del */ -#ifdef dma_mapping_error -#undef dma_mapping_error -#endif -#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) - -#ifdef CONFIG_NETDEVICES_MULTIQUEUE -#define HAVE_TX_MQ -#endif - -#ifdef HAVE_TX_MQ -extern void _kc_netif_tx_stop_all_queues(struct net_device *); -extern void _kc_netif_tx_wake_all_queues(struct net_device *); -extern void _kc_netif_tx_start_all_queues(struct net_device *); -#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) -#define 
netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) -#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) -#undef netif_stop_subqueue -#define netif_stop_subqueue(_ndev,_qi) do { \ - if (netif_is_multiqueue((_ndev))) \ - netif_stop_subqueue((_ndev), (_qi)); \ - else \ - netif_stop_queue((_ndev)); \ - } while (0) -#undef netif_start_subqueue -#define netif_start_subqueue(_ndev,_qi) do { \ - if (netif_is_multiqueue((_ndev))) \ - netif_start_subqueue((_ndev), (_qi)); \ - else \ - netif_start_queue((_ndev)); \ - } while (0) -#else /* HAVE_TX_MQ */ -#define netif_tx_stop_all_queues(a) netif_stop_queue(a) -#define netif_tx_wake_all_queues(a) netif_wake_queue(a) -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) -#define netif_tx_start_all_queues(a) netif_start_queue(a) -#else -#define netif_tx_start_all_queues(a) do {} while (0) -#endif -#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev)) -#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) -#endif /* HAVE_TX_MQ */ -#ifndef NETIF_F_MULTI_QUEUE -#define NETIF_F_MULTI_QUEUE 0 -#define netif_is_multiqueue(a) 0 -#define netif_wake_subqueue(a, b) -#endif /* NETIF_F_MULTI_QUEUE */ - -#ifndef __WARN_printf -extern void __kc_warn_slowpath(const char *file, const int line, - const char *fmt, ...) __attribute__((format(printf, 3, 4))); -#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) -#endif /* __WARN_printf */ - -#ifndef WARN -#define WARN(condition, format...) ({ \ - int __ret_warn_on = !!(condition); \ - if (unlikely(__ret_warn_on)) \ - __WARN_printf(format); \ - unlikely(__ret_warn_on); \ -}) -#endif /* WARN */ -#else /* < 2.6.27 */ -#define HAVE_TX_MQ -#define HAVE_NETDEV_SELECT_QUEUE -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) -#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ - pci_resource_len(pdev, bar)) -#define pci_wake_from_d3 _kc_pci_wake_from_d3 -#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep -extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); -extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); -#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) -#ifndef __skb_queue_head_init -static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) -{ - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; -} -#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) -#endif -#endif /* < 2.6.28 */ - -#ifndef skb_add_rx_frag -#define skb_add_rx_frag _kc_skb_add_rx_frag -extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int); -#endif - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) -#ifndef swap -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) -#endif -#define pci_request_selected_regions_exclusive(pdev, bars, name) \ - pci_request_selected_regions(pdev, bars, name) -#ifndef CONFIG_NR_CPUS -#define CONFIG_NR_CPUS 1 -#endif /* CONFIG_NR_CPUS */ -#ifndef pcie_aspm_enabled -#define pcie_aspm_enabled() (1) -#endif /* pcie_aspm_enabled */ -#else /* < 2.6.29 */ -#ifndef HAVE_NET_DEVICE_OPS -#define HAVE_NET_DEVICE_OPS -#endif -#ifdef CONFIG_DCB -#define HAVE_PFC_MODE_ENABLE -#endif /* CONFIG_DCB */ -#endif /* < 2.6.29 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < 
KERNEL_VERSION(2,6,30) ) -#define skb_rx_queue_recorded(a) false -#define skb_get_rx_queue(a) 0 -#undef CONFIG_FCOE -#undef CONFIG_FCOE_MODULE -extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb); -#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s) -#define skb_record_rx_queue(a, b) do {} while (0) -#ifndef CONFIG_PCI_IOV -#undef pci_enable_sriov -#define pci_enable_sriov(a, b) -ENOTSUPP -#undef pci_disable_sriov -#define pci_disable_sriov(a) do {} while (0) -#endif /* CONFIG_PCI_IOV */ -#ifndef pr_cont -#define pr_cont(fmt, ...) \ - printk(KERN_CONT fmt, ##__VA_ARGS__) -#endif /* pr_cont */ -#else -#define HAVE_ASPM_QUIRKS -#endif /* < 2.6.30 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) -#define ETH_P_1588 0x88F7 -#define ETH_P_FIP 0x8914 -#ifndef netdev_uc_count -#define netdev_uc_count(dev) ((dev)->uc_count) -#endif -#ifndef netdev_for_each_uc_addr -#define netdev_for_each_uc_addr(uclist, dev) \ - for (uclist = dev->uc_list; uclist; uclist = uclist->next) -#endif -#else -#ifndef HAVE_NETDEV_STORAGE_ADDRESS -#define HAVE_NETDEV_STORAGE_ADDRESS -#endif -#ifndef HAVE_NETDEV_HW_ADDR -#define HAVE_NETDEV_HW_ADDR -#endif -#ifndef HAVE_TRANS_START_IN_QUEUE -#define HAVE_TRANS_START_IN_QUEUE -#endif -#endif /* < 2.6.31 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) -#undef netdev_tx_t -#define netdev_tx_t int -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef NETIF_F_FCOE_MTU -#define NETIF_F_FCOE_MTU (1 << 26) -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ - -#ifndef pm_runtime_get_sync -#define pm_runtime_get_sync(dev) do {} while (0) -#endif -#ifndef pm_runtime_put -#define pm_runtime_put(dev) do {} while (0) -#endif -#ifndef pm_runtime_put_sync -#define pm_runtime_put_sync(dev) do {} while (0) -#endif -#ifndef pm_runtime_resume -#define pm_runtime_resume(dev) do {} while (0) -#endif -#ifndef pm_schedule_suspend -#define pm_schedule_suspend(dev, t) do {} while (0) -#endif -#ifndef pm_runtime_set_suspended -#define pm_runtime_set_suspended(dev) do {} while (0) -#endif -#ifndef pm_runtime_disable -#define pm_runtime_disable(dev) do {} while (0) -#endif -#ifndef pm_runtime_put_noidle -#define pm_runtime_put_noidle(dev) do {} while (0) -#endif -#ifndef pm_runtime_set_active -#define pm_runtime_set_active(dev) do {} while (0) -#endif -#ifndef pm_runtime_enable -#define pm_runtime_enable(dev) do {} while (0) -#endif -#ifndef pm_runtime_get_noresume -#define pm_runtime_get_noresume(dev) do {} while (0) -#endif -#else /* < 2.6.32 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE -#define HAVE_NETDEV_OPS_FCOE_ENABLE -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#ifdef CONFIG_DCB -#ifndef HAVE_DCBNL_OPS_GETAPP -#define HAVE_DCBNL_OPS_GETAPP -#endif -#endif /* CONFIG_DCB */ -#include -/* IOV bad DMA target work arounds require at least this kernel rev support */ -#define HAVE_PCIE_TYPE -#endif /* < 2.6.32 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) -#ifndef pci_pcie_cap -#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) -#endif -#ifndef IPV4_FLOW -#define IPV4_FLOW 0x10 -#endif /* IPV4_FLOW */ -#ifndef IPV6_FLOW -#define IPV6_FLOW 0x11 -#endif /* IPV6_FLOW */ -/* Features back-ported to RHEL6 or SLES11 SP1 
after 2.6.32 */ -#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN -#define HAVE_NETDEV_OPS_FCOE_GETWWN -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#endif /* RHEL6 or SLES11 SP1 */ -#ifndef __percpu -#define __percpu -#endif /* __percpu */ -#else /* < 2.6.33 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN -#define HAVE_NETDEV_OPS_FCOE_GETWWN -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#define HAVE_ETHTOOL_SFP_DISPLAY_PORT -#endif /* < 2.6.33 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) -#ifndef ETH_FLAG_NTUPLE -#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE -#endif - -#ifndef netdev_mc_count -#define netdev_mc_count(dev) ((dev)->mc_count) -#endif -#ifndef netdev_mc_empty -#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) -#endif -#ifndef netdev_for_each_mc_addr -#define netdev_for_each_mc_addr(mclist, dev) \ - for (mclist = dev->mc_list; mclist; mclist = mclist->next) -#endif -#ifndef netdev_uc_count -#define netdev_uc_count(dev) ((dev)->uc.count) -#endif -#ifndef netdev_uc_empty -#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) -#endif -#ifndef netdev_for_each_uc_addr -#define netdev_for_each_uc_addr(ha, dev) \ - list_for_each_entry(ha, &dev->uc.list, list) -#endif -#ifndef dma_set_coherent_mask -#define dma_set_coherent_mask(dev,mask) \ - pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) -#endif -#ifndef pci_dev_run_wake -#define pci_dev_run_wake(pdev) (0) -#endif - -/* netdev logging taken from include/linux/netdevice.h */ -#ifndef netdev_name -static inline const char *_kc_netdev_name(const struct net_device *dev) -{ - if (dev->reg_state != NETREG_REGISTERED) - return "(unregistered net_device)"; - return dev->name; -} -#define netdev_name(netdev) _kc_netdev_name(netdev) -#endif /* netdev_name */ - -#undef netdev_printk -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -#define netdev_printk(level, netdev, format, args...) \ -do { \ - struct adapter_struct *kc_adapter = netdev_priv(netdev);\ - struct pci_dev *pdev = kc_adapter->pdev; \ - printk("%s %s: " format, level, pci_name(pdev), \ - ##args); \ -} while(0) -#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -#define netdev_printk(level, netdev, format, args...) \ -do { \ - struct adapter_struct *kc_adapter = netdev_priv(netdev);\ - struct pci_dev *pdev = kc_adapter->pdev; \ - struct device *dev = pci_dev_to_dev(pdev); \ - dev_printk(level, dev, "%s: " format, \ - netdev_name(netdev), ##args); \ -} while(0) -#else /* 2.6.21 => 2.6.34 */ -#define netdev_printk(level, netdev, format, args...) \ - dev_printk(level, (netdev)->dev.parent, \ - "%s: " format, \ - netdev_name(netdev), ##args) -#endif /* <2.6.0 <2.6.21 <2.6.34 */ -#undef netdev_emerg -#define netdev_emerg(dev, format, args...) \ - netdev_printk(KERN_EMERG, dev, format, ##args) -#undef netdev_alert -#define netdev_alert(dev, format, args...) \ - netdev_printk(KERN_ALERT, dev, format, ##args) -#undef netdev_crit -#define netdev_crit(dev, format, args...) \ - netdev_printk(KERN_CRIT, dev, format, ##args) -#undef netdev_err -#define netdev_err(dev, format, args...) \ - netdev_printk(KERN_ERR, dev, format, ##args) -#undef netdev_warn -#define netdev_warn(dev, format, args...) 
\ - netdev_printk(KERN_WARNING, dev, format, ##args) -#undef netdev_notice -#define netdev_notice(dev, format, args...) \ - netdev_printk(KERN_NOTICE, dev, format, ##args) -#undef netdev_info -#define netdev_info(dev, format, args...) \ - netdev_printk(KERN_INFO, dev, format, ##args) -#undef netdev_dbg -#if defined(DEBUG) -#define netdev_dbg(__dev, format, args...) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) -#define netdev_dbg(__dev, format, args...) \ -do { \ - dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ - netdev_name(__dev), ##args); \ -} while (0) -#else /* DEBUG */ -#define netdev_dbg(__dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args); \ - 0; \ -}) -#endif /* DEBUG */ - -#undef netif_printk -#define netif_printk(priv, type, level, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_printk(level, (dev), fmt, ##args); \ -} while (0) - -#undef netif_emerg -#define netif_emerg(priv, type, dev, fmt, args...) \ - netif_level(emerg, priv, type, dev, fmt, ##args) -#undef netif_alert -#define netif_alert(priv, type, dev, fmt, args...) \ - netif_level(alert, priv, type, dev, fmt, ##args) -#undef netif_crit -#define netif_crit(priv, type, dev, fmt, args...) \ - netif_level(crit, priv, type, dev, fmt, ##args) -#undef netif_err -#define netif_err(priv, type, dev, fmt, args...) \ - netif_level(err, priv, type, dev, fmt, ##args) -#undef netif_warn -#define netif_warn(priv, type, dev, fmt, args...) \ - netif_level(warn, priv, type, dev, fmt, ##args) -#undef netif_notice -#define netif_notice(priv, type, dev, fmt, args...) \ - netif_level(notice, priv, type, dev, fmt, ##args) -#undef netif_info -#define netif_info(priv, type, dev, fmt, args...) \ - netif_level(info, priv, type, dev, fmt, ##args) - -#ifdef SET_SYSTEM_SLEEP_PM_OPS -#define HAVE_SYSTEM_SLEEP_PM_OPS -#endif - -#ifndef for_each_set_bit -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) -#endif /* for_each_set_bit */ - -#ifndef DEFINE_DMA_UNMAP_ADDR -#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR -#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN -#define dma_unmap_addr pci_unmap_addr -#define dma_unmap_addr_set pci_unmap_addr_set -#define dma_unmap_len pci_unmap_len -#define dma_unmap_len_set pci_unmap_len_set -#endif /* DEFINE_DMA_UNMAP_ADDR */ -#else /* < 2.6.34 */ -#define HAVE_SYSTEM_SLEEP_PM_OPS -#ifndef HAVE_SET_RX_MODE -#define HAVE_SET_RX_MODE -#endif - -#endif /* < 2.6.34 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) -#ifndef numa_node_id -#define numa_node_id() 0 -#endif -#ifdef HAVE_TX_MQ -#include -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); -#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues -#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ -#define netif_set_real_num_tx_queues(_netdev, _count) \ - do { \ - (_netdev)->egress_subqueue_count = _count; \ - } while (0) -#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ -#else -#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) -#endif /* HAVE_TX_MQ */ -#ifndef ETH_FLAG_RXHASH -#define ETH_FLAG_RXHASH (1<<28) -#endif /* ETH_FLAG_RXHASH */ -#else /* < 2.6.35 */ -#define HAVE_PM_QOS_REQUEST_LIST -#define HAVE_IRQ_AFFINITY_HINT -#endif /* < 2.6.35 */ - 
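/*
 * Illustrative sketch, not part of the patch: the for_each_set_bit()
 * pattern the 2.6.34 compat block above builds on top of
 * find_first_bit()/find_next_bit() -- iterate only over the 1-bits of
 * a bitmap.  The helpers below are single-word userspace stand-ins,
 * not the kernel's multi-word implementations.
 */
#include <stdio.h>

#define BITS 32u

static unsigned int find_next_bit(const unsigned long *addr,
				  unsigned int size, unsigned int offset)
{
	while (offset < size && !(*addr & (1UL << offset)))
		offset++;
	return offset;		/* == size when no further bit is set */
}

#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)

#define for_each_set_bit(bit, addr, size)			\
	for ((bit) = find_first_bit((addr), (size));		\
	     (bit) < (size);					\
	     (bit) = find_next_bit((addr), (size), (bit) + 1))

int main(void)
{
	unsigned long map = 0x0000002dUL;	/* bits 0, 2, 3, 5 */
	unsigned int bit;

	for_each_set_bit(bit, &map, BITS)
		printf("bit %u is set\n", bit);
	return 0;
}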
-/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); -#define ethtool_op_set_flags _kc_ethtool_op_set_flags -extern u32 _kc_ethtool_op_get_flags(struct net_device *); -#define ethtool_op_get_flags _kc_ethtool_op_get_flags - -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#ifdef NET_IP_ALIGN -#undef NET_IP_ALIGN -#endif -#define NET_IP_ALIGN 0 -#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - -#ifdef NET_SKB_PAD -#undef NET_SKB_PAD -#endif - -#if (L1_CACHE_BYTES > 32) -#define NET_SKB_PAD L1_CACHE_BYTES -#else -#define NET_SKB_PAD 32 -#endif - -static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length) -{ - struct sk_buff *skb; - - skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); - if (skb) { -#if (NET_IP_ALIGN + NET_SKB_PAD) - skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); -#endif - skb->dev = dev; - } - return skb; -} - -#ifdef netdev_alloc_skb_ip_align -#undef netdev_alloc_skb_ip_align -#endif -#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) - -#undef netif_level -#define netif_level(level, priv, type, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_##level(dev, fmt, ##args); \ -} while (0) - -#undef usleep_range -#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) - -#else /* < 2.6.36 */ -#define HAVE_PM_QOS_REQUEST_ACTIVE -#define HAVE_8021P_SUPPORT -#define HAVE_NDO_GET_STATS64 -#endif /* < 2.6.36 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) -#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR -#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) -#endif -#ifndef VLAN_N_VID -#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN -#endif /* VLAN_N_VID */ -#ifndef ETH_FLAG_TXVLAN -#define ETH_FLAG_TXVLAN (1 << 7) -#endif /* ETH_FLAG_TXVLAN */ -#ifndef ETH_FLAG_RXVLAN -#define ETH_FLAG_RXVLAN (1 << 8) -#endif /* ETH_FLAG_RXVLAN */ - -static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) -{ - WARN_ON(skb->ip_summed != CHECKSUM_NONE); -} -#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) - -static inline void *_kc_vzalloc_node(unsigned long size, int node) -{ - void *addr = vmalloc_node(size, node); - if (addr) - memset(addr, 0, size); - return addr; -} -#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) - -static inline void *_kc_vzalloc(unsigned long size) -{ - void *addr = vmalloc(size); - if (addr) - memset(addr, 0, size); - return addr; -} -#define vzalloc(_size) _kc_vzalloc(_size) - -#ifndef vlan_get_protocol -static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) -{ - if (vlan_tx_tag_present(skb) || - skb->protocol != cpu_to_be16(ETH_P_8021Q)) - return skb->protocol; - - if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) - return 0; - - return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; -} -#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) -#endif -#ifdef HAVE_HW_TIME_STAMP -#define SKBTX_HW_TSTAMP (1 << 0) -#define SKBTX_IN_PROGRESS (1 << 2) -#define SKB_SHARED_TX_IS_UNION -#endif -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) -#ifndef HAVE_VLAN_RX_REGISTER -#define HAVE_VLAN_RX_REGISTER -#endif -#endif /* > 2.4.18 */ -#endif /* < 2.6.37 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < 
KERNEL_VERSION(2,6,38) ) -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -#define skb_checksum_start_offset(skb) skb_transport_offset(skb) -#else /* 2.6.22 -> 2.6.37 */ -static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) -{ - return skb->csum_start - skb_headroom(skb); -} -#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) -#endif /* 2.6.22 -> 2.6.37 */ -#ifdef CONFIG_DCB -#ifndef IEEE_8021QAZ_MAX_TCS -#define IEEE_8021QAZ_MAX_TCS 8 -#endif -#ifndef DCB_CAP_DCBX_HOST -#define DCB_CAP_DCBX_HOST 0x01 -#endif -#ifndef DCB_CAP_DCBX_LLD_MANAGED -#define DCB_CAP_DCBX_LLD_MANAGED 0x02 -#endif -#ifndef DCB_CAP_DCBX_VER_CEE -#define DCB_CAP_DCBX_VER_CEE 0x04 -#endif -#ifndef DCB_CAP_DCBX_VER_IEEE -#define DCB_CAP_DCBX_VER_IEEE 0x08 -#endif -#ifndef DCB_CAP_DCBX_STATIC -#define DCB_CAP_DCBX_STATIC 0x10 -#endif -#endif /* CONFIG_DCB */ -#else /* < 2.6.38 */ -#endif /* < 2.6.38 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#ifndef skb_queue_reverse_walk_safe -#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ - for (skb = (queue)->prev, tmp = skb->prev; \ - skb != (struct sk_buff *)(queue); \ - skb = tmp, tmp = skb->prev) -#endif -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) -extern u8 _kc_netdev_get_num_tc(struct net_device *dev); -#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev) -extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up); -#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up) -#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0) -#else /* RHEL6.1 or greater */ -#ifndef HAVE_MQPRIO -#define HAVE_MQPRIO -#endif /* HAVE_MQPRIO */ -#ifdef CONFIG_DCB -#ifndef HAVE_DCBNL_IEEE -#define HAVE_DCBNL_IEEE -#ifndef IEEE_8021QAZ_TSA_STRICT -#define IEEE_8021QAZ_TSA_STRICT 0 -#endif -#ifndef IEEE_8021QAZ_TSA_ETS -#define IEEE_8021QAZ_TSA_ETS 2 -#endif -#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE -#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1 -#endif -#endif -#endif /* CONFIG_DCB */ -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ -#else /* < 2.6.39 */ -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET -#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET -#endif -#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ -#ifndef HAVE_MQPRIO -#define HAVE_MQPRIO -#endif -#ifndef HAVE_SETUP_TC -#define HAVE_SETUP_TC -#endif -#ifdef CONFIG_DCB -#ifndef HAVE_DCBNL_IEEE -#define HAVE_DCBNL_IEEE -#endif -#endif /* CONFIG_DCB */ -#ifndef HAVE_NDO_SET_FEATURES -#define HAVE_NDO_SET_FEATURES -#endif -#endif /* < 2.6.39 */ - -/*****************************************************************************/ -/* use < 2.6.40 because of a Fedora 15 kernel update where they - * updated the kernel version to 2.6.40.x and they back-ported 3.0 features - * like set_phys_id for ethtool. 
- */ -#undef ETHTOOL_GRXRINGS -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) -#ifdef ETHTOOL_GRXRINGS -#ifndef FLOW_EXT -#define FLOW_EXT 0x80000000 -union _kc_ethtool_flow_union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[60]; -}; -struct _kc_ethtool_flow_ext { - __be16 vlan_etype; - __be16 vlan_tci; - __be32 data[2]; -}; -struct _kc_ethtool_rx_flow_spec { - __u32 flow_type; - union _kc_ethtool_flow_union h_u; - struct _kc_ethtool_flow_ext h_ext; - union _kc_ethtool_flow_union m_u; - struct _kc_ethtool_flow_ext m_ext; - __u64 ring_cookie; - __u32 location; -}; -#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec -#endif /* FLOW_EXT */ -#endif - -#define pci_disable_link_state_locked pci_disable_link_state - -#ifndef PCI_LTR_VALUE_MASK -#define PCI_LTR_VALUE_MASK 0x000003ff -#endif -#ifndef PCI_LTR_SCALE_MASK -#define PCI_LTR_SCALE_MASK 0x00001c00 -#endif -#ifndef PCI_LTR_SCALE_SHIFT -#define PCI_LTR_SCALE_SHIFT 10 -#endif - -#else /* < 2.6.40 */ -#define HAVE_ETHTOOL_SET_PHYS_ID -#endif /* < 2.6.40 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) -#ifndef __netdev_alloc_skb_ip_align -#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) -#endif /* __netdev_alloc_skb_ip_align */ -#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) -#define dcb_ieee_delapp(dev, app) 0 -#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) -#else /* < 3.1.0 */ -#ifndef HAVE_DCBNL_IEEE_DELAPP -#define HAVE_DCBNL_IEEE_DELAPP -#endif -#endif /* < 3.1.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) -#ifdef ETHTOOL_GRXRINGS -#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS -#endif /* ETHTOOL_GRXRINGS */ - -#ifndef skb_frag_size -#define skb_frag_size(frag) _kc_skb_frag_size(frag) -static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) -{ - return frag->size; -} -#endif /* skb_frag_size */ - -#ifndef skb_frag_size_sub -#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) -static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) -{ - frag->size -= delta; -} -#endif /* skb_frag_size_sub */ - -#ifndef skb_frag_page -#define skb_frag_page(frag) _kc_skb_frag_page(frag) -static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) -{ - return frag->page; -} -#endif /* skb_frag_page */ - -#ifndef skb_frag_address -#define skb_frag_address(frag) _kc_skb_frag_address(frag) -static inline void *_kc_skb_frag_address(const skb_frag_t *frag) -{ - return page_address(skb_frag_page(frag)) + frag->page_offset; -} -#endif /* skb_frag_address */ - -#ifndef skb_frag_dma_map -#define skb_frag_dma_map(dev,frag,offset,size,dir) \ - _kc_skb_frag_dma_map(dev,frag,offset,size,dir) -static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) -{ - return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); -} -#endif /* skb_frag_dma_map */ - -#ifndef __skb_frag_unref -#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) -static inline void __kc_skb_frag_unref(skb_frag_t *frag) -{ - put_page(skb_frag_page(frag)); -} -#endif /* __skb_frag_unref */ -#else /* < 3.2.0 */ -#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED -#define HAVE_PCI_DEV_FLAGS_ASSIGNED -#define HAVE_VF_SPOOFCHK_CONFIGURE -#endif -#endif /* < 
3.2.0 */ - -#if (RHEL_RELEASE_CODE && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \ - (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) -#undef ixgbe_get_netdev_tc_txq -#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) -#endif - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) -typedef u32 kni_netdev_features_t; -#else /* ! < 3.3.0 */ -typedef netdev_features_t kni_netdev_features_t; -#define HAVE_INT_NDO_VLAN_RX_ADD_VID -#ifdef ETHTOOL_SRXNTUPLE -#undef ETHTOOL_SRXNTUPLE -#endif -#endif /* < 3.3.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -#ifndef NETIF_F_RXFCS -#define NETIF_F_RXFCS 0 -#endif /* NETIF_F_RXFCS */ -#ifndef NETIF_F_RXALL -#define NETIF_F_RXALL 0 -#endif /* NETIF_F_RXALL */ - -#define NUMTCS_RETURNS_U8 - - -#endif /* < 3.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) -static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) -{ - return !compare_ether_addr(addr1, addr2); -} -#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) -#else -#define HAVE_FDB_OPS -#endif /* < 3.5.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) ) -#define NETIF_F_HW_VLAN_TX NETIF_F_HW_VLAN_CTAG_TX -#define NETIF_F_HW_VLAN_RX NETIF_F_HW_VLAN_CTAG_RX -#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER -#endif /* >= 3.10.0 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -extern int __kc_pci_vfs_assigned(struct pci_dev *dev); -#else -static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - return 0; -} -#endif -#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) - -#endif - -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) -#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) -#endif /* >= 3.16.0 */ - -/* - * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) - * For older kernels backported this commit, need to use renamed functions. - * This fix is specific to RedHat/CentOS kernels. - */ -#if (defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)) -#define vlan_tx_tag_get skb_vlan_tag_get -#define vlan_tx_tag_present skb_vlan_tag_present -#endif - -#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h deleted file mode 100644 index c9393d89..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_dev.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright(c) 2010-2014 Intel Corporation. - */ - -#ifndef _KNI_DEV_H_ -#define _KNI_DEV_H_ - -#ifdef pr_fmt -#undef pr_fmt -#endif -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include "compat.h" - -#include -#include -#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER -#include -#else -#include -#endif -#include -#include -#include - -#include -#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ - -#define MBUF_BURST_SZ 32 - -/** - * A structure describing the private information for a kni device. 
- */ -struct kni_dev { - /* kni list */ - struct list_head list; - - struct net_device_stats stats; - int status; - uint16_t group_id; /* Group ID of a group of KNI devices */ - uint32_t core_id; /* Core ID to bind */ - char name[RTE_KNI_NAMESIZE]; /* Network device name */ - struct task_struct *pthread; - - /* wait queue for req/resp */ - wait_queue_head_t wq; - struct mutex sync_lock; - - /* PCI device id */ - uint16_t device_id; - - /* kni device */ - struct net_device *net_dev; - struct net_device *lad_dev; - struct pci_dev *pci_dev; - - /* queue for packets to be sent out */ - void *tx_q; - - /* queue for the packets received */ - void *rx_q; - - /* queue for the allocated mbufs those can be used to save sk buffs */ - void *alloc_q; - - /* free queue for the mbufs to be freed */ - void *free_q; - - /* request queue */ - void *req_q; - - /* response queue */ - void *resp_q; - - void *sync_kva; - void *sync_va; - - void *mbuf_kva; - void *mbuf_va; - - /* mbuf size */ - uint32_t mbuf_size; - - /* synchro for request processing */ - unsigned long synchro; - - /* buffers */ - void *pa[MBUF_BURST_SZ]; - void *va[MBUF_BURST_SZ]; - void *alloc_pa[MBUF_BURST_SZ]; - void *alloc_va[MBUF_BURST_SZ]; -}; - -void kni_net_rx(struct kni_dev *kni); -void kni_net_init(struct net_device *dev); -void kni_net_config_lo_mode(char *lo_str); -void kni_net_poll_resp(struct kni_dev *kni); -void kni_set_ethtool_ops(struct net_device *netdev); - -int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -void ixgbe_kni_remove(struct pci_dev *pdev); -int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -void igb_kni_remove(struct pci_dev *pdev); - -#endif diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c deleted file mode 100644 index a44e7d94..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_ethtool.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright(c) 2010-2014 Intel Corporation. 
- */ - -#include -#include -#include -#include "kni_dev.h" - -static int -kni_check_if_running(struct net_device *dev) -{ - struct kni_dev *priv = netdev_priv(dev); - - if (priv->lad_dev) - return 0; - else - return -EOPNOTSUPP; -} - -static void -kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); -} - -static int -kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); -} - -static int -kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); -} - -static void -kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); -} - -static int -kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); -} - -static int -kni_nway_reset(struct net_device *dev) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); -} - -static int -kni_get_eeprom_len(struct net_device *dev) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); -} - -static int -kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, - u8 *bytes) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, - bytes); -} - -static int -kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, - u8 *bytes) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, - bytes); -} - -static void -kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); -} - -static int -kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); -} - -static void -kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); -} - -static int -kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, - pause); -} - -static u32 -kni_get_msglevel(struct net_device *dev) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); -} - -static void -kni_set_msglevel(struct net_device *dev, u32 data) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); -} - -static int -kni_get_regs_len(struct net_device *dev) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); -} - -static void -kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) -{ - struct kni_dev *priv = netdev_priv(dev); - 
- priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); -} - -static void -kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, - data); -} - -static int -kni_get_sset_count(struct net_device *dev, int sset) -{ - struct kni_dev *priv = netdev_priv(dev); - - return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); -} - -static void -kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, - u64 *data) -{ - struct kni_dev *priv = netdev_priv(dev); - - priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, - data); -} - -struct ethtool_ops kni_ethtool_ops = { - .begin = kni_check_if_running, - .get_drvinfo = kni_get_drvinfo, - .get_settings = kni_get_settings, - .set_settings = kni_set_settings, - .get_regs_len = kni_get_regs_len, - .get_regs = kni_get_regs, - .get_wol = kni_get_wol, - .set_wol = kni_set_wol, - .nway_reset = kni_nway_reset, - .get_link = ethtool_op_get_link, - .get_eeprom_len = kni_get_eeprom_len, - .get_eeprom = kni_get_eeprom, - .set_eeprom = kni_set_eeprom, - .get_ringparam = kni_get_ringparam, - .set_ringparam = kni_set_ringparam, - .get_pauseparam = kni_get_pauseparam, - .set_pauseparam = kni_set_pauseparam, - .get_msglevel = kni_get_msglevel, - .set_msglevel = kni_set_msglevel, - .get_strings = kni_get_strings, - .get_sset_count = kni_get_sset_count, - .get_ethtool_stats = kni_get_ethtool_stats, -}; - -void -kni_set_ethtool_ops(struct net_device *netdev) -{ - netdev->ethtool_ops = &kni_ethtool_ops; -} diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h deleted file mode 100644 index 9a4762de..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright(c) 2010-2014 Intel Corporation. - */ - -#ifndef _KNI_FIFO_H_ -#define _KNI_FIFO_H_ - -#include - -/** - * Adds num elements into the fifo. Return the number actually written - */ -static inline uint32_t -kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num) -{ - uint32_t i = 0; - uint32_t fifo_write = fifo->write; - uint32_t fifo_read = fifo->read; - uint32_t new_write = fifo_write; - - for (i = 0; i < num; i++) { - new_write = (new_write + 1) & (fifo->len - 1); - - if (new_write == fifo_read) - break; - fifo->buffer[fifo_write] = data[i]; - fifo_write = new_write; - } - fifo->write = fifo_write; - - return i; -} - -/** - * Get up to num elements from the fifo. 
Return the number actully read - */ -static inline uint32_t -kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num) -{ - uint32_t i = 0; - uint32_t new_read = fifo->read; - uint32_t fifo_write = fifo->write; - - for (i = 0; i < num; i++) { - if (new_read == fifo_write) - break; - - data[i] = fifo->buffer[new_read]; - new_read = (new_read + 1) & (fifo->len - 1); - } - fifo->read = new_read; - - return i; -} - -/** - * Get the num of elements in the fifo - */ -static inline uint32_t -kni_fifo_count(struct rte_kni_fifo *fifo) -{ - return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); -} - -/** - * Get the num of available elements in the fifo - */ -static inline uint32_t -kni_fifo_free_count(struct rte_kni_fifo *fifo) -{ - return (fifo->read - fifo->write - 1) & (fifo->len - 1); -} - -#endif /* _KNI_FIFO_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c deleted file mode 100644 index 01574ecf..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ /dev/null @@ -1,663 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright(c) 2010-2014 Intel Corporation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "compat.h" -#include "kni_dev.h" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Kernel Module for managing kni devices"); - -#define KNI_RX_LOOP_NUM 1000 - -#define KNI_MAX_DEVICES 32 - -extern const struct pci_device_id ixgbe_pci_tbl[]; -extern const struct pci_device_id igb_pci_tbl[]; - -/* loopback mode */ -static char *lo_mode; - -/* Kernel thread mode */ -static char *kthread_mode; -static uint32_t multiple_kthread_on; - -#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ - -static int kni_net_id; - -struct kni_net { - unsigned long device_in_use; /* device in use flag */ - struct mutex kni_kthread_lock; - struct task_struct *kni_kthread; - struct rw_semaphore kni_list_lock; - struct list_head kni_list_head; -}; - -static int __net_init -kni_init_net(struct net *net) -{ -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - struct kni_net *knet = net_generic(net, kni_net_id); - - memset(knet, 0, sizeof(*knet)); -#else - struct kni_net *knet; - int ret; - - knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); - if (!knet) { - ret = -ENOMEM; - return ret; - } -#endif - - /* Clear the bit of device in use */ - clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); - - mutex_init(&knet->kni_kthread_lock); - - init_rwsem(&knet->kni_list_lock); - INIT_LIST_HEAD(&knet->kni_list_head); - -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - return 0; -#else - ret = net_assign_generic(net, kni_net_id, knet); - if (ret < 0) - kfree(knet); - - return ret; -#endif -} - -static void __net_exit -kni_exit_net(struct net *net) -{ - struct kni_net *knet __maybe_unused; - - knet = net_generic(net, kni_net_id); - mutex_destroy(&knet->kni_kthread_lock); - -#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS - kfree(knet); -#endif -} - -static struct pernet_operations kni_net_ops = { - .init = kni_init_net, - .exit = kni_exit_net, -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - .id = &kni_net_id, - .size = sizeof(struct kni_net), -#endif -}; - -static int -kni_thread_single(void *data) -{ - struct kni_net *knet = data; - int j; - struct kni_dev *dev; - - while (!kthread_should_stop()) { - down_read(&knet->kni_list_lock); - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { - list_for_each_entry(dev, 
&knet->kni_list_head, list) { - kni_net_rx(dev); - kni_net_poll_resp(dev); - } - } - up_read(&knet->kni_list_lock); -#ifdef RTE_KNI_PREEMPT_DEFAULT - /* reschedule out for a while */ - schedule_timeout_interruptible( - usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif - } - - return 0; -} - -static int -kni_thread_multiple(void *param) -{ - int j; - struct kni_dev *dev = param; - - while (!kthread_should_stop()) { - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { - kni_net_rx(dev); - kni_net_poll_resp(dev); - } -#ifdef RTE_KNI_PREEMPT_DEFAULT - schedule_timeout_interruptible( - usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif - } - - return 0; -} - -static int -kni_open(struct inode *inode, struct file *file) -{ - struct net *net = current->nsproxy->net_ns; - struct kni_net *knet = net_generic(net, kni_net_id); - - /* kni device can be opened by one user only per netns */ - if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) - return -EBUSY; - - file->private_data = get_net(net); - pr_debug("/dev/kni opened\n"); - - return 0; -} - -static int -kni_dev_remove(struct kni_dev *dev) -{ - if (!dev) - return -ENODEV; - -#ifdef RTE_KNI_KMOD_ETHTOOL - if (dev->pci_dev) { - if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) - ixgbe_kni_remove(dev->pci_dev); - else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) - igb_kni_remove(dev->pci_dev); - } -#endif - - if (dev->net_dev) { - unregister_netdev(dev->net_dev); - free_netdev(dev->net_dev); - } - - return 0; -} - -static int -kni_release(struct inode *inode, struct file *file) -{ - struct net *net = file->private_data; - struct kni_net *knet = net_generic(net, kni_net_id); - struct kni_dev *dev, *n; - - /* Stop kernel thread for single mode */ - if (multiple_kthread_on == 0) { - mutex_lock(&knet->kni_kthread_lock); - /* Stop kernel thread */ - if (knet->kni_kthread != NULL) { - kthread_stop(knet->kni_kthread); - knet->kni_kthread = NULL; - } - mutex_unlock(&knet->kni_kthread_lock); - } - - down_write(&knet->kni_list_lock); - list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { - /* Stop kernel thread for multiple mode */ - if (multiple_kthread_on && dev->pthread != NULL) { - kthread_stop(dev->pthread); - dev->pthread = NULL; - } - - kni_dev_remove(dev); - list_del(&dev->list); - } - up_write(&knet->kni_list_lock); - - /* Clear the bit of device in use */ - clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); - - put_net(net); - pr_debug("/dev/kni closed\n"); - - return 0; -} - -static int -kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) -{ - if (!kni || !dev) - return -1; - - /* Check if network name has been used */ - if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { - pr_err("KNI name %s duplicated\n", dev->name); - return -1; - } - - return 0; -} - -static int -kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) -{ - /** - * Create a new kernel thread for multiple mode, set its core affinity, - * and finally wake it up. 
- */ - if (multiple_kthread_on) { - kni->pthread = kthread_create(kni_thread_multiple, - (void *)kni, "kni_%s", kni->name); - if (IS_ERR(kni->pthread)) { - kni_dev_remove(kni); - return -ECANCELED; - } - - if (force_bind) - kthread_bind(kni->pthread, kni->core_id); - wake_up_process(kni->pthread); - } else { - mutex_lock(&knet->kni_kthread_lock); - - if (knet->kni_kthread == NULL) { - knet->kni_kthread = kthread_create(kni_thread_single, - (void *)knet, "kni_single"); - if (IS_ERR(knet->kni_kthread)) { - mutex_unlock(&knet->kni_kthread_lock); - kni_dev_remove(kni); - return -ECANCELED; - } - - if (force_bind) - kthread_bind(knet->kni_kthread, kni->core_id); - wake_up_process(knet->kni_kthread); - } - - mutex_unlock(&knet->kni_kthread_lock); - } - - return 0; -} - -static int -kni_ioctl_create(struct net *net, uint32_t ioctl_num, - unsigned long ioctl_param) -{ - struct kni_net *knet = net_generic(net, kni_net_id); - int ret; - struct rte_kni_device_info dev_info; - struct net_device *net_dev = NULL; - struct kni_dev *kni, *dev, *n; -#ifdef RTE_KNI_KMOD_ETHTOOL - struct pci_dev *found_pci = NULL; - struct net_device *lad_dev = NULL; - struct pci_dev *pci = NULL; -#endif - - pr_info("Creating kni...\n"); - /* Check the buffer size, to avoid warning */ - if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) - return -EINVAL; - - /* Copy kni info from user space */ - ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); - if (ret) { - pr_err("copy_from_user in kni_ioctl_create"); - return -EIO; - } - - /* Check if name is zero-ended */ - if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { - pr_err("kni.name not zero-terminated"); - return -EINVAL; - } - - /** - * Check if the cpu core id is valid for binding. - */ - if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { - pr_err("cpu %u is not online\n", dev_info.core_id); - return -EINVAL; - } - - /* Check if it has been created */ - down_read(&knet->kni_list_lock); - list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { - if (kni_check_param(dev, &dev_info) < 0) { - up_read(&knet->kni_list_lock); - return -EINVAL; - } - } - up_read(&knet->kni_list_lock); - - net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, -#ifdef NET_NAME_USER - NET_NAME_USER, -#endif - kni_net_init); - if (net_dev == NULL) { - pr_err("error allocating device \"%s\"\n", dev_info.name); - return -EBUSY; - } - - dev_net_set(net_dev, net); - - kni = netdev_priv(net_dev); - - kni->net_dev = net_dev; - kni->group_id = dev_info.group_id; - kni->core_id = dev_info.core_id; - strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); - - /* Translate user space info into kernel space info */ - kni->tx_q = phys_to_virt(dev_info.tx_phys); - kni->rx_q = phys_to_virt(dev_info.rx_phys); - kni->alloc_q = phys_to_virt(dev_info.alloc_phys); - kni->free_q = phys_to_virt(dev_info.free_phys); - - kni->req_q = phys_to_virt(dev_info.req_phys); - kni->resp_q = phys_to_virt(dev_info.resp_phys); - kni->sync_va = dev_info.sync_va; - kni->sync_kva = phys_to_virt(dev_info.sync_phys); - - kni->mbuf_size = dev_info.mbuf_size; - - pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", - (unsigned long long) dev_info.tx_phys, kni->tx_q); - pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", - (unsigned long long) dev_info.rx_phys, kni->rx_q); - pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", - (unsigned long long) dev_info.alloc_phys, kni->alloc_q); - pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", - (unsigned long long) dev_info.free_phys, 
kni->free_q); - pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", - (unsigned long long) dev_info.req_phys, kni->req_q); - pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", - (unsigned long long) dev_info.resp_phys, kni->resp_q); - pr_debug("mbuf_size: %u\n", kni->mbuf_size); - - pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n", - dev_info.bus, - dev_info.devid, - dev_info.function, - dev_info.vendor_id, - dev_info.device_id); -#ifdef RTE_KNI_KMOD_ETHTOOL - pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); - - /* Support Ethtool */ - while (pci) { - pr_debug("pci_bus: %02x:%02x:%02x\n", - pci->bus->number, - PCI_SLOT(pci->devfn), - PCI_FUNC(pci->devfn)); - - if ((pci->bus->number == dev_info.bus) && - (PCI_SLOT(pci->devfn) == dev_info.devid) && - (PCI_FUNC(pci->devfn) == dev_info.function)) { - found_pci = pci; - - if (pci_match_id(ixgbe_pci_tbl, found_pci)) - ret = ixgbe_kni_probe(found_pci, &lad_dev); - else if (pci_match_id(igb_pci_tbl, found_pci)) - ret = igb_kni_probe(found_pci, &lad_dev); - else - ret = -1; - - pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n", - pci, lad_dev); - if (ret == 0) { - kni->lad_dev = lad_dev; - kni_set_ethtool_ops(kni->net_dev); - } else { - pr_err("Device not supported by ethtool"); - kni->lad_dev = NULL; - } - - kni->pci_dev = found_pci; - kni->device_id = dev_info.device_id; - break; - } - pci = pci_get_device(dev_info.vendor_id, - dev_info.device_id, pci); - } - if (pci) - pci_dev_put(pci); -#endif - - if (kni->lad_dev) - ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); - else { - /* if user has provided a valid mac address */ - if (is_valid_ether_addr((unsigned char *)(dev_info.mac_addr))) - memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN); - else - /* - * Generate random mac address. eth_random_addr() is the - * newer version of generating mac address in kernel. - */ - random_ether_addr(net_dev->dev_addr); - } - - if (dev_info.mtu) - net_dev->mtu = dev_info.mtu; - - ret = register_netdev(net_dev); - if (ret) { - pr_err("error %i registering device \"%s\"\n", - ret, dev_info.name); - kni->net_dev = NULL; - kni_dev_remove(kni); - free_netdev(net_dev); - return -ENODEV; - } - - ret = kni_run_thread(knet, kni, dev_info.force_bind); - if (ret != 0) - return ret; - - down_write(&knet->kni_list_lock); - list_add(&kni->list, &knet->kni_list_head); - up_write(&knet->kni_list_lock); - - return 0; -} - -static int -kni_ioctl_release(struct net *net, uint32_t ioctl_num, - unsigned long ioctl_param) -{ - struct kni_net *knet = net_generic(net, kni_net_id); - int ret = -EINVAL; - struct kni_dev *dev, *n; - struct rte_kni_device_info dev_info; - - if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) - return -EINVAL; - - ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); - if (ret) { - pr_err("copy_from_user in kni_ioctl_release"); - return -EIO; - } - - /* Release the network device according to its name */ - if (strlen(dev_info.name) == 0) - return ret; - - down_write(&knet->kni_list_lock); - list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { - if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0) - continue; - - if (multiple_kthread_on && dev->pthread != NULL) { - kthread_stop(dev->pthread); - dev->pthread = NULL; - } - - kni_dev_remove(dev); - list_del(&dev->list); - ret = 0; - break; - } - up_write(&knet->kni_list_lock); - pr_info("%s release kni named %s\n", - (ret == 0 ? 
"Successfully" : "Unsuccessfully"), dev_info.name); - - return ret; -} - -static int -kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) -{ - int ret = -EINVAL; - struct net *net = current->nsproxy->net_ns; - - pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); - - /* - * Switch according to the ioctl called - */ - switch (_IOC_NR(ioctl_num)) { - case _IOC_NR(RTE_KNI_IOCTL_TEST): - /* For test only, not used */ - break; - case _IOC_NR(RTE_KNI_IOCTL_CREATE): - ret = kni_ioctl_create(net, ioctl_num, ioctl_param); - break; - case _IOC_NR(RTE_KNI_IOCTL_RELEASE): - ret = kni_ioctl_release(net, ioctl_num, ioctl_param); - break; - default: - pr_debug("IOCTL default\n"); - break; - } - - return ret; -} - -static int -kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, - unsigned long ioctl_param) -{ - /* 32 bits app on 64 bits OS to be supported later */ - pr_debug("Not implemented.\n"); - - return -EINVAL; -} - -static const struct file_operations kni_fops = { - .owner = THIS_MODULE, - .open = kni_open, - .release = kni_release, - .unlocked_ioctl = (void *)kni_ioctl, - .compat_ioctl = (void *)kni_compat_ioctl, -}; - -static struct miscdevice kni_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = KNI_DEVICE, - .fops = &kni_fops, -}; - -static int __init -kni_parse_kthread_mode(void) -{ - if (!kthread_mode) - return 0; - - if (strcmp(kthread_mode, "single") == 0) - return 0; - else if (strcmp(kthread_mode, "multiple") == 0) - multiple_kthread_on = 1; - else - return -1; - - return 0; -} - -static int __init -kni_init(void) -{ - int rc; - - if (kni_parse_kthread_mode() < 0) { - pr_err("Invalid parameter for kthread_mode\n"); - return -EINVAL; - } - - if (multiple_kthread_on == 0) - pr_debug("Single kernel thread for all KNI devices\n"); - else - pr_debug("Multiple kernel thread mode enabled\n"); - -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - rc = register_pernet_subsys(&kni_net_ops); -#else - rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif - if (rc) - return -EPERM; - - rc = misc_register(&kni_misc); - if (rc != 0) { - pr_err("Misc registration failed\n"); - goto out; - } - - /* Configure the lo mode according to the input parameter */ - kni_net_config_lo_mode(lo_mode); - - return 0; - -out: -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); -#else - unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); -#endif - return rc; -} - -static void __exit -kni_exit(void) -{ - misc_deregister(&kni_misc); -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); -#else - unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); -#endif -} - -module_init(kni_init); -module_exit(kni_exit); - -module_param(lo_mode, charp, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(lo_mode, -"KNI loopback mode (default=lo_mode_none):\n" -" lo_mode_none Kernel loopback disabled\n" -" lo_mode_fifo Enable kernel loopback with fifo\n" -" lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n" -"\n" -); - -module_param(kthread_mode, charp, S_IRUGO); -MODULE_PARM_DESC(kthread_mode, -"Kernel thread mode (default=single):\n" -" single Single kernel thread mode enabled.\n" -" multiple Multiple kernel thread mode enabled.\n" -"\n" -); diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c deleted file mode 100644 index 9f9b798c..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ /dev/null @@ -1,757 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright(c) 
2010-2014 Intel Corporation. - */ - -/* - * This code is inspired from the book "Linux Device Drivers" by - * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates - */ - -#include -#include -#include -#include -#include /* eth_type_trans */ -#include -#include -#include - -#include -#include - -#include "compat.h" -#include "kni_dev.h" - -#define WD_TIMEOUT 5 /*jiffies */ - -#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ - -/* typedef for rx function */ -typedef void (*kni_net_rx_t)(struct kni_dev *kni); - -static void kni_net_rx_normal(struct kni_dev *kni); - -/* kni rx function pointer, with default to normal rx */ -static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; - -/* physical address to kernel virtual address */ -static void * -pa2kva(void *pa) -{ - return phys_to_virt((unsigned long)pa); -} - -/* physical address to virtual address */ -static void * -pa2va(void *pa, struct rte_kni_mbuf *m) -{ - void *va; - - va = (void *)((unsigned long)pa + - (unsigned long)m->buf_addr - - (unsigned long)m->buf_physaddr); - return va; -} - -/* mbuf data kernel virtual address from mbuf kernel virtual address */ -static void * -kva2data_kva(struct rte_kni_mbuf *m) -{ - return phys_to_virt(m->buf_physaddr + m->data_off); -} - -/* virtual address to physical address */ -static void * -va2pa(void *va, struct rte_kni_mbuf *m) -{ - void *pa; - - pa = (void *)((unsigned long)va - - ((unsigned long)m->buf_addr - - (unsigned long)m->buf_physaddr)); - return pa; -} - -/* - * It can be called to process the request. - */ -static int -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) -{ - int ret = -1; - void *resp_va; - uint32_t num; - int ret_val; - - if (!kni || !req) { - pr_err("No kni instance or request\n"); - return -EINVAL; - } - - mutex_lock(&kni->sync_lock); - - /* Construct data */ - memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); - num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); - if (num < 1) { - pr_err("Cannot send to req_q\n"); - ret = -EBUSY; - goto fail; - } - - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { - ret = -ETIME; - goto fail; - } - num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); - if (num != 1 || resp_va != kni->sync_va) { - /* This should never happen */ - pr_err("No data in resp_q\n"); - ret = -ENODATA; - goto fail; - } - - memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); - ret = 0; - -fail: - mutex_unlock(&kni->sync_lock); - return ret; -} - -/* - * Open and close - */ -static int -kni_net_open(struct net_device *dev) -{ - int ret; - struct rte_kni_request req; - struct kni_dev *kni = netdev_priv(dev); - - netif_start_queue(dev); - - memset(&req, 0, sizeof(req)); - req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; - - /* Setting if_up to non-zero means up */ - req.if_up = 1; - ret = kni_net_process_request(kni, &req); - - return (ret == 0) ? req.result : ret; -} - -static int -kni_net_release(struct net_device *dev) -{ - int ret; - struct rte_kni_request req; - struct kni_dev *kni = netdev_priv(dev); - - netif_stop_queue(dev); /* can't transmit any more */ - - memset(&req, 0, sizeof(req)); - req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; - - /* Setting if_up to 0 means down */ - req.if_up = 0; - ret = kni_net_process_request(kni, &req); - - return (ret == 0) ? 
req.result : ret; -} - -/* - * Configuration changes (passed on by ifconfig) - */ -static int -kni_net_config(struct net_device *dev, struct ifmap *map) -{ - if (dev->flags & IFF_UP) /* can't act on a running interface */ - return -EBUSY; - - /* ignore other fields */ - return 0; -} - -/* - * Transmit a packet (called by the kernel) - */ -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - int len = 0; - uint32_t ret; - struct kni_dev *kni = netdev_priv(dev); - struct rte_kni_mbuf *pkt_kva = NULL; - void *pkt_pa = NULL; - void *pkt_va = NULL; - - /* save the timestamp */ -#ifdef HAVE_TRANS_START_HELPER - netif_trans_update(dev); -#else - dev->trans_start = jiffies; -#endif - - /* Check if the length of skb is less than mbuf size */ - if (skb->len > kni->mbuf_size) - goto drop; - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. - */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = pa2kva(pkt_pa); - data_kva = kva2data_kva(pkt_kva); - pkt_va = pa2va(pkt_pa, pkt_kva); - - len = skb->len; - memcpy(data_kva, skb->data, len); - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, &pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - pr_err("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - pr_err("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return NETDEV_TX_OK; - -drop: - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} - -/* - * RX: normal working mode - */ -static void -kni_net_rx_normal(struct kni_dev *kni) -{ - uint32_t ret; - uint32_t len; - uint32_t i, num_rx, num_fq; - struct rte_kni_mbuf *kva; - void *data_kva; - struct sk_buff *skb; - struct net_device *dev = kni->net_dev; - - /* Get the number of free entries in free_q */ - num_fq = kni_fifo_free_count(kni->free_q); - if (num_fq == 0) { - /* No room on the free_q, bail out */ - return; - } - - /* Calculate the number of entries to dequeue from rx_q */ - num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ); - - /* Burst dequeue from rx_q */ - num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx); - if (num_rx == 0) - return; - - /* Transfer received packets to netif */ - for (i = 0; i < num_rx; i++) { - kva = pa2kva(kni->pa[i]); - len = kva->pkt_len; - data_kva = kva2data_kva(kva); - kni->va[i] = pa2va(kni->pa[i], kva); - - skb = dev_alloc_skb(len + 2); - if (!skb) { - /* Update statistics */ - kni->stats.rx_dropped++; - continue; - } - - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); - - if (kva->nb_segs == 1) { - memcpy(skb_put(skb, len), data_kva, len); - } else { - int nb_segs; - int kva_nb_segs = kva->nb_segs; - - for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { - memcpy(skb_put(skb, kva->data_len), - data_kva, kva->data_len); - - if (!kva->next) - break; - - kva = pa2kva(va2pa(kva->next, kva)); - data_kva = kva2data_kva(kva); - } - } - - skb->dev = dev; - 
skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* Call netif interface */ - netif_rx_ni(skb); - - /* Update statistics */ - kni->stats.rx_bytes += len; - kni->stats.rx_packets++; - } - - /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, kni->va, num_rx); - if (ret != num_rx) - /* Failing should not happen */ - pr_err("Fail to enqueue entries into free_q\n"); -} - -/* - * RX: loopback with enqueue/dequeue fifos. - */ -static void -kni_net_rx_lo_fifo(struct kni_dev *kni) -{ - uint32_t ret; - uint32_t len; - uint32_t i, num, num_rq, num_tq, num_aq, num_fq; - struct rte_kni_mbuf *kva; - void *data_kva; - struct rte_kni_mbuf *alloc_kva; - void *alloc_data_kva; - - /* Get the number of entries in rx_q */ - num_rq = kni_fifo_count(kni->rx_q); - - /* Get the number of free entrie in tx_q */ - num_tq = kni_fifo_free_count(kni->tx_q); - - /* Get the number of entries in alloc_q */ - num_aq = kni_fifo_count(kni->alloc_q); - - /* Get the number of free entries in free_q */ - num_fq = kni_fifo_free_count(kni->free_q); - - /* Calculate the number of entries to be dequeued from rx_q */ - num = min(num_rq, num_tq); - num = min(num, num_aq); - num = min(num, num_fq); - num = min_t(uint32_t, num, MBUF_BURST_SZ); - - /* Return if no entry to dequeue from rx_q */ - if (num == 0) - return; - - /* Burst dequeue from rx_q */ - ret = kni_fifo_get(kni->rx_q, kni->pa, num); - if (ret == 0) - return; /* Failing should not happen */ - - /* Dequeue entries from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num); - if (ret) { - num = ret; - /* Copy mbufs */ - for (i = 0; i < num; i++) { - kva = pa2kva(kni->pa[i]); - len = kva->pkt_len; - data_kva = kva2data_kva(kva); - kni->va[i] = pa2va(kni->pa[i], kva); - - alloc_kva = pa2kva(kni->alloc_pa[i]); - alloc_data_kva = kva2data_kva(alloc_kva); - kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva); - - memcpy(alloc_data_kva, data_kva, len); - alloc_kva->pkt_len = len; - alloc_kva->data_len = len; - - kni->stats.tx_bytes += len; - kni->stats.rx_bytes += len; - } - - /* Burst enqueue mbufs into tx_q */ - ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num); - if (ret != num) - /* Failing should not happen */ - pr_err("Fail to enqueue mbufs into tx_q\n"); - } - - /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, kni->va, num); - if (ret != num) - /* Failing should not happen */ - pr_err("Fail to enqueue mbufs into free_q\n"); - - /** - * Update statistic, and enqueue/dequeue failure is impossible, - * as all queues are checked at first. - */ - kni->stats.tx_packets += num; - kni->stats.rx_packets += num; -} - -/* - * RX: loopback with enqueue/dequeue fifos and sk buffer copies. 
- */ -static void -kni_net_rx_lo_fifo_skb(struct kni_dev *kni) -{ - uint32_t ret; - uint32_t len; - uint32_t i, num_rq, num_fq, num; - struct rte_kni_mbuf *kva; - void *data_kva; - struct sk_buff *skb; - struct net_device *dev = kni->net_dev; - - /* Get the number of entries in rx_q */ - num_rq = kni_fifo_count(kni->rx_q); - - /* Get the number of free entries in free_q */ - num_fq = kni_fifo_free_count(kni->free_q); - - /* Calculate the number of entries to dequeue from rx_q */ - num = min(num_rq, num_fq); - num = min_t(uint32_t, num, MBUF_BURST_SZ); - - /* Return if no entry to dequeue from rx_q */ - if (num == 0) - return; - - /* Burst dequeue mbufs from rx_q */ - ret = kni_fifo_get(kni->rx_q, kni->pa, num); - if (ret == 0) - return; - - /* Copy mbufs to sk buffer and then call tx interface */ - for (i = 0; i < num; i++) { - kva = pa2kva(kni->pa[i]); - len = kva->pkt_len; - data_kva = kva2data_kva(kva); - kni->va[i] = pa2va(kni->pa[i], kva); - - skb = dev_alloc_skb(len + 2); - if (skb) { - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); - memcpy(skb_put(skb, len), data_kva, len); - skb->dev = dev; - skb->ip_summed = CHECKSUM_UNNECESSARY; - dev_kfree_skb(skb); - } - - /* Simulate real usage, allocate/copy skb twice */ - skb = dev_alloc_skb(len + 2); - if (skb == NULL) { - kni->stats.rx_dropped++; - continue; - } - - /* Align IP on 16B boundary */ - skb_reserve(skb, 2); - - if (kva->nb_segs == 1) { - memcpy(skb_put(skb, len), data_kva, len); - } else { - int nb_segs; - int kva_nb_segs = kva->nb_segs; - - for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) { - memcpy(skb_put(skb, kva->data_len), - data_kva, kva->data_len); - - if (!kva->next) - break; - - kva = pa2kva(va2pa(kva->next, kva)); - data_kva = kva2data_kva(kva); - } - } - - skb->dev = dev; - skb->ip_summed = CHECKSUM_UNNECESSARY; - - kni->stats.rx_bytes += len; - kni->stats.rx_packets++; - - /* call tx interface */ - kni_net_tx(skb, dev); - } - - /* enqueue all the mbufs from rx_q into free_q */ - ret = kni_fifo_put(kni->free_q, kni->va, num); - if (ret != num) - /* Failing should not happen */ - pr_err("Fail to enqueue mbufs into free_q\n"); -} - -/* rx interface */ -void -kni_net_rx(struct kni_dev *kni) -{ - /** - * It doesn't need to check if it is NULL pointer, - * as it has a default value - */ - (*kni_net_rx_func)(kni); -} - -/* - * Deal with a transmit timeout. - */ -static void -kni_net_tx_timeout(struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies, - jiffies - dev_trans_start(dev)); - - kni->stats.tx_errors++; - netif_wake_queue(dev); -} - -/* - * Ioctl commands - */ -static int -kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - pr_debug("kni_net_ioctl group:%d cmd:%d\n", - ((struct kni_dev *)netdev_priv(dev))->group_id, cmd); - - return 0; -} - -static void -kni_net_set_rx_mode(struct net_device *dev) -{ -} - -static int -kni_net_change_mtu(struct net_device *dev, int new_mtu) -{ - int ret; - struct rte_kni_request req; - struct kni_dev *kni = netdev_priv(dev); - - pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu); - - memset(&req, 0, sizeof(req)); - req.req_id = RTE_KNI_REQ_CHANGE_MTU; - req.new_mtu = new_mtu; - ret = kni_net_process_request(kni, &req); - if (ret == 0 && req.result == 0) - dev->mtu = new_mtu; - - return (ret == 0) ? 
req.result : ret; -} - -static void -kni_net_set_promiscusity(struct net_device *netdev, int flags) -{ - struct rte_kni_request req; - struct kni_dev *kni = netdev_priv(netdev); - - memset(&req, 0, sizeof(req)); - req.req_id = RTE_KNI_REQ_CHANGE_PROMISC; - - if (netdev->flags & IFF_PROMISC) - req.promiscusity = 1; - else - req.promiscusity = 0; - kni_net_process_request(kni, &req); -} - -/* - * Checks if the user space application provided the resp message - */ -void -kni_net_poll_resp(struct kni_dev *kni) -{ - if (kni_fifo_count(kni->resp_q)) - wake_up_interruptible(&kni->wq); -} - -/* - * Return statistics to the caller - */ -static struct net_device_stats * -kni_net_stats(struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - return &kni->stats; -} - -/* - * Fill the eth header - */ -static int -kni_net_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, uint32_t len) -{ - struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - - memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len); - eth->h_proto = htons(type); - - return dev->hard_header_len; -} - -/* - * Re-fill the eth header - */ -#ifdef HAVE_REBUILD_HEADER -static int -kni_net_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct ethhdr *eth = (struct ethhdr *) skb->data; - - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, dev->dev_addr, dev->addr_len); - - return 0; -} -#endif /* < 4.1.0 */ - -/** - * kni_net_set_mac - Change the Ethernet Address of the KNI NIC - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int -kni_net_set_mac(struct net_device *netdev, void *p) -{ - int ret; - struct rte_kni_request req; - struct kni_dev *kni; - struct sockaddr *addr = p; - - memset(&req, 0, sizeof(req)); - req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR; - - if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) - return -EADDRNOTAVAIL; - - memcpy(req.mac_addr, addr->sa_data, netdev->addr_len); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - - kni = netdev_priv(netdev); - ret = kni_net_process_request(kni, &req); - - return (ret == 0 ? 
req.result : ret); -} - -#ifdef HAVE_CHANGE_CARRIER_CB -static int -kni_net_change_carrier(struct net_device *dev, bool new_carrier) -{ - if (new_carrier) - netif_carrier_on(dev); - else - netif_carrier_off(dev); - return 0; -} -#endif - -static const struct header_ops kni_net_header_ops = { - .create = kni_net_header, -#ifdef HAVE_REBUILD_HEADER - .rebuild = kni_net_rebuild_header, -#endif /* < 4.1.0 */ - .cache = NULL, /* disable caching */ -}; - -static const struct net_device_ops kni_net_netdev_ops = { - .ndo_open = kni_net_open, - .ndo_stop = kni_net_release, - .ndo_set_config = kni_net_config, - .ndo_change_rx_flags = kni_net_set_promiscusity, - .ndo_start_xmit = kni_net_tx, - .ndo_change_mtu = kni_net_change_mtu, - .ndo_do_ioctl = kni_net_ioctl, - .ndo_set_rx_mode = kni_net_set_rx_mode, - .ndo_get_stats = kni_net_stats, - .ndo_tx_timeout = kni_net_tx_timeout, - .ndo_set_mac_address = kni_net_set_mac, -#ifdef HAVE_CHANGE_CARRIER_CB - .ndo_change_carrier = kni_net_change_carrier, -#endif -}; - -void -kni_net_init(struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - init_waitqueue_head(&kni->wq); - mutex_init(&kni->sync_lock); - - ether_setup(dev); /* assign some of the fields */ - dev->netdev_ops = &kni_net_netdev_ops; - dev->header_ops = &kni_net_header_ops; - dev->watchdog_timeo = WD_TIMEOUT; -} - -void -kni_net_config_lo_mode(char *lo_str) -{ - if (!lo_str) { - pr_debug("loopback disabled"); - return; - } - - if (!strcmp(lo_str, "lo_mode_none")) - pr_debug("loopback disabled"); - else if (!strcmp(lo_str, "lo_mode_fifo")) { - pr_debug("loopback mode=lo_mode_fifo enabled"); - kni_net_rx_func = kni_net_rx_lo_fifo; - } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { - pr_debug("loopback mode=lo_mode_fifo_skb enabled"); - kni_net_rx_func = kni_net_rx_lo_fifo_skb; - } else - pr_debug("Incognizant parameter, loopback disabled"); -} diff --git a/lib/librte_eal/meson.build b/lib/librte_eal/meson.build index d9ba3853..e1fde15d 100644 --- a/lib/librte_eal/meson.build +++ b/lib/librte_eal/meson.build @@ -12,40 +12,19 @@ subdir('common') # defines common_sources, common_objs, etc. if host_machine.system() == 'linux' dpdk_conf.set('RTE_EXEC_ENV_LINUXAPP', 1) subdir('linuxapp/eal') - subdir('linuxapp/igb_uio') elif host_machine.system() == 'freebsd' dpdk_conf.set('RTE_EXEC_ENV_BSDAPP', 1) subdir('bsdapp/eal') - kmods = ['contigmem', 'nic_uio'] - # for building kernel modules, we use kernel build system using make, as - # with Linux. We have a skeleton BSDmakefile, which pulls many of its - # values from the environment. Each module only has a single source file - # right now, which allows us to simplify things. We pull in the sourcer - # files from the individual meson.build files, and then use a custom - # target to call make, passing in the values as env parameters. 
- kmod_cflags = ['-I' + meson.build_root(), - '-I' + join_paths(meson.source_root(), 'config'), - '-include rte_config.h'] - foreach k:kmods - subdir(join_paths('bsdapp', k)) - custom_target(k, - input: [files('bsdapp/BSDmakefile.meson'), sources], - output: k + '.ko', - command: ['make', '-f', '@INPUT0@', - 'KMOD_SRC=@INPUT1@', - 'KMOD=' + k, - 'KMOD_CFLAGS=' + ' '.join(kmod_cflags)], - build_by_default: get_option('enable_kmods')) - endforeach else - error('unsupported system type @0@'.format(hostmachine.system())) + error('unsupported system type "@0@"'.format(host_machine.system())) endif -version = 6 # the version of the EAL API +version = 8 # the version of the EAL API allow_experimental_apis = true deps += 'compat' +deps += 'kvargs' cflags += '-D_GNU_SOURCE' sources = common_sources + env_sources objs = common_objs + env_objs diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index d1236023..344a43d3 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -2,7 +2,6 @@ DPDK_2.0 { global: __rte_panic; - devargs_list; eal_parse_sysfs_value; eal_timer_source; lcore_config; @@ -25,7 +24,6 @@ DPDK_2.0 { rte_eal_devargs_type_count; rte_eal_get_configuration; rte_eal_get_lcore_state; - rte_eal_get_physmem_layout; rte_eal_get_physmem_size; rte_eal_has_hugepages; rte_eal_hpet_init; @@ -163,9 +161,6 @@ DPDK_17.05 { rte_log_set_global_level; rte_log_set_level; rte_log_set_level_regexp; - vfio_get_container_fd; - vfio_get_group_fd; - vfio_get_group_no; } DPDK_17.02; @@ -186,7 +181,6 @@ DPDK_17.11 { rte_bus_get_iommu_class; rte_eal_has_pci; rte_eal_iova_mode; - rte_eal_mbuf_default_mempool_ops; rte_eal_using_phys_addrs; rte_eal_vfio_intr_mode; rte_lcore_has_role; @@ -211,26 +205,15 @@ DPDK_18.02 { } DPDK_17.11; -EXPERIMENTAL { +DPDK_18.05 { global: - rte_eal_cleanup; - rte_eal_devargs_insert; - rte_eal_devargs_parse; - rte_eal_devargs_remove; - rte_eal_hotplug_add; - rte_eal_hotplug_remove; - rte_eal_mbuf_user_pool_ops; - rte_mp_action_register; - rte_mp_action_unregister; - rte_mp_sendmsg; - rte_mp_request; - rte_mp_reply; + rte_log_set_level_pattern; rte_service_attr_get; rte_service_attr_reset_all; rte_service_component_register; - rte_service_component_unregister; rte_service_component_runstate_set; + rte_service_component_unregister; rte_service_dump; rte_service_finalize; rte_service_get_by_id; @@ -256,3 +239,100 @@ EXPERIMENTAL { rte_service_start_with_defaults; } DPDK_18.02; + +DPDK_18.08 { + global: + + rte_eal_mbuf_user_pool_ops; + rte_uuid_compare; + rte_uuid_is_null; + rte_uuid_parse; + rte_uuid_unparse; + rte_vfio_container_create; + rte_vfio_container_destroy; + rte_vfio_container_dma_map; + rte_vfio_container_dma_unmap; + rte_vfio_container_group_bind; + rte_vfio_container_group_unbind; + rte_vfio_dma_map; + rte_vfio_dma_unmap; + rte_vfio_get_container_fd; + rte_vfio_get_group_fd; + rte_vfio_get_group_num; + +} DPDK_18.05; + +EXPERIMENTAL { + global: + + rte_class_find; + rte_class_find_by_name; + rte_class_register; + rte_class_unregister; + rte_ctrl_thread_create; + rte_dev_event_callback_register; + rte_dev_event_callback_unregister; + rte_dev_event_monitor_start; + rte_dev_event_monitor_stop; + rte_dev_iterator_init; + rte_dev_iterator_next; + rte_devargs_add; + rte_devargs_dump; + rte_devargs_insert; + rte_devargs_next; + rte_devargs_parse; + rte_devargs_parsef; + rte_devargs_remove; + rte_devargs_type_count; + rte_eal_cleanup; + rte_eal_hotplug_add; + rte_eal_hotplug_remove; + rte_fbarray_attach; + 
rte_fbarray_destroy; + rte_fbarray_detach; + rte_fbarray_dump_metadata; + rte_fbarray_find_idx; + rte_fbarray_find_next_free; + rte_fbarray_find_next_used; + rte_fbarray_find_next_n_free; + rte_fbarray_find_next_n_used; + rte_fbarray_find_prev_free; + rte_fbarray_find_prev_used; + rte_fbarray_find_prev_n_free; + rte_fbarray_find_prev_n_used; + rte_fbarray_find_contig_free; + rte_fbarray_find_contig_used; + rte_fbarray_find_rev_contig_free; + rte_fbarray_find_rev_contig_used; + rte_fbarray_get; + rte_fbarray_init; + rte_fbarray_is_used; + rte_fbarray_set_free; + rte_fbarray_set_used; + rte_log_register_type_and_pick_level; + rte_malloc_dump_heaps; + rte_mem_alloc_validator_register; + rte_mem_alloc_validator_unregister; + rte_mem_event_callback_register; + rte_mem_event_callback_unregister; + rte_mem_iova2virt; + rte_mem_virt2memseg; + rte_mem_virt2memseg_list; + rte_memseg_contig_walk; + rte_memseg_contig_walk_thread_unsafe; + rte_memseg_list_walk; + rte_memseg_list_walk_thread_unsafe; + rte_memseg_walk; + rte_memseg_walk_thread_unsafe; + rte_mp_action_register; + rte_mp_action_unregister; + rte_mp_reply; + rte_mp_request_sync; + rte_mp_request_async; + rte_mp_sendmsg; + rte_service_lcore_attr_get; + rte_service_lcore_attr_reset_all; + rte_service_may_be_active; + rte_socket_count; + rte_socket_id_by_idx; +}; diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile new file mode 100644 index 00000000..0935a275 --- /dev/null +++ b/lib/librte_ethdev/Makefile @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_ethdev.a + +CFLAGS += -DALLOW_EXPERIMENTAL_API +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring +LDLIBS += -lrte_mbuf + +EXPORT_MAP := rte_ethdev_version.map + +LIBABIVER := 10 + +SRCS-y += rte_ethdev.c +SRCS-y += rte_flow.c +SRCS-y += rte_tm.c +SRCS-y += rte_mtr.c +SRCS-y += ethdev_profile.c + +# +# Export include files +# +SYMLINK-y-include += rte_ethdev.h +SYMLINK-y-include += rte_ethdev_driver.h +SYMLINK-y-include += rte_ethdev_core.h +SYMLINK-y-include += rte_ethdev_pci.h +SYMLINK-y-include += rte_ethdev_vdev.h +SYMLINK-y-include += rte_eth_ctrl.h +SYMLINK-y-include += rte_dev_info.h +SYMLINK-y-include += rte_flow.h +SYMLINK-y-include += rte_flow_driver.h +SYMLINK-y-include += rte_tm.h +SYMLINK-y-include += rte_tm_driver.h +SYMLINK-y-include += rte_mtr.h +SYMLINK-y-include += rte_mtr_driver.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ethdev/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c new file mode 100644 index 00000000..0d1dcda3 --- /dev/null +++ b/lib/librte_ethdev/ethdev_profile.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include "ethdev_profile.h" + +/** + * This conditional block enables RX queues profiling by tracking wasted + * iterations, i.e. iterations which yielded no RX packets. Profiling is + * performed using the Instrumentation and Tracing Technology (ITT) API, + * employed by the Intel (R) VTune (TM) Amplifier. 
+ */ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + +#include + +#define ITT_MAX_NAME_LEN (100) + +/** + * Auxiliary ITT structure belonging to Ethernet device and using to: + * - track RX queue state to determine whether it is wasting loop iterations + * - begin or end ITT task using task domain and task name (handle) + */ +struct itt_profile_rx_data { + /** + * ITT domains for each queue. + */ + __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; + /** + * ITT task names for each queue. + */ + __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; + /** + * Flags indicating the queues state. Possible values: + * 1 - queue is wasting iterations, + * 0 - otherwise. + */ + uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; +}; + +/** + * The pool of *itt_profile_rx_data* structures. + */ +struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; + + +/** + * This callback function manages ITT tasks collection on given port and queue. + * It must be registered with rte_eth_add_rx_callback() to be called from + * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function + * type declaration. + */ +static uint16_t +collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, + __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, __rte_unused void *user_param) +{ + if (unlikely(nb_pkts == 0)) { + if (!itt_rx_data[port_id].queue_state[queue_id]) { + __itt_task_begin( + itt_rx_data[port_id].domains[queue_id], + __itt_null, __itt_null, + itt_rx_data[port_id].handles[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 1; + } + } else { + if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { + __itt_task_end( + itt_rx_data[port_id].domains[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 0; + } + } + return nb_pkts; +} + +/** + * Initialization of itt_profile_rx_data for a given Ethernet device. + * This function must be invoked when ethernet device is being configured. + * Result will be stored in the global array *itt_rx_data*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param port_name + * The name of the Ethernet device. + * @param rx_queue_num + * The number of RX queues on specified port. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
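The collector above is installed through the standard ethdev RX callback mechanism. As a minimal sketch of that generic pattern (the counting callback and its registration below are illustrative, not part of this file), an application-side callback matching rte_rx_callback_fn looks roughly like this:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative callback: runs inside rte_eth_rx_burst() on the chosen
 * port/queue and must return the number of packets handed to the caller. */
static uint16_t
example_count_rx_cb(__rte_unused uint16_t port_id,
		__rte_unused uint16_t queue_id,
		__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
		__rte_unused uint16_t max_pkts, void *user_param)
{
	uint64_t *counter = user_param;

	*counter += nb_pkts;
	return nb_pkts;
}

/* Registration (return value check omitted):
 *	rte_eth_add_rx_callback(port_id, queue_id, example_count_rx_cb, &cnt);
 */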
+ */ +static inline int +itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) +{ + uint16_t q_id; + + for (q_id = 0; q_id < rx_queue_num; ++q_id) { + char domain_name[ITT_MAX_NAME_LEN]; + + snprintf(domain_name, sizeof(domain_name), + "RXBurst.WastedIterations.Port_%s.Queue_%d", + port_name, q_id); + itt_rx_data[port_id].domains[q_id] + = __itt_domain_create(domain_name); + + char task_name[ITT_MAX_NAME_LEN]; + + snprintf(task_name, sizeof(task_name), + "port id: %d; queue id: %d", + port_id, q_id); + itt_rx_data[port_id].handles[q_id] + = __itt_string_handle_create(task_name); + + itt_rx_data[port_id].queue_state[q_id] = 0; + + if (!rte_eth_add_rx_callback( + port_id, q_id, collect_itt_rx_burst_cb, NULL)) { + return -rte_errno; + } + } + + return 0; +} +#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ + +int +__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, + __rte_unused struct rte_eth_dev *dev) +{ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + return itt_profile_rx_init( + port_id, dev->data->name, dev->data->nb_rx_queues); +#endif + return 0; +} diff --git a/lib/librte_ethdev/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h new file mode 100644 index 00000000..e5ea3682 --- /dev/null +++ b/lib/librte_ethdev/ethdev_profile.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_PROFILE_H_ +#define _RTE_ETHDEV_PROFILE_H_ + +#include "rte_ethdev.h" + +/** + * Initialization of profiling RX queues for the Ethernet device. + * Implementation of this function depends on chosen profiling method, + * defined in configs. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev + * Pointer to struct rte_eth_dev corresponding to given port_id. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +int +__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); + +#endif diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build new file mode 100644 index 00000000..596cd0f3 --- /dev/null +++ b/lib/librte_ethdev/meson.build @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +name = 'ethdev' +version = 10 +allow_experimental_apis = true +sources = files('ethdev_profile.c', + 'rte_ethdev.c', + 'rte_flow.c', + 'rte_mtr.c', + 'rte_tm.c') + +headers = files('rte_ethdev.h', + 'rte_ethdev_driver.h', + 'rte_ethdev_core.h', + 'rte_ethdev_pci.h', + 'rte_ethdev_vdev.h', + 'rte_eth_ctrl.h', + 'rte_dev_info.h', + 'rte_flow.h', + 'rte_flow_driver.h', + 'rte_mtr.h', + 'rte_mtr_driver.h', + 'rte_tm.h', + 'rte_tm_driver.h') + +deps += ['net', 'kvargs'] diff --git a/lib/librte_ethdev/rte_dev_info.h b/lib/librte_ethdev/rte_dev_info.h new file mode 100644 index 00000000..fea5da88 --- /dev/null +++ b/lib/librte_ethdev/rte_dev_info.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 Intel Corporation + */ + +#ifndef _RTE_DEV_INFO_H_ +#define _RTE_DEV_INFO_H_ + +#include + +/* + * Placeholder for accessing device registers + */ +struct rte_dev_reg_info { + void *data; /**< Buffer for return registers */ + uint32_t offset; /**< Start register table location for access */ + uint32_t length; /**< Number of registers to fetch */ + uint32_t width; /**< Size of device register */ + uint32_t version; /**< Device version */ +}; + +/* + * Placeholder for accessing device eeprom + */ +struct rte_dev_eeprom_info { + void *data; /**< Buffer for return eeprom */ + uint32_t offset; /**< Start eeprom address for access*/ + uint32_t length; /**< Length of eeprom region to access */ + uint32_t magic; /**< Device-specific key, such as device-id */ +}; + +/** + * Placeholder for accessing plugin module eeprom + */ +struct rte_eth_dev_module_info { + uint32_t type; /**< Type of plugin module eeprom */ + uint32_t eeprom_len; /**< Length of plugin module eeprom */ +}; + +/* EEPROM Standards for plug in modules */ +#define RTE_ETH_MODULE_SFF_8079 0x1 +#define RTE_ETH_MODULE_SFF_8079_LEN 256 +#define RTE_ETH_MODULE_SFF_8472 0x2 +#define RTE_ETH_MODULE_SFF_8472_LEN 512 +#define RTE_ETH_MODULE_SFF_8636 0x3 +#define RTE_ETH_MODULE_SFF_8636_LEN 256 +#define RTE_ETH_MODULE_SFF_8436 0x4 +#define RTE_ETH_MODULE_SFF_8436_LEN 256 + +#endif /* _RTE_DEV_INFO_H_ */ diff --git a/lib/librte_ethdev/rte_eth_ctrl.h b/lib/librte_ethdev/rte_eth_ctrl.h new file mode 100644 index 00000000..5ea8ae24 --- /dev/null +++ b/lib/librte_ethdev/rte_eth_ctrl.h @@ -0,0 +1,828 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#ifndef _RTE_ETH_CTRL_H_ +#define _RTE_ETH_CTRL_H_ + +#include +#include +#include "rte_ether.h" + +/** + * @file + * + * Ethernet device features and related data structures used + * by control APIs should be defined in this file. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A packet can be identified by hardware as different flow types. Different + * NIC hardwares may support different flow types. + * Basically, the NIC hardware identifies the flow type as deep protocol as + * possible, and exclusively. For example, if a packet is identified as + * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types, + * though it is an actual IPV4 packet. + * Note that the flow types are used to define RSS offload types in + * rte_ethdev.h. 
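As a rough illustration of the mapping mentioned above, and assuming the usual encoding of the ETH_RSS_* bits in rte_ethdev.h as 1ULL shifted by the corresponding RTE_ETH_FLOW_* value, a flow type can be tested against an rss_hf mask directly (hypothetical helper, not part of this header):

#include <stdint.h>

/* Assumption: ETH_RSS_xxx == (1ULL << RTE_ETH_FLOW_xxx), as noted above. */
static inline int
example_rss_hf_has_flow(uint64_t rss_hf, uint32_t flow_type)
{
	return (rss_hf & (UINT64_C(1) << flow_type)) != 0;
}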
+ */ +#define RTE_ETH_FLOW_UNKNOWN 0 +#define RTE_ETH_FLOW_RAW 1 +#define RTE_ETH_FLOW_IPV4 2 +#define RTE_ETH_FLOW_FRAG_IPV4 3 +#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP 4 +#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP 5 +#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP 6 +#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER 7 +#define RTE_ETH_FLOW_IPV6 8 +#define RTE_ETH_FLOW_FRAG_IPV6 9 +#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP 10 +#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP 11 +#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP 12 +#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13 +#define RTE_ETH_FLOW_L2_PAYLOAD 14 +#define RTE_ETH_FLOW_IPV6_EX 15 +#define RTE_ETH_FLOW_IPV6_TCP_EX 16 +#define RTE_ETH_FLOW_IPV6_UDP_EX 17 +#define RTE_ETH_FLOW_PORT 18 + /**< Consider device port number as a flow differentiator */ +#define RTE_ETH_FLOW_VXLAN 19 /**< VXLAN protocol based flow */ +#define RTE_ETH_FLOW_GENEVE 20 /**< GENEVE protocol based flow */ +#define RTE_ETH_FLOW_NVGRE 21 /**< NVGRE protocol based flow */ +#define RTE_ETH_FLOW_VXLAN_GPE 22 /**< VXLAN-GPE protocol based flow */ +#define RTE_ETH_FLOW_MAX 23 + +/** + * Feature filter types + */ +enum rte_filter_type { + RTE_ETH_FILTER_NONE = 0, + RTE_ETH_FILTER_MACVLAN, + RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_FLEXIBLE, + RTE_ETH_FILTER_SYN, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_FDIR, + RTE_ETH_FILTER_HASH, + RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_MAX +}; + +/** + * Generic operations on filters + */ +enum rte_filter_op { + /** used to check whether the type filter is supported */ + RTE_ETH_FILTER_NOP = 0, + RTE_ETH_FILTER_ADD, /**< add filter entry */ + RTE_ETH_FILTER_UPDATE, /**< update filter entry */ + RTE_ETH_FILTER_DELETE, /**< delete filter entry */ + RTE_ETH_FILTER_FLUSH, /**< flush all entries */ + RTE_ETH_FILTER_GET, /**< get filter entry */ + RTE_ETH_FILTER_SET, /**< configurations */ + RTE_ETH_FILTER_INFO, /**< retrieve information */ + RTE_ETH_FILTER_STATS, /**< retrieve statistics */ + RTE_ETH_FILTER_OP_MAX +}; + +/** + * MAC filter type + */ +enum rte_mac_filter_type { + RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */ + RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */ + RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */ + /** hash match of MAC addr and exact match of VLAN ID. */ + RTE_MACVLAN_HASH_MATCH, +}; + +/** + * MAC filter info + */ +struct rte_eth_mac_filter { + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ + enum rte_mac_filter_type filter_type; /**< MAC filter type */ + struct ether_addr mac_addr; +}; + +/** + * Define all structures for Ethertype Filter type. + */ + +#define RTE_ETHTYPE_FLAGS_MAC 0x0001 /**< If set, compare mac */ +#define RTE_ETHTYPE_FLAGS_DROP 0x0002 /**< If set, drop packet when match */ + +/** + * A structure used to define the ethertype filter entry + * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ethertype_filter { + struct ether_addr mac_addr; /**< Mac address to match. */ + uint16_t ether_type; /**< Ether type to match */ + uint16_t flags; /**< Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +#define RTE_FLEX_FILTER_MAXLEN 128 /**< bytes to use in flex filter. */ +#define RTE_FLEX_FILTER_MASK_SIZE \ + (RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT) + /**< mask bytes in flex filter. 
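A minimal usage sketch for the ethertype filter defined above, assuming the PMD behind rte_eth_dev_filter_ctrl() supports RTE_ETH_FILTER_ETHERTYPE (the drop-ARP policy is purely illustrative):

#include <string.h>
#include <rte_ethdev.h>

static int
example_drop_arp(uint16_t port_id)
{
	struct rte_eth_ethertype_filter filter;

	memset(&filter, 0, sizeof(filter));
	filter.ether_type = 0x0806;		/* ARP */
	filter.flags = RTE_ETHTYPE_FLAGS_DROP;	/* drop on match */

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_ETHERTYPE,
			RTE_ETH_FILTER_ADD, &filter);
}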
*/ + +/** + * A structure used to define the flex filter entry + * to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_flex_filter { + uint16_t len; + uint8_t bytes[RTE_FLEX_FILTER_MAXLEN]; /**< flex bytes in big endian.*/ + uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE]; /**< if mask bit is 1b, do + not compare corresponding byte. */ + uint8_t priority; + uint16_t queue; /**< Queue assigned to when match. */ +}; + +/** + * A structure used to define the TCP syn filter entry + * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_syn_filter { + uint8_t hig_pri; /**< 1 - higher priority than other filters, + 0 - lower priority. */ + uint16_t queue; /**< Queue assigned to when match */ +}; + +/** + * Define all structures for ntuple Filter type. + */ + +#define RTE_NTUPLE_FLAGS_DST_IP 0x0001 /**< If set, dst_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_IP 0x0002 /**< If set, src_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_DST_PORT 0x0004 /**< If set, dst_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_PORT 0x0008 /**< If set, src_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_PROTO 0x0010 /**< If set, protocol is part of ntuple */ +#define RTE_NTUPLE_FLAGS_TCP_FLAG 0x0020 /**< If set, tcp flag is involved */ + +#define RTE_5TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_IP | \ + RTE_NTUPLE_FLAGS_SRC_IP | \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_SRC_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define RTE_2TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define TCP_URG_FLAG 0x20 +#define TCP_ACK_FLAG 0x10 +#define TCP_PSH_FLAG 0x08 +#define TCP_RST_FLAG 0x04 +#define TCP_SYN_FLAG 0x02 +#define TCP_FIN_FLAG 0x01 +#define TCP_FLAG_ALL 0x3F + +/** + * A structure used to define the ntuple filter entry + * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ntuple_filter { + uint16_t flags; /**< Flags from RTE_NTUPLE_FLAGS_* */ + uint32_t dst_ip; /**< Destination IP address in big endian. */ + uint32_t dst_ip_mask; /**< Mask of destination IP address. */ + uint32_t src_ip; /**< Source IP address in big endian. */ + uint32_t src_ip_mask; /**< Mask of destination IP address. */ + uint16_t dst_port; /**< Destination port in big endian. */ + uint16_t dst_port_mask; /**< Mask of destination port. */ + uint16_t src_port; /**< Source Port in big endian. */ + uint16_t src_port_mask; /**< Mask of source port. */ + uint8_t proto; /**< L4 protocol. */ + uint8_t proto_mask; /**< Mask of L4 protocol. */ + /** tcp_flags only meaningful when the proto is TCP. + The packet matched above ntuple fields and contain + any set bit in tcp_flags will hit this filter. */ + uint8_t tcp_flags; + uint16_t priority; /**< seven levels (001b-111b), 111b is highest, + used when more than one filter matches. */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +/** + * Tunneled type. 
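Similarly, a minimal sketch for the ntuple filter above: steer TCP destination port 80 to RX queue 1. Port fields are big endian per the field comments, an all-ones mask means the field must match, and PMD support is assumed (values are illustrative):

#include <string.h>
#include <stdint.h>
#include <netinet/in.h>
#include <rte_ethdev.h>
#include <rte_byteorder.h>

static int
example_ntuple_http_to_queue1(uint16_t port_id)
{
	struct rte_eth_ntuple_filter ntuple;

	memset(&ntuple, 0, sizeof(ntuple));
	ntuple.flags = RTE_5TUPLE_FLAGS;
	ntuple.proto = IPPROTO_TCP;
	ntuple.proto_mask = UINT8_MAX;
	ntuple.dst_port = rte_cpu_to_be_16(80);
	ntuple.dst_port_mask = UINT16_MAX;
	ntuple.priority = 1;
	ntuple.queue = 1;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_NTUPLE,
			RTE_ETH_FILTER_ADD, &ntuple);
}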
+ */ +enum rte_eth_tunnel_type { + RTE_TUNNEL_TYPE_NONE = 0, + RTE_TUNNEL_TYPE_VXLAN, + RTE_TUNNEL_TYPE_GENEVE, + RTE_TUNNEL_TYPE_TEREDO, + RTE_TUNNEL_TYPE_NVGRE, + RTE_TUNNEL_TYPE_IP_IN_GRE, + RTE_L2_TUNNEL_TYPE_E_TAG, + RTE_TUNNEL_TYPE_MAX, +}; + +/** + * filter type of tunneling packet + */ +#define ETH_TUNNEL_FILTER_OMAC 0x01 /**< filter by outer MAC addr */ +#define ETH_TUNNEL_FILTER_OIP 0x02 /**< filter by outer IP Addr */ +#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */ +#define ETH_TUNNEL_FILTER_IMAC 0x08 /**< filter by inner MAC addr */ +#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */ +#define ETH_TUNNEL_FILTER_IIP 0x20 /**< filter by inner IP addr */ + +#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN) +#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \ + ETH_TUNNEL_FILTER_TENID | \ + ETH_TUNNEL_FILTER_IMAC) + +/** + * Select IPv4 or IPv6 for tunnel filters. + */ +enum rte_tunnel_iptype { + RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */ + RTE_TUNNEL_IPTYPE_IPV6, /**< IPv6. */ +}; + +/** + * Tunneling Packet filter configuration. + */ +struct rte_eth_tunnel_filter_conf { + struct ether_addr outer_mac; /**< Outer MAC address to match. */ + struct ether_addr inner_mac; /**< Inner MAC address to match. */ + uint16_t inner_vlan; /**< Inner VLAN to match. */ + enum rte_tunnel_iptype ip_type; /**< IP address type. */ + /** Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP + is set in filter_type, or inner destination IP address to match + if ETH_TUNNEL_FILTER_IIP is set in filter_type . */ + union { + uint32_t ipv4_addr; /**< IPv4 address in big endian. */ + uint32_t ipv6_addr[4]; /**< IPv6 address in big endian. */ + } ip_addr; + /** Flags from ETH_TUNNEL_FILTER_XX - see above. */ + uint16_t filter_type; + enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */ + uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */ + uint16_t queue_id; /**< Queue assigned to if match. */ +}; + +/** + * Global eth device configuration type. + */ +enum rte_eth_global_cfg_type { + RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0, + RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN, + RTE_ETH_GLOBAL_CFG_TYPE_MAX, +}; + +/** + * Global eth device configuration. + */ +struct rte_eth_global_cfg { + enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */ + union { + uint8_t gre_key_len; /**< Valid GRE key length in byte. */ + uint64_t reserved; /**< Reserve space for future use. */ + } cfg; +}; + +#define RTE_ETH_FDIR_MAX_FLEXLEN 16 /**< Max length of flexbytes. */ +#define RTE_ETH_INSET_SIZE_MAX 128 /**< Max length of input set. 
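Referring back to rte_eth_tunnel_filter_conf above, a hedged sketch of a VXLAN inner-MAC-plus-VNI filter; the MAC, VNI and queue are illustrative and support for RTE_ETH_FILTER_TUNNEL depends on the PMD:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_ether.h>

static int
example_vxlan_imac_tenid(uint16_t port_id, const struct ether_addr *inner_mac)
{
	struct rte_eth_tunnel_filter_conf conf;

	memset(&conf, 0, sizeof(conf));
	ether_addr_copy(inner_mac, &conf.inner_mac);
	conf.filter_type = RTE_TUNNEL_FILTER_IMAC_TENID;
	conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
	conf.tenant_id = 100;	/* VNI to match (illustrative) */
	conf.queue_id = 2;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_TUNNEL,
			RTE_ETH_FILTER_ADD, &conf);
}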
*/ + +/** + * Input set fields for Flow Director and Hash filters + */ +enum rte_eth_input_set_field { + RTE_ETH_INPUT_SET_UNKNOWN = 0, + + /* L2 */ + RTE_ETH_INPUT_SET_L2_SRC_MAC = 1, + RTE_ETH_INPUT_SET_L2_DST_MAC, + RTE_ETH_INPUT_SET_L2_OUTER_VLAN, + RTE_ETH_INPUT_SET_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_L2_ETHERTYPE, + + /* L3 */ + RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129, + RTE_ETH_INPUT_SET_L3_DST_IP4, + RTE_ETH_INPUT_SET_L3_SRC_IP6, + RTE_ETH_INPUT_SET_L3_DST_IP6, + RTE_ETH_INPUT_SET_L3_IP4_TOS, + RTE_ETH_INPUT_SET_L3_IP4_PROTO, + RTE_ETH_INPUT_SET_L3_IP6_TC, + RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER, + RTE_ETH_INPUT_SET_L3_IP4_TTL, + RTE_ETH_INPUT_SET_L3_IP6_HOP_LIMITS, + + /* L4 */ + RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257, + RTE_ETH_INPUT_SET_L4_UDP_DST_PORT, + RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_TCP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG, + + /* Tunnel */ + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY, + RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY, + + /* Flexible Payload */ + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD, + + RTE_ETH_INPUT_SET_DEFAULT = 65533, + RTE_ETH_INPUT_SET_NONE = 65534, + RTE_ETH_INPUT_SET_MAX = 65535, +}; + +/** + * Filters input set operations + */ +enum rte_filter_input_set_op { + RTE_ETH_INPUT_SET_OP_UNKNOWN, + RTE_ETH_INPUT_SET_SELECT, /**< select input set */ + RTE_ETH_INPUT_SET_ADD, /**< add input set entry */ + RTE_ETH_INPUT_SET_OP_MAX +}; + + +/** + * A structure used to define the input set configuration for + * flow director and hash filters + */ +struct rte_eth_input_set_conf { + uint16_t flow_type; + uint16_t inset_size; + enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX]; + enum rte_filter_input_set_op op; +}; + +/** + * A structure used to define the input for L2 flow + */ +struct rte_eth_l2_flow { + uint16_t ether_type; /**< Ether type in big endian */ +}; + +/** + * A structure used to define the input for IPV4 flow + */ +struct rte_eth_ipv4_flow { + uint32_t src_ip; /**< IPv4 source address in big endian. */ + uint32_t dst_ip; /**< IPv4 destination address in big endian. */ + uint8_t tos; /**< Type of service to match. */ + uint8_t ttl; /**< Time to live to match. */ + uint8_t proto; /**< Protocol, next header in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 UDP flow + */ +struct rte_eth_udpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 TCP flow + */ +struct rte_eth_tcpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< TCP source port in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 SCTP flow + */ +struct rte_eth_sctpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. 
*/ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian */ +}; + +/** + * A structure used to define the input for IPV6 flow + */ +struct rte_eth_ipv6_flow { + uint32_t src_ip[4]; /**< IPv6 source address in big endian. */ + uint32_t dst_ip[4]; /**< IPv6 destination address in big endian. */ + uint8_t tc; /**< Traffic class to match. */ + uint8_t proto; /**< Protocol, next header to match. */ + uint8_t hop_limits; /**< Hop limits to match. */ +}; + +/** + * A structure used to define the input for IPV6 UDP flow + */ +struct rte_eth_udpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 TCP flow + */ +struct rte_eth_tcpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< TCP source port to in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 SCTP flow + */ +struct rte_eth_sctpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian. */ +}; + +/** + * A structure used to define the input for MAC VLAN flow + */ +struct rte_eth_mac_vlan_flow { + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * Tunnel type for flow director. + */ +enum rte_eth_fdir_tunnel_type { + RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0, + RTE_FDIR_TUNNEL_TYPE_NVGRE, + RTE_FDIR_TUNNEL_TYPE_VXLAN, +}; + +/** + * A structure used to define the input for tunnel flow, now it's VxLAN or + * NVGRE + */ +struct rte_eth_tunnel_flow { + enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */ + /** Tunnel ID to match. TNI, VNI... in big endian. */ + uint32_t tunnel_id; + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * An union contains the inputs for all types of flow + * Items in flows need to be in big endian + */ +union rte_eth_fdir_flow { + struct rte_eth_l2_flow l2_flow; + struct rte_eth_udpv4_flow udp4_flow; + struct rte_eth_tcpv4_flow tcp4_flow; + struct rte_eth_sctpv4_flow sctp4_flow; + struct rte_eth_ipv4_flow ip4_flow; + struct rte_eth_udpv6_flow udp6_flow; + struct rte_eth_tcpv6_flow tcp6_flow; + struct rte_eth_sctpv6_flow sctp6_flow; + struct rte_eth_ipv6_flow ipv6_flow; + struct rte_eth_mac_vlan_flow mac_vlan_flow; + struct rte_eth_tunnel_flow tunnel_flow; +}; + +/** + * A structure used to contain extend input of flow + */ +struct rte_eth_fdir_flow_ext { + uint16_t vlan_tci; + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< It is filled by the flexible payload to match. 
*/ + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ +}; + +/** + * A structure used to define the input for a flow director filter entry + */ +struct rte_eth_fdir_input { + uint16_t flow_type; + union rte_eth_fdir_flow flow; + /**< Flow fields to match, dependent on flow_type */ + struct rte_eth_fdir_flow_ext flow_ext; + /**< Additional fields to match */ +}; + +/** + * Behavior will be taken if FDIR match + */ +enum rte_eth_fdir_behavior { + RTE_ETH_FDIR_ACCEPT = 0, + RTE_ETH_FDIR_REJECT, + RTE_ETH_FDIR_PASSTHRU, +}; + +/** + * Flow director report status + * It defines what will be reported if FDIR entry is matched. + */ +enum rte_eth_fdir_status { + RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. */ + RTE_ETH_FDIR_REPORT_ID, /**< Only report FD ID. */ + RTE_ETH_FDIR_REPORT_ID_FLEX_4, /**< Report FD ID and 4 flex bytes. */ + RTE_ETH_FDIR_REPORT_FLEX_8, /**< Report 8 flex bytes. */ +}; + +/** + * A structure used to define an action when match FDIR packet filter. + */ +struct rte_eth_fdir_action { + uint16_t rx_queue; /**< Queue assigned to if FDIR match. */ + enum rte_eth_fdir_behavior behavior; /**< Behavior will be taken */ + enum rte_eth_fdir_status report_status; /**< Status report option */ + uint8_t flex_off; + /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or + RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported + flex bytes start from in flexible payload. */ +}; + +/** + * A structure used to define the flow director filter entry by filter_ctrl API + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and + * RTE_ETH_FILTER_DELETE operations. + */ +struct rte_eth_fdir_filter { + uint32_t soft_id; + /**< ID, an unique value is required when deal with FDIR entry */ + struct rte_eth_fdir_input input; /**< Input set */ + struct rte_eth_fdir_action action; /**< Action taken when match */ +}; + +/** + * A structure used to configure FDIR masks that are used by the device + * to match the various fields of RX packet headers. + */ +struct rte_eth_fdir_masks { + uint16_t vlan_tci_mask; /**< Bit mask for vlan_tci in big endian */ + /** Bit mask for ipv4 flow in big endian. */ + struct rte_eth_ipv4_flow ipv4_mask; + /** Bit maks for ipv6 flow in big endian. */ + struct rte_eth_ipv6_flow ipv6_mask; + /** Bit mask for L4 source port in big endian. */ + uint16_t src_port_mask; + /** Bit mask for L4 destination port in big endian. */ + uint16_t dst_port_mask; + /** 6 bit mask for proper 6 bytes of Mac address, bit 0 matches the + first byte on the wire */ + uint8_t mac_addr_byte_mask; + /** Bit mask for tunnel ID in big endian. */ + uint32_t tunnel_id_mask; + uint8_t tunnel_type_mask; /**< 1 - Match tunnel type, + 0 - Ignore tunnel type. */ +}; + +/** + * Payload type + */ +enum rte_eth_payload_type { + RTE_ETH_PAYLOAD_UNKNOWN = 0, + RTE_ETH_RAW_PAYLOAD, + RTE_ETH_L2_PAYLOAD, + RTE_ETH_L3_PAYLOAD, + RTE_ETH_L4_PAYLOAD, + RTE_ETH_PAYLOAD_MAX = 8, +}; + +/** + * A structure used to select bytes extracted from the protocol layers to + * flexible payload for filter + */ +struct rte_eth_flex_payload_cfg { + enum rte_eth_payload_type type; /**< Payload type */ + uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Offset in bytes from the beginning of packet's payload + src_offset[i] indicates the flexbyte i's offset in original + packet payload. 
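Putting the flow director structures above together, a minimal sketch of a perfect-mode entry that steers IPv4/UDP packets with destination port 4789 to RX queue 3; values are illustrative, and the port's fdir_conf plus PMD support are assumed to already be in place:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_byteorder.h>

static int
example_fdir_udp4(uint16_t port_id)
{
	struct rte_eth_fdir_filter entry;

	memset(&entry, 0, sizeof(entry));
	entry.soft_id = 1;
	entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
	entry.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
	entry.action.rx_queue = 3;
	entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
	entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
			RTE_ETH_FILTER_ADD, &entry);
}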
This value should be less than + flex_payload_limit in struct rte_eth_fdir_info.*/ +}; + +/** + * A structure used to define FDIR masks for flexible payload + * for each flow type + */ +struct rte_eth_fdir_flex_mask { + uint16_t flow_type; + uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Mask for the whole flexible payload */ +}; + +/** + * A structure used to define all flexible payload related setting + * include flex payload and flex mask + */ +struct rte_eth_fdir_flex_conf { + uint16_t nb_payloads; /**< The number of following payload cfg */ + uint16_t nb_flexmasks; /**< The number of following mask */ + struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX]; + /**< Flex payload configuration for each payload type */ + struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX]; + /**< Flex mask configuration for each flow type */ +}; + +/** + * Flow Director setting modes: none, signature or perfect. + */ +enum rte_fdir_mode { + RTE_FDIR_MODE_NONE = 0, /**< Disable FDIR support. */ + RTE_FDIR_MODE_SIGNATURE, /**< Enable FDIR signature filter mode. */ + RTE_FDIR_MODE_PERFECT, /**< Enable FDIR perfect filter mode. */ + RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */ + RTE_FDIR_MODE_PERFECT_TUNNEL, /**< Enable FDIR filter mode - tunnel. */ +}; + +#define UINT64_BIT (CHAR_BIT * sizeof(uint64_t)) +#define RTE_FLOW_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) + +/** + * A structure used to get the information of flow director filter. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation. + * It includes the mode, flexible payload configuration information, + * capabilities and supported flow types, flexible payload characters. + * It can be gotten to help taking specific configurations per device. + */ +struct rte_eth_fdir_info { + enum rte_fdir_mode mode; /**< Flow director mode */ + struct rte_eth_fdir_masks mask; + /** Flex payload configuration information */ + struct rte_eth_fdir_flex_conf flex_conf; + uint32_t guarant_spc; /**< Guaranteed spaces.*/ + uint32_t best_spc; /**< Best effort spaces.*/ + /** Bit mask for every supported flow type. */ + uint64_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE]; + uint32_t max_flexpayload; /**< Total flex payload in bytes. */ + /** Flexible payload unit in bytes. Size and alignments of all flex + payload segments should be multiplies of this value. */ + uint32_t flex_payload_unit; + /** Max number of flexible payload continuous segments. + Each segment should be a multiple of flex_payload_unit.*/ + uint32_t max_flex_payload_segment_num; + /** Maximum src_offset in bytes allowed. It indicates that + src_offset[i] in struct rte_eth_flex_payload_cfg should be less + than this value. */ + uint16_t flex_payload_limit; + /** Flex bitmask unit in bytes. Size of flex bitmasks should be a + multiply of this value. */ + uint32_t flex_bitmask_unit; + /** Max supported size of flex bitmasks in flex_bitmask_unit */ + uint32_t max_flex_bitmask_num; +}; + +/** + * A structure used to define the statistics of flow director. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation. + */ +struct rte_eth_fdir_stats { + uint32_t collision; /**< Number of filters with collision. */ + uint32_t free; /**< Number of free filters. */ + uint32_t maxhash; + /**< The lookup hash value of the added filter that updated the value + of the MAXLEN field */ + uint32_t maxlen; /**< Longest linked list of filters. */ + uint64_t add; /**< Number of added filters. 
*/ + uint64_t remove; /**< Number of removed filters. */ + uint64_t f_add; /**< Number of failed added filters. */ + uint64_t f_remove; /**< Number of failed removed filters. */ + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +}; + +/** + * Flow Director filter information types. + */ +enum rte_eth_fdir_filter_info_type { + RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Flow Director filter input set configuration */ + RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT, + RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX, +}; + +/** + * A structure used to set FDIR filter information, to support filter type + * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation. + */ +struct rte_eth_fdir_filter_info { + enum rte_eth_fdir_filter_info_type info_type; /**< Information type */ + /** Details of fdir filter information */ + union { + /** Flow Director input set configuration per port */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * Hash filter information types. + * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the + * information/configuration of 'symmetric hash enable' per port. + * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global + * configurations of hash filters. Those global configurations are valid + * for all ports of the same NIC. + * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global + * hash input set fields + */ +enum rte_eth_hash_filter_info_type { + RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Symmetric hash enable per port */ + RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT, + /** Configure globally for hash filter */ + RTE_ETH_HASH_FILTER_GLOBAL_CONFIG, + /** Global Hash filter input set configuration */ + RTE_ETH_HASH_FILTER_INPUT_SET_SELECT, + RTE_ETH_HASH_FILTER_INFO_TYPE_MAX, +}; + +/** + * Hash function types. + */ +enum rte_eth_hash_function { + RTE_ETH_HASH_FUNCTION_DEFAULT = 0, + RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */ + RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */ + RTE_ETH_HASH_FUNCTION_MAX, +}; + +#define RTE_SYM_HASH_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) +/** + * A structure used to set or get global hash function configurations which + * include symmetric hash enable per flow type and hash function type. + * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the + * corresponding flow type is enabled or not. + * Each bit in valid_bit_mask[] indicates if the corresponding bit in + * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it + * also means if the flow type is supported by hardware or not. + */ +struct rte_eth_hash_global_conf { + enum rte_eth_hash_function hash_func; /**< Hash function type */ + /** Bit mask for symmetric hash enable per flow type */ + uint64_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; + /** Bit mask indicates if the corresponding bit is valid */ + uint64_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; +}; + +/** + * A structure used to set or get hash filter information, to support filter + * type of 'RTE_ETH_FILTER_HASH' and its operations. 
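A hedged sketch of the per-port symmetric-hash switch, using the rte_eth_hash_filter_info structure declared just below; support for RTE_ETH_FILTER_HASH operations depends on the PMD:

#include <string.h>
#include <rte_ethdev.h>

static int
example_enable_sym_hash(uint16_t port_id)
{
	struct rte_eth_hash_filter_info info;

	memset(&info, 0, sizeof(info));
	info.info_type = RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT;
	info.info.enable = 1;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_HASH,
			RTE_ETH_FILTER_SET, &info);
}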
+ */ +struct rte_eth_hash_filter_info { + enum rte_eth_hash_filter_info_type info_type; /**< Information type */ + /** Details of hash filter information */ + union { + /** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */ + uint8_t enable; + /** Global configurations of hash filter */ + struct rte_eth_hash_global_conf global_conf; + /** Global configurations of hash filter input set */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * l2 tunnel configuration. + */ +struct rte_eth_l2_tunnel_conf { + enum rte_eth_tunnel_type l2_tunnel_type; + uint16_t ether_type; /* ether type in l2 header */ + uint32_t tunnel_id; /* port tag id for e-tag */ + uint16_t vf_id; /* VF id for tag insertion */ + uint32_t pool; /* destination pool for tag based forwarding */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETH_CTRL_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c new file mode 100644 index 00000000..4c320250 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev.c @@ -0,0 +1,4425 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ether.h" +#include "rte_ethdev.h" +#include "rte_ethdev_driver.h" +#include "ethdev_profile.h" + +int rte_eth_dev_logtype; + +static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; +struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; +static uint16_t eth_dev_last_created_port; + +/* spinlock for eth device callbacks */ +static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for add/remove rx callbacks */ +static rte_spinlock_t rte_eth_rx_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for add/remove tx callbacks */ +static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for shared data allocation */ +static rte_spinlock_t rte_eth_shared_data_lock = RTE_SPINLOCK_INITIALIZER; + +/* store statistics names and its offset in stats structure */ +struct rte_eth_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +/* Shared memory between primary and secondary processes. 
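For orientation, the name/offset tables that follow are consumed by reading the named counter out of struct rte_eth_stats at the recorded byte offset; an illustrative helper (not the code this file actually uses) would be:

static uint64_t
example_read_basic_stat(const struct rte_eth_stats *stats,
		const struct rte_eth_xstats_name_off *entry)
{
	/* entry->offset is the field's offsetof() within struct rte_eth_stats */
	return *(const uint64_t *)((const char *)stats + entry->offset);
}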
*/ +static struct { + uint64_t next_owner_id; + rte_spinlock_t ownership_lock; + struct rte_eth_dev_data data[RTE_MAX_ETHPORTS]; +} *rte_eth_dev_shared_data; + +static const struct rte_eth_xstats_name_off rte_stats_strings[] = { + {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)}, + {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, + {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, + {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, + {"rx_missed_errors", offsetof(struct rte_eth_stats, imissed)}, + {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, + {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, + {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, + rx_nombuf)}, +}; + +#define RTE_NB_STATS (sizeof(rte_stats_strings) / sizeof(rte_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_ipackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_ibytes)}, + {"errors", offsetof(struct rte_eth_stats, q_errors)}, +}; + +#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) / \ + sizeof(rte_rxq_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_opackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_obytes)}, +}; +#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) / \ + sizeof(rte_txq_stats_strings[0])) + +#define RTE_RX_OFFLOAD_BIT2STR(_name) \ + { DEV_RX_OFFLOAD_##_name, #_name } + +static const struct { + uint64_t offload; + const char *name; +} rte_rx_offload_names[] = { + RTE_RX_OFFLOAD_BIT2STR(VLAN_STRIP), + RTE_RX_OFFLOAD_BIT2STR(IPV4_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(UDP_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(TCP_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(TCP_LRO), + RTE_RX_OFFLOAD_BIT2STR(QINQ_STRIP), + RTE_RX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(MACSEC_STRIP), + RTE_RX_OFFLOAD_BIT2STR(HEADER_SPLIT), + RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER), + RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND), + RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME), + RTE_RX_OFFLOAD_BIT2STR(CRC_STRIP), + RTE_RX_OFFLOAD_BIT2STR(SCATTER), + RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP), + RTE_RX_OFFLOAD_BIT2STR(SECURITY), + RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC), +}; + +#undef RTE_RX_OFFLOAD_BIT2STR + +#define RTE_TX_OFFLOAD_BIT2STR(_name) \ + { DEV_TX_OFFLOAD_##_name, #_name } + +static const struct { + uint64_t offload; + const char *name; +} rte_tx_offload_names[] = { + RTE_TX_OFFLOAD_BIT2STR(VLAN_INSERT), + RTE_TX_OFFLOAD_BIT2STR(IPV4_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(UDP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(TCP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(SCTP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(TCP_TSO), + RTE_TX_OFFLOAD_BIT2STR(UDP_TSO), + RTE_TX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(QINQ_INSERT), + RTE_TX_OFFLOAD_BIT2STR(VXLAN_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(GRE_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(IPIP_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(GENEVE_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(MACSEC_INSERT), + RTE_TX_OFFLOAD_BIT2STR(MT_LOCKFREE), + RTE_TX_OFFLOAD_BIT2STR(MULTI_SEGS), + RTE_TX_OFFLOAD_BIT2STR(MBUF_FAST_FREE), + RTE_TX_OFFLOAD_BIT2STR(SECURITY), +}; + +#undef RTE_TX_OFFLOAD_BIT2STR + +/** + * The user application callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the event type. 
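As a minimal application-side sketch of the callback these entries describe (the handler and its registration are illustrative, not part of this file):

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static int
example_lsc_handler(uint16_t port_id, enum rte_eth_event_type event,
		__rte_unused void *cb_arg, __rte_unused void *ret_param)
{
	if (event == RTE_ETH_EVENT_INTR_LSC)
		printf("port %" PRIu16 ": link status changed\n", port_id);
	return 0;
}

/* Registration (return value check omitted):
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *			example_lsc_handler, NULL);
 */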
+ */ +struct rte_eth_dev_callback { + TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ + rte_eth_dev_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Parameter for callback */ + void *ret_param; /**< Return parameter */ + enum rte_eth_event_type event; /**< Interrupt event type */ + uint32_t active; /**< Callback is executing */ +}; + +enum { + STAT_QMAP_TX = 0, + STAT_QMAP_RX +}; + +uint16_t +rte_eth_find_next(uint16_t port_id) +{ + while (port_id < RTE_MAX_ETHPORTS && + rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && + rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) + port_id++; + + if (port_id >= RTE_MAX_ETHPORTS) + return RTE_MAX_ETHPORTS; + + return port_id; +} + +static void +rte_eth_dev_shared_data_prepare(void) +{ + const unsigned flags = 0; + const struct rte_memzone *mz; + + rte_spinlock_lock(&rte_eth_shared_data_lock); + + if (rte_eth_dev_shared_data == NULL) { + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* Allocate port data and ownership shared memory. */ + mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA, + sizeof(*rte_eth_dev_shared_data), + rte_socket_id(), flags); + } else + mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA); + if (mz == NULL) + rte_panic("Cannot allocate ethdev shared data\n"); + + rte_eth_dev_shared_data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_eth_dev_shared_data->next_owner_id = + RTE_ETH_DEV_NO_OWNER + 1; + rte_spinlock_init(&rte_eth_dev_shared_data->ownership_lock); + memset(rte_eth_dev_shared_data->data, 0, + sizeof(rte_eth_dev_shared_data->data)); + } + } + + rte_spinlock_unlock(&rte_eth_shared_data_lock); +} + +static bool +is_allocated(const struct rte_eth_dev *ethdev) +{ + return ethdev->data->name[0] != '\0'; +} + +static struct rte_eth_dev * +_rte_eth_dev_allocated(const char *name) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_devices[i].data != NULL && + strcmp(rte_eth_devices[i].data->name, name) == 0) + return &rte_eth_devices[i]; + } + return NULL; +} + +struct rte_eth_dev * +rte_eth_dev_allocated(const char *name) +{ + struct rte_eth_dev *ethdev; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ethdev = _rte_eth_dev_allocated(name); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return ethdev; +} + +static uint16_t +rte_eth_dev_find_free_port(void) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + /* Using shared name field to find a free port. */ + if (rte_eth_dev_shared_data->data[i].name[0] == '\0') { + RTE_ASSERT(rte_eth_devices[i].state == + RTE_ETH_DEV_UNUSED); + return i; + } + } + return RTE_MAX_ETHPORTS; +} + +static struct rte_eth_dev * +eth_dev_get(uint16_t port_id) +{ + struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; + + eth_dev->data = &rte_eth_dev_shared_data->data[port_id]; + + eth_dev_last_created_port = port_id; + + return eth_dev; +} + +struct rte_eth_dev * +rte_eth_dev_allocate(const char *name) +{ + uint16_t port_id; + struct rte_eth_dev *eth_dev = NULL; + + rte_eth_dev_shared_data_prepare(); + + /* Synchronize port creation between primary and secondary threads. 
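An application-side sketch (not part of this file) of how rte_eth_find_next(), implemented above, is typically used to walk the attached ports:

#include <rte_ethdev.h>

static uint16_t
example_count_ports(void)
{
	uint16_t pid, n = 0;

	for (pid = rte_eth_find_next(0);
			pid < RTE_MAX_ETHPORTS;
			pid = rte_eth_find_next(pid + 1))
		n++;	/* pid refers to an attached (or removal-pending) port */

	return n;
}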
*/ + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (_rte_eth_dev_allocated(name) != NULL) { + RTE_ETHDEV_LOG(ERR, + "Ethernet device with name %s already allocated\n", + name); + goto unlock; + } + + port_id = rte_eth_dev_find_free_port(); + if (port_id == RTE_MAX_ETHPORTS) { + RTE_ETHDEV_LOG(ERR, + "Reached maximum number of Ethernet ports\n"); + goto unlock; + } + + eth_dev = eth_dev_get(port_id); + snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); + eth_dev->data->port_id = port_id; + eth_dev->data->mtu = ETHER_MTU; + +unlock: + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return eth_dev; +} + +/* + * Attach to a port already registered by the primary process, which + * makes sure that the same device would have the same port id both + * in the primary and secondary process. + */ +struct rte_eth_dev * +rte_eth_dev_attach_secondary(const char *name) +{ + uint16_t i; + struct rte_eth_dev *eth_dev = NULL; + + rte_eth_dev_shared_data_prepare(); + + /* Synchronize port attachment to primary port creation and release. */ + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (strcmp(rte_eth_dev_shared_data->data[i].name, name) == 0) + break; + } + if (i == RTE_MAX_ETHPORTS) { + RTE_ETHDEV_LOG(ERR, + "Device %s is not driven by the primary process\n", + name); + } else { + eth_dev = eth_dev_get(i); + RTE_ASSERT(eth_dev->data->port_id == i); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return eth_dev; +} + +int +rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) +{ + if (eth_dev == NULL) + return -EINVAL; + + rte_eth_dev_shared_data_prepare(); + + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + eth_dev->state = RTE_ETH_DEV_UNUSED; + + memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data)); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return 0; +} + +int +rte_eth_dev_is_valid_port(uint16_t port_id) +{ + if (port_id >= RTE_MAX_ETHPORTS || + (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) + return 0; + else + return 1; +} + +static int +rte_eth_is_valid_owner_id(uint64_t owner_id) +{ + if (owner_id == RTE_ETH_DEV_NO_OWNER || + rte_eth_dev_shared_data->next_owner_id <= owner_id) { + RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n", + owner_id); + return 0; + } + return 1; +} + +uint64_t +rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id) +{ + while (port_id < RTE_MAX_ETHPORTS && + ((rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && + rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) || + rte_eth_devices[port_id].data->owner.id != owner_id)) + port_id++; + + if (port_id >= RTE_MAX_ETHPORTS) + return RTE_MAX_ETHPORTS; + + return port_id; +} + +int __rte_experimental +rte_eth_dev_owner_new(uint64_t *owner_id) +{ + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + *owner_id = rte_eth_dev_shared_data->next_owner_id++; + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return 0; +} + +static int +_rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id, + const struct rte_eth_dev_owner *new_owner) +{ + struct rte_eth_dev *ethdev = &rte_eth_devices[port_id]; + struct rte_eth_dev_owner *port_owner; + int sret; + + if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) { + RTE_ETHDEV_LOG(ERR, "Port id 
%"PRIu16" is not allocated\n", + port_id); + return -ENODEV; + } + + if (!rte_eth_is_valid_owner_id(new_owner->id) && + !rte_eth_is_valid_owner_id(old_owner_id)) + return -EINVAL; + + port_owner = &rte_eth_devices[port_id].data->owner; + if (port_owner->id != old_owner_id) { + RTE_ETHDEV_LOG(ERR, + "Cannot set owner to port %u already owned by %s_%016"PRIX64"\n", + port_id, port_owner->name, port_owner->id); + return -EPERM; + } + + sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s", + new_owner->name); + if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN) + RTE_ETHDEV_LOG(ERR, "Port %u owner name was truncated\n", + port_id); + + port_owner->id = new_owner->id; + + RTE_ETHDEV_LOG(DEBUG, "Port %u owner is %s_%016"PRIx64"\n", + port_id, new_owner->name, new_owner->id); + + return 0; +} + +int __rte_experimental +rte_eth_dev_owner_set(const uint16_t port_id, + const struct rte_eth_dev_owner *owner) +{ + int ret; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ret = _rte_eth_dev_owner_set(port_id, RTE_ETH_DEV_NO_OWNER, owner); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +int __rte_experimental +rte_eth_dev_owner_unset(const uint16_t port_id, const uint64_t owner_id) +{ + const struct rte_eth_dev_owner new_owner = (struct rte_eth_dev_owner) + {.id = RTE_ETH_DEV_NO_OWNER, .name = ""}; + int ret; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ret = _rte_eth_dev_owner_set(port_id, owner_id, &new_owner); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +void __rte_experimental +rte_eth_dev_owner_delete(const uint64_t owner_id) +{ + uint16_t port_id; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (rte_eth_is_valid_owner_id(owner_id)) { + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (rte_eth_devices[port_id].data->owner.id == owner_id) + memset(&rte_eth_devices[port_id].data->owner, 0, + sizeof(struct rte_eth_dev_owner)); + RTE_ETHDEV_LOG(ERR, + "All port owners owned by %016"PRIx64" identifier have removed\n", + owner_id); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); +} + +int __rte_experimental +rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner) +{ + int ret = 0; + struct rte_eth_dev *ethdev = &rte_eth_devices[port_id]; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) { + RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n", + port_id); + ret = -ENODEV; + } else { + rte_memcpy(owner, ðdev->data->owner, sizeof(*owner)); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +int +rte_eth_dev_socket_id(uint16_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); + return rte_eth_devices[port_id].data->numa_node; +} + +void * +rte_eth_dev_get_sec_ctx(uint16_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + return rte_eth_devices[port_id].security_ctx; +} + +uint16_t +rte_eth_dev_count(void) +{ + return rte_eth_dev_count_avail(); +} + +uint16_t +rte_eth_dev_count_avail(void) +{ + uint16_t p; + uint16_t count; + + count = 0; + + RTE_ETH_FOREACH_DEV(p) + count++; + + return count; +} + +uint16_t __rte_experimental +rte_eth_dev_count_total(void) +{ + uint16_t port, count = 0; + + 
for (port = 0; port < RTE_MAX_ETHPORTS; port++) + if (rte_eth_devices[port].state != RTE_ETH_DEV_UNUSED) + count++; + + return count; +} + +int +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name) +{ + char *tmp; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (name == NULL) { + RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n"); + return -EINVAL; + } + + /* shouldn't check 'rte_eth_devices[i].data', + * because it might be overwritten by VDEV PMD */ + tmp = rte_eth_dev_shared_data->data[port_id].name; + strcpy(name, tmp); + return 0; +} + +int +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id) +{ + uint32_t pid; + + if (name == NULL) { + RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n"); + return -EINVAL; + } + + for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) { + if (rte_eth_devices[pid].state != RTE_ETH_DEV_UNUSED && + !strcmp(name, rte_eth_dev_shared_data->data[pid].name)) { + *port_id = pid; + return 0; + } + } + + return -ENODEV; +} + +static int +eth_err(uint16_t port_id, int ret) +{ + if (ret == 0) + return 0; + if (rte_eth_dev_is_removed(port_id)) + return -EIO; + return ret; +} + +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint16_t *port_id) +{ + int current = rte_eth_dev_count_total(); + struct rte_devargs da; + int ret = -1; + + memset(&da, 0, sizeof(da)); + + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; + goto err; + } + + /* parse devargs */ + if (rte_devargs_parse(&da, devargs)) + goto err; + + ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args); + if (ret < 0) + goto err; + + /* no point looking at the port count if no port exists */ + if (!rte_eth_dev_count_total()) { + RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name); + ret = -1; + goto err; + } + + /* if nothing happened, there is a bug here, since some driver told us + * it did attach a device, but did not create a port. 
+ * FIXME: race condition in case of plug-out of another device + */ + if (current == rte_eth_dev_count_total()) { + ret = -1; + goto err; + } + + *port_id = eth_dev_last_created_port; + ret = 0; + +err: + free(da.args); + return ret; +} + +/* detach the device, then store the name of the device */ +int +rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused) +{ + struct rte_device *dev; + struct rte_bus *bus; + uint32_t dev_flags; + int ret = -1; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev_flags = rte_eth_devices[port_id].data->dev_flags; + if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { + RTE_ETHDEV_LOG(ERR, + "Port %"PRIu16" is bonded, cannot detach\n", port_id); + return -ENOTSUP; + } + + dev = rte_eth_devices[port_id].device; + if (dev == NULL) + return -EINVAL; + + bus = rte_bus_find_by_device(dev); + if (bus == NULL) + return -ENOENT; + + ret = rte_eal_hotplug_remove(bus->name, dev->name); + if (ret < 0) + return ret; + + rte_eth_dev_release_port(&rte_eth_devices[port_id]); + return 0; +} + +static int +rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_rx_queues; + void **rxq; + unsigned i; + + if (dev->data->rx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", + sizeof(dev->data->rx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->rx_queues == NULL) { + dev->data->nb_rx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->rx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -(ENOMEM); + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(rxq + old_nb_queues, 0, + sizeof(rxq[0]) * new_qs); + } + + dev->data->rx_queues = rxq; + + } else if (dev->data->rx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + } + dev->data->nb_rx_queues = nb_queues; + return 0; +} + +int +rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_ETHDEV_LOG(ERR, + "Port %u must be started before start any queue\n", + port_id); + return -EINVAL; + } + + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_ETHDEV_LOG(INFO, + "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n", + rx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->rx_queue_start(dev, + rx_queue_id)); + +} + +int +rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= 
dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_ETHDEV_LOG(INFO, + "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n", + rx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->rx_queue_stop(dev, rx_queue_id)); + +} + +int +rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_ETHDEV_LOG(ERR, + "Port %u must be started before start any queue\n", + port_id); + return -EINVAL; + } + + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_ETHDEV_LOG(INFO, + "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n", + tx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id)); +} + +int +rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_ETHDEV_LOG(INFO, + "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n", + tx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->tx_queue_stop(dev, tx_queue_id)); + +} + +static int +rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_tx_queues; + void **txq; + unsigned i; + + if (dev->data->tx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", + sizeof(dev->data->tx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->tx_queues == NULL) { + dev->data->nb_tx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->tx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(txq + old_nb_queues, 0, + sizeof(txq[0]) * new_qs); + } + + dev->data->tx_queues = txq; + + } else if (dev->data->tx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; + } + dev->data->nb_tx_queues = nb_queues; + return 0; +} + +uint32_t +rte_eth_speed_bitflag(uint32_t speed, int duplex) +{ + switch (speed) { + case 
ETH_SPEED_NUM_10M: + return duplex ? ETH_LINK_SPEED_10M : ETH_LINK_SPEED_10M_HD; + case ETH_SPEED_NUM_100M: + return duplex ? ETH_LINK_SPEED_100M : ETH_LINK_SPEED_100M_HD; + case ETH_SPEED_NUM_1G: + return ETH_LINK_SPEED_1G; + case ETH_SPEED_NUM_2_5G: + return ETH_LINK_SPEED_2_5G; + case ETH_SPEED_NUM_5G: + return ETH_LINK_SPEED_5G; + case ETH_SPEED_NUM_10G: + return ETH_LINK_SPEED_10G; + case ETH_SPEED_NUM_20G: + return ETH_LINK_SPEED_20G; + case ETH_SPEED_NUM_25G: + return ETH_LINK_SPEED_25G; + case ETH_SPEED_NUM_40G: + return ETH_LINK_SPEED_40G; + case ETH_SPEED_NUM_50G: + return ETH_LINK_SPEED_50G; + case ETH_SPEED_NUM_56G: + return ETH_LINK_SPEED_56G; + case ETH_SPEED_NUM_100G: + return ETH_LINK_SPEED_100G; + default: + return 0; + } +} + +const char * __rte_experimental +rte_eth_dev_rx_offload_name(uint64_t offload) +{ + const char *name = "UNKNOWN"; + unsigned int i; + + for (i = 0; i < RTE_DIM(rte_rx_offload_names); ++i) { + if (offload == rte_rx_offload_names[i].offload) { + name = rte_rx_offload_names[i].name; + break; + } + } + + return name; +} + +const char * __rte_experimental +rte_eth_dev_tx_offload_name(uint64_t offload) +{ + const char *name = "UNKNOWN"; + unsigned int i; + + for (i = 0; i < RTE_DIM(rte_tx_offload_names); ++i) { + if (offload == rte_tx_offload_names[i].offload) { + name = rte_tx_offload_names[i].name; + break; + } + } + + return name; +} + +int +rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_conf local_conf = *dev_conf; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + /* If number of queues specified by application for both Rx and Tx is + * zero, use driver preferred values. This cannot be done individually + * as it is valid for either Tx or Rx (but not both) to be zero. + * If driver does not provide any preferred valued, fall back on + * EAL defaults. + */ + if (nb_rx_q == 0 && nb_tx_q == 0) { + nb_rx_q = dev_info.default_rxportconf.nb_queues; + if (nb_rx_q == 0) + nb_rx_q = RTE_ETH_DEV_FALLBACK_RX_NBQUEUES; + nb_tx_q = dev_info.default_txportconf.nb_queues; + if (nb_tx_q == 0) + nb_tx_q = RTE_ETH_DEV_FALLBACK_TX_NBQUEUES; + } + + if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_ETHDEV_LOG(ERR, + "Number of RX queues requested (%u) is greater than max supported(%d)\n", + nb_rx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_ETHDEV_LOG(ERR, + "Number of TX queues requested (%u) is greater than max supported(%d)\n", + nb_tx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_ETHDEV_LOG(ERR, + "Port %u must be stopped to allow configuration\n", + port_id); + return -EBUSY; + } + + /* Copy the dev_conf parameter into the dev structure */ + memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf)); + + /* + * Check that the numbers of RX and TX queues are not greater + * than the maximum number of RX and TX queues supported by the + * configured device. 
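+ * The maxima come from the driver's dev_infos_get() callback queried above.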
+ */ + if (nb_rx_q > dev_info.max_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n", + port_id, nb_rx_q, dev_info.max_rx_queues); + return -EINVAL; + } + + if (nb_tx_q > dev_info.max_tx_queues) { + RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n", + port_id, nb_tx_q, dev_info.max_tx_queues); + return -EINVAL; + } + + /* Check that the device supports requested interrupts */ + if ((dev_conf->intr_conf.lsc == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { + RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n", + dev->device->driver->name); + return -EINVAL; + } + if ((dev_conf->intr_conf.rmv == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { + RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n", + dev->device->driver->name); + return -EINVAL; + } + + /* + * If jumbo frames are enabled, check that the maximum RX packet + * length is supported by the configured device. + */ + if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n", + port_id, dev_conf->rxmode.max_rx_pkt_len, + dev_info.max_rx_pktlen); + return -EINVAL; + } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n", + port_id, dev_conf->rxmode.max_rx_pkt_len, + (unsigned)ETHER_MIN_LEN); + return -EINVAL; + } + } else { + if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || + dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) + /* Use default value */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = + ETHER_MAX_LEN; + } + + /* Any requested offloading must be within its device capabilities */ + if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) != + local_conf.rxmode.offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads " + "capabilities 0x%"PRIx64" in %s()\n", + port_id, local_conf.rxmode.offloads, + dev_info.rx_offload_capa, + __func__); + return -EINVAL; + } + if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) != + local_conf.txmode.offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads " + "capabilities 0x%"PRIx64" in %s()\n", + port_id, local_conf.txmode.offloads, + dev_info.tx_offload_capa, + __func__); + return -EINVAL; + } + + if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) && + (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) { + RTE_ETHDEV_LOG(ERR, + "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n", + port_id); + return -EINVAL; + } + + /* Check that device supports requested rss hash functions. */ + if ((dev_info.flow_type_rss_offloads | + dev_conf->rx_adv_conf.rss_conf.rss_hf) != + dev_info.flow_type_rss_offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n", + port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf, + dev_info.flow_type_rss_offloads); + return -EINVAL; + } + + /* + * Setup new number of RX/TX queues and reconfigure device. 
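+ * If a later step fails, any queue arrays already allocated are released again via rte_eth_dev_rx/tx_queue_config(dev, 0), leaving the port unconfigured.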
+ */ + diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q); + if (diag != 0) { + RTE_ETHDEV_LOG(ERR, + "Port%u rte_eth_dev_rx_queue_config = %d\n", + port_id, diag); + return diag; + } + + diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); + if (diag != 0) { + RTE_ETHDEV_LOG(ERR, + "Port%u rte_eth_dev_tx_queue_config = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + return diag; + } + + diag = (*dev->dev_ops->dev_configure)(dev); + if (diag != 0) { + RTE_ETHDEV_LOG(ERR, "Port%u dev_configure = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return eth_err(port_id, diag); + } + + /* Initialize Rx profiling if enabled at compilation time. */ + diag = __rte_eth_profile_rx_init(port_id, dev); + if (diag != 0) { + RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return eth_err(port_id, diag); + } + + return 0; +} + +void +_rte_eth_dev_reset(struct rte_eth_dev *dev) +{ + if (dev->data->dev_started) { + RTE_ETHDEV_LOG(ERR, "Port %u must be stopped to allow reset\n", + dev->data->port_id); + return; + } + + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + + memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf)); +} + +static void +rte_eth_dev_config_restore(uint16_t port_id) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ether_addr *addr; + uint16_t i; + uint32_t pool = 0; + uint64_t pool_mask; + + dev = &rte_eth_devices[port_id]; + + rte_eth_dev_info_get(port_id, &dev_info); + + /* replay MAC address configuration including default MAC */ + addr = &dev->data->mac_addrs[0]; + if (*dev->dev_ops->mac_addr_set != NULL) + (*dev->dev_ops->mac_addr_set)(dev, addr); + else if (*dev->dev_ops->mac_addr_add != NULL) + (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); + + if (*dev->dev_ops->mac_addr_add != NULL) { + for (i = 1; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(addr)) + continue; + + pool = 0; + pool_mask = dev->data->mac_pool_sel[i]; + + do { + if (pool_mask & 1ULL) + (*dev->dev_ops->mac_addr_add)(dev, + addr, i, pool); + pool_mask >>= 1; + pool++; + } while (pool_mask); + } + } + + /* replay promiscuous configuration */ + if (rte_eth_promiscuous_get(port_id) == 1) + rte_eth_promiscuous_enable(port_id); + else if (rte_eth_promiscuous_get(port_id) == 0) + rte_eth_promiscuous_disable(port_id); + + /* replay all multicast configuration */ + if (rte_eth_allmulticast_get(port_id) == 1) + rte_eth_allmulticast_enable(port_id); + else if (rte_eth_allmulticast_get(port_id) == 0) + rte_eth_allmulticast_disable(port_id); +} + +int +rte_eth_dev_start(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + RTE_ETHDEV_LOG(INFO, + "Device with port_id=%"PRIu16" already started\n", + port_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return eth_err(port_id, diag); + + rte_eth_dev_config_restore(port_id); + + if (dev->data->dev_conf.intr_conf.lsc == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); + (*dev->dev_ops->link_update)(dev, 0); + } + return 0; +} + +void +rte_eth_dev_stop(uint16_t port_id) +{ 
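+ /* dev_started is cleared before the driver stop callback runs; stopping an already-stopped port is a no-op apart from an INFO log. */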
+ struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + RTE_ETHDEV_LOG(INFO, + "Device with port_id=%"PRIu16" already stopped\n", + port_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_eth_dev_set_link_up(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_set_link_up)(dev)); +} + +int +rte_eth_dev_set_link_down(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_set_link_down)(dev)); +} + +void +rte_eth_dev_close(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close); + dev->data->dev_started = 0; + (*dev->dev_ops->dev_close)(dev); + + dev->data->nb_rx_queues = 0; + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + dev->data->nb_tx_queues = 0; + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; +} + +int +rte_eth_dev_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP); + + rte_eth_dev_stop(port_id); + ret = dev->dev_ops->dev_reset(dev); + + return eth_err(port_id, ret); +} + +int __rte_experimental +rte_eth_dev_is_removed(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + + dev = &rte_eth_devices[port_id]; + + if (dev->state == RTE_ETH_DEV_REMOVED) + return 1; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->is_removed, 0); + + ret = dev->dev_ops->is_removed(dev); + if (ret != 0) + /* Device is physically removed. */ + dev->state = RTE_ETH_DEV_REMOVED; + + return ret; +} + +int +rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + int ret; + uint32_t mbp_buf_size; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf local_conf; + void **rxq; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP); + + /* + * Check the size of the mbuf data buffer. + * This value must be provided in the private data of the memory pool. + * First check that the memory pool has a valid private data. 
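+ * (Pools created with rte_pktmbuf_pool_create() carry this private data by default.)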
+ */ + rte_eth_dev_info_get(port_id, &dev_info); + if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) { + RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n", + mp->name, (int)mp->private_data_size, + (int)sizeof(struct rte_pktmbuf_pool_private)); + return -ENOSPC; + } + mbp_buf_size = rte_pktmbuf_data_room_size(mp); + + if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { + RTE_ETHDEV_LOG(ERR, + "%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n", + mp->name, (int)mbp_buf_size, + (int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize), + (int)RTE_PKTMBUF_HEADROOM, + (int)dev_info.min_rx_bufsize); + return -EINVAL; + } + + /* Use default specified by driver, if nb_rx_desc is zero */ + if (nb_rx_desc == 0) { + nb_rx_desc = dev_info.default_rxportconf.ring_size; + /* If driver default is also zero, fall back on EAL default */ + if (nb_rx_desc == 0) + nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE; + } + + if (nb_rx_desc > dev_info.rx_desc_lim.nb_max || + nb_rx_desc < dev_info.rx_desc_lim.nb_min || + nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { + + RTE_ETHDEV_LOG(ERR, + "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n", + nb_rx_desc, dev_info.rx_desc_lim.nb_max, + dev_info.rx_desc_lim.nb_min, + dev_info.rx_desc_lim.nb_align); + return -EINVAL; + } + + if (dev->data->dev_started && + !(dev_info.dev_capa & + RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP)) + return -EBUSY; + + if (dev->data->dev_started && + (dev->data->rx_queue_state[rx_queue_id] != + RTE_ETH_QUEUE_STATE_STOPPED)) + return -EBUSY; + + rxq = dev->data->rx_queues; + if (rxq[rx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, + -ENOTSUP); + (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); + rxq[rx_queue_id] = NULL; + } + + if (rx_conf == NULL) + rx_conf = &dev_info.default_rxconf; + + local_conf = *rx_conf; + + /* + * If an offloading has already been enabled in + * rte_eth_dev_configure(), it has been enabled on all queues, + * so there is no need to enable it in this queue again. + * The local_conf.offloads input to underlying PMD only carries + * those offloadings which are only enabled on this queue and + * not enabled on all queues. + */ + local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads; + + /* + * New added offloadings for this queue are those not enabled in + * rte_eth_dev_configure() and they must be per-queue type. + * A pure per-port offloading can't be enabled on a queue while + * disabled on another queue. A pure per-port offloading can't + * be enabled for any queue as new added one if it hasn't been + * enabled in rte_eth_dev_configure(). 
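+ * In other words, only offloads listed in rx_queue_offload_capa may be newly requested here on top of the port-level configuration.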
+ */ + if ((local_conf.offloads & dev_info.rx_queue_offload_capa) != + local_conf.offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be " + "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n", + port_id, rx_queue_id, local_conf.offloads, + dev_info.rx_queue_offload_capa, + __func__); + return -EINVAL; + } + + ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, + socket_id, &local_conf, mp); + if (!ret) { + if (!dev->data->min_rx_buf_size || + dev->data->min_rx_buf_size > mbp_buf_size) + dev->data->min_rx_buf_size = mbp_buf_size; + } + + return eth_err(port_id, ret); +} + +int +rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf local_conf; + void **txq; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + /* Use default specified by driver, if nb_tx_desc is zero */ + if (nb_tx_desc == 0) { + nb_tx_desc = dev_info.default_txportconf.ring_size; + /* If driver default is zero, fall back on EAL default */ + if (nb_tx_desc == 0) + nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE; + } + if (nb_tx_desc > dev_info.tx_desc_lim.nb_max || + nb_tx_desc < dev_info.tx_desc_lim.nb_min || + nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { + RTE_ETHDEV_LOG(ERR, + "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n", + nb_tx_desc, dev_info.tx_desc_lim.nb_max, + dev_info.tx_desc_lim.nb_min, + dev_info.tx_desc_lim.nb_align); + return -EINVAL; + } + + if (dev->data->dev_started && + !(dev_info.dev_capa & + RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP)) + return -EBUSY; + + if (dev->data->dev_started && + (dev->data->tx_queue_state[tx_queue_id] != + RTE_ETH_QUEUE_STATE_STOPPED)) + return -EBUSY; + + txq = dev->data->tx_queues; + if (txq[tx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, + -ENOTSUP); + (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); + txq[tx_queue_id] = NULL; + } + + if (tx_conf == NULL) + tx_conf = &dev_info.default_txconf; + + local_conf = *tx_conf; + + /* + * If an offloading has already been enabled in + * rte_eth_dev_configure(), it has been enabled on all queues, + * so there is no need to enable it in this queue again. + * The local_conf.offloads input to underlying PMD only carries + * those offloadings which are only enabled on this queue and + * not enabled on all queues. + */ + local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads; + + /* + * New added offloadings for this queue are those not enabled in + * rte_eth_dev_configure() and they must be per-queue type. + * A pure per-port offloading can't be enabled on a queue while + * disabled on another queue. A pure per-port offloading can't + * be enabled for any queue as new added one if it hasn't been + * enabled in rte_eth_dev_configure(). 
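+ * In other words, only offloads listed in tx_queue_offload_capa may be newly requested here on top of the port-level configuration.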
+ */ + if ((local_conf.offloads & dev_info.tx_queue_offload_capa) != + local_conf.offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be " + "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n", + port_id, tx_queue_id, local_conf.offloads, + dev_info.tx_queue_offload_capa, + __func__); + return -EINVAL; + } + + return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev, + tx_queue_id, nb_tx_desc, socket_id, &local_conf)); +} + +void +rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata __rte_unused) +{ + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); +} + +void +rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata) +{ + uint64_t *count = userdata; + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); + + *count += unsent; +} + +int +rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, + buffer_tx_error_fn cbfn, void *userdata) +{ + buffer->error_callback = cbfn; + buffer->error_userdata = userdata; + return 0; +} + +int +rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) +{ + int ret = 0; + + if (buffer == NULL) + return -EINVAL; + + buffer->size = size; + if (buffer->error_callback == NULL) { + ret = rte_eth_tx_buffer_set_err_callback( + buffer, rte_eth_tx_buffer_drop_callback, NULL); + } + + return ret; +} + +int +rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + int ret; + + /* Validate Input Data. Bail if not valid or not supported. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_done_cleanup, -ENOTSUP); + + /* Call driver to free pending mbufs. 
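+ * A free_cnt of 0 requests freeing as many completed mbufs as possible; queue_id is not range-checked here, so the caller must pass a valid queue.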
*/ + ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id], + free_cnt); + return eth_err(port_id, ret); +} + +void +rte_eth_promiscuous_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable); + (*dev->dev_ops->promiscuous_enable)(dev); + dev->data->promiscuous = 1; +} + +void +rte_eth_promiscuous_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_disable); + dev->data->promiscuous = 0; + (*dev->dev_ops->promiscuous_disable)(dev); +} + +int +rte_eth_promiscuous_get(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->promiscuous; +} + +void +rte_eth_allmulticast_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_enable); + (*dev->dev_ops->allmulticast_enable)(dev); + dev->data->all_multicast = 1; +} + +void +rte_eth_allmulticast_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_disable); + dev->data->all_multicast = 0; + (*dev->dev_ops->allmulticast_disable)(dev); +} + +int +rte_eth_allmulticast_get(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->all_multicast; +} + +void +rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc && + dev->data->dev_started) + rte_eth_linkstatus_get(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 1); + *eth_link = dev->data->dev_link; + } +} + +void +rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc && + dev->data->dev_started) + rte_eth_linkstatus_get(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 0); + *eth_link = dev->data->dev_link; + } +} + +int +rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); +} + +int +rte_eth_stats_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_reset, -ENOTSUP); + (*dev->dev_ops->stats_reset)(dev); + dev->data->rx_mbuf_alloc_failed = 0; + + return 0; +} + +static inline int +get_xstats_basic_count(struct rte_eth_dev *dev) +{ + uint16_t nb_rxqs, nb_txqs; + int count; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, 
RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + count = RTE_NB_STATS; + count += nb_rxqs * RTE_NB_RXQ_STATS; + count += nb_txqs * RTE_NB_TXQ_STATS; + + return count; +} + +static int +get_xstats_count(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int count; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + if (dev->dev_ops->xstats_get_names_by_id != NULL) { + count = (*dev->dev_ops->xstats_get_names_by_id)(dev, NULL, + NULL, 0); + if (count < 0) + return eth_err(port_id, count); + } + if (dev->dev_ops->xstats_get_names != NULL) { + count = (*dev->dev_ops->xstats_get_names)(dev, NULL, 0); + if (count < 0) + return eth_err(port_id, count); + } else + count = 0; + + + count += get_xstats_basic_count(dev); + + return count; +} + +int +rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, + uint64_t *id) +{ + int cnt_xstats, idx_xstat; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!id) { + RTE_ETHDEV_LOG(ERR, "Id pointer is NULL\n"); + return -ENOMEM; + } + + if (!xstat_name) { + RTE_ETHDEV_LOG(ERR, "xstat_name pointer is NULL\n"); + return -ENOMEM; + } + + /* Get count */ + cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); + if (cnt_xstats < 0) { + RTE_ETHDEV_LOG(ERR, "Cannot get count of xstats\n"); + return -ENODEV; + } + + /* Get id-name lookup table */ + struct rte_eth_xstat_name xstats_names[cnt_xstats]; + + if (cnt_xstats != rte_eth_xstats_get_names_by_id( + port_id, xstats_names, cnt_xstats, NULL)) { + RTE_ETHDEV_LOG(ERR, "Cannot get xstats lookup\n"); + return -1; + } + + for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { + if (!strcmp(xstats_names[idx_xstat].name, xstat_name)) { + *id = idx_xstat; + return 0; + }; + } + + return -EINVAL; +} + +/* retrieve basic stats names */ +static int +rte_eth_basic_stats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names) +{ + int cnt_used_entries = 0; + uint32_t idx, id_queue; + uint16_t num_q; + + for (idx = 0; idx < RTE_NB_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "%s", rte_stats_strings[idx].name); + cnt_used_entries++; + } + num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "rx_q%u%s", + id_queue, rte_rxq_stats_strings[idx].name); + cnt_used_entries++; + } + + } + num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "tx_q%u%s", + id_queue, rte_txq_stats_strings[idx].name); + cnt_used_entries++; + } + } + return cnt_used_entries; +} + +/* retrieve ethdev extended statistics names */ +int +rte_eth_xstats_get_names_by_id(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids) +{ + struct rte_eth_xstat_name *xstats_names_copy; + unsigned int no_basic_stat_requested = 1; + unsigned int no_ext_stat_requested = 1; + unsigned int expected_entries; + unsigned int basic_count; + struct rte_eth_dev *dev; + unsigned int i; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + basic_count = get_xstats_basic_count(dev); 
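+ /* expected_entries covers the generic and per-queue stats plus any driver-specific ones. */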
+ ret = get_xstats_count(port_id); + if (ret < 0) + return ret; + expected_entries = (unsigned int)ret; + + /* Return max number of stats if no ids given */ + if (!ids) { + if (!xstats_names) + return expected_entries; + else if (xstats_names && size < expected_entries) + return expected_entries; + } + + if (ids && !xstats_names) + return -EINVAL; + + if (ids && dev->dev_ops->xstats_get_names_by_id != NULL && size > 0) { + uint64_t ids_copy[size]; + + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; + } + + /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. + */ + ids_copy[i] = ids[i] - basic_count; + } + + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_names_by_id)(dev, + xstats_names, ids_copy, size); + } + + /* Retrieve all stats */ + if (!ids) { + int num_stats = rte_eth_xstats_get_names(port_id, xstats_names, + expected_entries); + if (num_stats < 0 || num_stats > (int)expected_entries) + return num_stats; + else + return expected_entries; + } + + xstats_names_copy = calloc(expected_entries, + sizeof(struct rte_eth_xstat_name)); + + if (!xstats_names_copy) { + RTE_ETHDEV_LOG(ERR, "Can't allocate memory\n"); + return -ENOMEM; + } + + if (ids) { + for (i = 0; i < size; i++) { + if (ids[i] >= basic_count) { + no_ext_stat_requested = 0; + break; + } + } + } + + /* Fill xstats_names_copy structure */ + if (ids && no_ext_stat_requested) { + rte_eth_basic_stats_get_names(dev, xstats_names_copy); + } else { + ret = rte_eth_xstats_get_names(port_id, xstats_names_copy, + expected_entries); + if (ret < 0) { + free(xstats_names_copy); + return ret; + } + } + + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n"); + free(xstats_names_copy); + return -1; + } + xstats_names[i] = xstats_names_copy[ids[i]]; + } + + free(xstats_names_copy); + return size; +} + +int +rte_eth_xstats_get_names(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, + unsigned int size) +{ + struct rte_eth_dev *dev; + int cnt_used_entries; + int cnt_expected_entries; + int cnt_driver_entries; + + cnt_expected_entries = get_xstats_count(port_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* port_id checked in get_xstats_count() */ + dev = &rte_eth_devices[port_id]; + + cnt_used_entries = rte_eth_basic_stats_get_names( + dev, xstats_names); + + if (dev->dev_ops->xstats_get_names != NULL) { + /* If there are any driver-specific xstats, append them + * to end of list. 
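+ * The callback only receives the space remaining after the generic names (size - cnt_used_entries).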
+ */ + cnt_driver_entries = (*dev->dev_ops->xstats_get_names)( + dev, + xstats_names + cnt_used_entries, + size - cnt_used_entries); + if (cnt_driver_entries < 0) + return eth_err(port_id, cnt_driver_entries); + cnt_used_entries += cnt_driver_entries; + } + + return cnt_used_entries; +} + + +static int +rte_eth_basic_stats_get(uint16_t port_id, struct rte_eth_xstat *xstats) +{ + struct rte_eth_dev *dev; + struct rte_eth_stats eth_stats; + unsigned int count = 0, i, q; + uint64_t val, *stats_ptr; + uint16_t nb_rxqs, nb_txqs; + int ret; + + ret = rte_eth_stats_get(port_id, &eth_stats); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + /* global stats */ + for (i = 0; i < RTE_NB_STATS; i++) { + stats_ptr = RTE_PTR_ADD(&eth_stats, + rte_stats_strings[i].offset); + val = *stats_ptr; + xstats[count++].value = val; + } + + /* per-rxq stats */ + for (q = 0; q < nb_rxqs; q++) { + for (i = 0; i < RTE_NB_RXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(&eth_stats, + rte_rxq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + xstats[count++].value = val; + } + } + + /* per-txq stats */ + for (q = 0; q < nb_txqs; q++) { + for (i = 0; i < RTE_NB_TXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(&eth_stats, + rte_txq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + xstats[count++].value = val; + } + } + return count; +} + +/* retrieve ethdev extended statistics */ +int +rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size) +{ + unsigned int no_basic_stat_requested = 1; + unsigned int no_ext_stat_requested = 1; + unsigned int num_xstats_filled; + unsigned int basic_count; + uint16_t expected_entries; + struct rte_eth_dev *dev; + unsigned int i; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + ret = get_xstats_count(port_id); + if (ret < 0) + return ret; + expected_entries = (uint16_t)ret; + struct rte_eth_xstat xstats[expected_entries]; + dev = &rte_eth_devices[port_id]; + basic_count = get_xstats_basic_count(dev); + + /* Return max number of stats if no ids given */ + if (!ids) { + if (!values) + return expected_entries; + else if (values && size < expected_entries) + return expected_entries; + } + + if (ids && !values) + return -EINVAL; + + if (ids && dev->dev_ops->xstats_get_by_id != NULL && size) { + unsigned int basic_count = get_xstats_basic_count(dev); + uint64_t ids_copy[size]; + + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; + } + + /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. 
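+ * For example, with 10 basic stats a user id of 12 maps to driver xstat id 2.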
+ */ + ids_copy[i] = ids[i] - basic_count; + } + + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_by_id)(dev, ids_copy, + values, size); + } + + if (ids) { + for (i = 0; i < size; i++) { + if (ids[i] >= basic_count) { + no_ext_stat_requested = 0; + break; + } + } + } + + /* Fill the xstats structure */ + if (ids && no_ext_stat_requested) + ret = rte_eth_basic_stats_get(port_id, xstats); + else + ret = rte_eth_xstats_get(port_id, xstats, expected_entries); + + if (ret < 0) + return ret; + num_xstats_filled = (unsigned int)ret; + + /* Return all stats */ + if (!ids) { + for (i = 0; i < num_xstats_filled; i++) + values[i] = xstats[i].value; + return expected_entries; + } + + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n"); + return -1; + } + values[i] = xstats[ids[i]].value; + } + return size; +} + +int +rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + unsigned int n) +{ + struct rte_eth_dev *dev; + unsigned int count = 0, i; + signed int xcount = 0; + uint16_t nb_rxqs, nb_txqs; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + /* Return generic statistics */ + count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + + (nb_txqs * RTE_NB_TXQ_STATS); + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. + */ + xcount = (*dev->dev_ops->xstats_get)(dev, + xstats ? xstats + count : NULL, + (n > count) ? n - count : 0); + + if (xcount < 0) + return eth_err(port_id, xcount); + } + + if (n < count + xcount || xstats == NULL) + return count + xcount; + + /* now fill the xstats structure */ + ret = rte_eth_basic_stats_get(port_id, xstats); + if (ret < 0) + return ret; + count = ret; + + for (i = 0; i < count; i++) + xstats[i].id = i; + /* add an offset to driver-specific stats */ + for ( ; i < count + xcount; i++) + xstats[i].id += count; + + return count + xcount; +} + +/* reset ethdev extended statistics */ +void +rte_eth_xstats_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + /* implemented by the driver */ + if (dev->dev_ops->xstats_reset != NULL) { + (*dev->dev_ops->xstats_reset)(dev); + return; + } + + /* fallback to default */ + rte_eth_stats_reset(port_id); +} + +static int +set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx, + uint8_t is_rx) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP); + + if (is_rx && (queue_id >= dev->data->nb_rx_queues)) + return -EINVAL; + + if (!is_rx && (queue_id >= dev->data->nb_tx_queues)) + return -EINVAL; + + if (stat_idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS) + return -EINVAL; + + return (*dev->dev_ops->queue_stats_mapping_set) + (dev, queue_id, stat_idx, is_rx); +} + + +int +rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id, + uint8_t stat_idx) +{ + return eth_err(port_id, set_queue_stats_mapping(port_id, tx_queue_id, + stat_idx, STAT_QMAP_TX)); +} + + +int +rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id, + uint8_t stat_idx) 
+{ + return eth_err(port_id, set_queue_stats_mapping(port_id, rx_queue_id, + stat_idx, STAT_QMAP_RX)); +} + +int +rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->fw_version_get)(dev, + fw_version, fw_size)); +} + +void +rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_dev *dev; + const struct rte_eth_desc_lim lim = { + .nb_max = UINT16_MAX, + .nb_min = 0, + .nb_align = 1, + }; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); + dev_info->rx_desc_lim = lim; + dev_info->tx_desc_lim = lim; + dev_info->device = dev->device; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + dev_info->driver_name = dev->device->driver->name; + dev_info->nb_rx_queues = dev->data->nb_rx_queues; + dev_info->nb_tx_queues = dev->data->nb_tx_queues; + + dev_info->dev_flags = &dev->data->dev_flags; +} + +int +rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, + uint32_t *ptypes, int num) +{ + int i, j; + struct rte_eth_dev *dev; + const uint32_t *all_ptypes; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_supported_ptypes_get, 0); + all_ptypes = (*dev->dev_ops->dev_supported_ptypes_get)(dev); + + if (!all_ptypes) + return 0; + + for (i = 0, j = 0; all_ptypes[i] != RTE_PTYPE_UNKNOWN; ++i) + if (all_ptypes[i] & ptype_mask) { + if (j < num) + ptypes[j] = all_ptypes[i]; + j++; + } + + return j; +} + +void +rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + ether_addr_copy(&dev->data->mac_addrs[0], mac_addr); +} + + +int +rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + *mtu = dev->data->mtu; + return 0; +} + +int +rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu) +{ + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP); + + ret = (*dev->dev_ops->mtu_set)(dev, mtu); + if (!ret) + dev->data->mtu = mtu; + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER)) { + RTE_ETHDEV_LOG(ERR, "Port %u: vlan-filtering disabled\n", + port_id); + return -ENOSYS; + } + + if (vlan_id > 4095) { + RTE_ETHDEV_LOG(ERR, "Port_id=%u invalid vlan_id=%u > 4095\n", + port_id, vlan_id); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); + + ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); + if (ret == 0) { + struct rte_vlan_filter_conf *vfc; + int vidx; + int vbit; + + vfc = &dev->data->vlan_filter_conf; + vidx = vlan_id / 64; + vbit = vlan_id % 64; + + if (on) + vfc->ids[vidx] |= UINT64_C(1) << vbit; + else + 
vfc->ids[vidx] &= ~(UINT64_C(1) << vbit); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, + int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid rx_queue_id=%u\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + (*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on); + + return 0; +} + +int +rte_eth_dev_set_vlan_ether_type(uint16_t port_id, + enum rte_vlan_type vlan_type, + uint16_t tpid) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->vlan_tpid_set)(dev, vlan_type, + tpid)); +} + +int +rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) +{ + struct rte_eth_dev *dev; + int ret = 0; + int mask = 0; + int cur, org = 0; + uint64_t orig_offloads; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + /* save original values in case of failure */ + orig_offloads = dev->data->dev_conf.rxmode.offloads; + + /*check which option changed by application*/ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_STRIP; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_STRIP; + mask |= ETH_VLAN_STRIP_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_FILTER; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_FILTER; + mask |= ETH_VLAN_FILTER_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_EXTEND; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_EXTEND; + mask |= ETH_VLAN_EXTEND_MASK; + } + + /*no change*/ + if (mask == 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); + ret = (*dev->dev_ops->vlan_offload_set)(dev, mask); + if (ret) { + /* hit an error restore original values */ + dev->data->dev_conf.rxmode.offloads = orig_offloads; + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_get_vlan_offload(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP) + ret |= ETH_VLAN_STRIP_OFFLOAD; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) + ret |= ETH_VLAN_FILTER_OFFLOAD; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND) + ret |= ETH_VLAN_EXTEND_OFFLOAD; + + return ret; +} + +int +rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->vlan_pvid_set)(dev, pvid, on)); +} + +int +rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP); + memset(fc_conf, 0, sizeof(*fc_conf)); + return eth_err(port_id, (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf)); +} + +int +rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) { + RTE_ETHDEV_LOG(ERR, "Invalid send_xon, only 0/1 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf)); +} + +int +rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, + struct rte_eth_pfc_conf *pfc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) { + RTE_ETHDEV_LOG(ERR, "Invalid priority, only 0-7 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + /* High water, low water validation are device specific */ + if (*dev->dev_ops->priority_flow_ctrl_set) + return eth_err(port_id, (*dev->dev_ops->priority_flow_ctrl_set) + (dev, pfc_conf)); + return -ENOTSUP; +} + +static int +rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint16_t i, num; + + if (!reta_conf) + return -EINVAL; + + num = (reta_size + RTE_RETA_GROUP_SIZE - 1) / RTE_RETA_GROUP_SIZE; + for (i = 0; i < num; i++) { + if (reta_conf[i].mask) + return 0; + } + + return -EINVAL; +} + +static int +rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size, + uint16_t max_rxq) +{ + uint16_t i, idx, shift; + + if (!reta_conf) + return -EINVAL; + + if (max_rxq == 0) { + RTE_ETHDEV_LOG(ERR, "No receive queue is available\n"); + return -EINVAL; + } + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if ((reta_conf[idx].mask & (1ULL << shift)) && + (reta_conf[idx].reta[shift] >= max_rxq)) { + RTE_ETHDEV_LOG(ERR, + "reta_conf[%u]->reta[%u]: %u exceeds the maximum rxq index: %u\n", + idx, shift, + reta_conf[idx].reta[shift], max_rxq); + return -EINVAL; + } + } + + return 0; +} + +int +rte_eth_dev_rss_reta_update(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + /* Check entry value */ + ret = rte_eth_check_reta_entry(reta_conf, reta_size, + dev->data->nb_rx_queues); + if (ret < 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->reta_update)(dev, reta_conf, + reta_size)); +} + +int +rte_eth_dev_rss_reta_query(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + /* Check mask bits */ + ret = 
rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->reta_query)(dev, reta_conf, + reta_size)); +} + +int +rte_eth_dev_rss_hash_update(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info = { .flow_type_rss_offloads = 0, }; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + if ((dev_info.flow_type_rss_offloads | rss_conf->rss_hf) != + dev_info.flow_type_rss_offloads) { + RTE_ETHDEV_LOG(ERR, + "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n", + port_id, rss_conf->rss_hf, + dev_info.flow_type_rss_offloads); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev, + rss_conf)); +} + +int +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev, + rss_conf)); +} + +int +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (udp_tunnel == NULL) { + RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_add, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_add)(dev, + udp_tunnel)); +} + +int +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (udp_tunnel == NULL) { + RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_del, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_del)(dev, + udp_tunnel)); +} + +int +rte_eth_led_on(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_led_on)(dev)); +} + +int +rte_eth_led_off(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_led_off)(dev)); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. 
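+ * Returns the array index, or -1 when the address is not present; pass the all-zero address to look for a free slot.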
+ */ +static int +get_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + rte_eth_dev_info_get(port_id, &dev_info); + + for (i = 0; i < dev_info.max_mac_addrs; i++) + if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +static const struct ether_addr null_mac_addr; + +int +rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr, + uint32_t pool) +{ + struct rte_eth_dev *dev; + int index; + uint64_t pool_mask; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP); + + if (is_zero_ether_addr(addr)) { + RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + if (pool >= ETH_64_POOLS) { + RTE_ETHDEV_LOG(ERR, "Pool id must be 0-%d\n", ETH_64_POOLS - 1); + return -EINVAL; + } + + index = get_mac_addr_index(port_id, addr); + if (index < 0) { + index = get_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n", + port_id); + return -ENOSPC; + } + } else { + pool_mask = dev->data->mac_pool_sel[index]; + + /* Check if both MAC address and pool is already there, and do nothing */ + if (pool_mask & (1ULL << pool)) + return 0; + } + + /* Update NIC */ + ret = (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); + + if (ret == 0) { + /* Update address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[index]); + + /* Update pool bitmap in NIC data structure */ + dev->data->mac_pool_sel[index] |= (1ULL << pool); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int index; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP); + + index = get_mac_addr_index(port_id, addr); + if (index == 0) { + RTE_ETHDEV_LOG(ERR, + "Port %u: Cannot remove default MAC address\n", + port_id); + return -EADDRINUSE; + } else if (index < 0) + return 0; /* Do nothing if address wasn't found */ + + /* Update NIC */ + (*dev->dev_ops->mac_addr_remove)(dev, index); + + /* Update address in NIC data structure */ + ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]); + + /* reset pool bitmap */ + dev->data->mac_pool_sel[index] = 0; + + return 0; +} + +int +rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!is_valid_assigned_ether_addr(addr)) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP); + + ret = (*dev->dev_ops->mac_addr_set)(dev, addr); + if (ret < 0) + return ret; + + /* Update default address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[0]); + + return 0; +} + + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. 
+ */ +static int +get_hash_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + if (!dev->data->hash_mac_addrs) + return -1; + + for (i = 0; i < dev_info.max_hash_mac_addrs; i++) + if (memcmp(addr, &dev->data->hash_mac_addrs[i], + ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +int +rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, + uint8_t on) +{ + int index; + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (is_zero_ether_addr(addr)) { + RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, addr); + /* Check if it's already there, and do nothing */ + if ((index >= 0) && on) + return 0; + + if (index < 0) { + if (!on) { + RTE_ETHDEV_LOG(ERR, + "Port %u: the MAC address was not set in UTA\n", + port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n", + port_id); + return -ENOSPC; + } + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP); + ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on); + if (ret == 0) { + /* Update address in NIC data structure */ + if (on) + ether_addr_copy(addr, + &dev->data->hash_mac_addrs[index]); + else + ether_addr_copy(&null_mac_addr, + &dev->data->hash_mac_addrs[index]); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->uc_all_hash_table_set)(dev, + on)); +} + +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, + uint16_t tx_rate) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (queue_idx > dev_info.max_tx_queues) { + RTE_ETHDEV_LOG(ERR, + "Set queue rate limit:port %u: invalid queue id=%u\n", + port_id, queue_idx); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_ETHDEV_LOG(ERR, + "Set queue rate limit:invalid tx_rate=%u, bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->set_queue_rate_limit)(dev, + queue_idx, tx_rate)); +} + +int +rte_eth_mirror_rule_set(uint16_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (mirror_conf->rule_type == 0) { + RTE_ETHDEV_LOG(ERR, "Mirror rule type can not be 0\n"); + return -EINVAL; + } + + if (mirror_conf->dst_pool >= ETH_64_POOLS) { + RTE_ETHDEV_LOG(ERR, "Invalid dst pool, pool id must be 0-%d\n", + ETH_64_POOLS - 1); + return -EINVAL; + } + + if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP | + ETH_MIRROR_VIRTUAL_POOL_DOWN)) && + (mirror_conf->pool_mask == 0)) { + RTE_ETHDEV_LOG(ERR, + 
"Invalid mirror pool, pool mask can not be 0\n"); + return -EINVAL; + } + + if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) && + mirror_conf->vlan.vlan_mask == 0) { + RTE_ETHDEV_LOG(ERR, + "Invalid vlan mask, vlan mask can not be 0\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->mirror_rule_set)(dev, + mirror_conf, rule_id, on)); +} + +int +rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->mirror_rule_reset)(dev, + rule_id)); +} + +RTE_INIT(eth_dev_init_cb_lists) +{ + int i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + TAILQ_INIT(&rte_eth_devices[i].link_intr_cbs); +} + +int +rte_eth_dev_callback_register(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *user_cb; + uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ + uint16_t last_port; + + if (!cb_fn) + return -EINVAL; + + if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { + RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id); + return -EINVAL; + } + + if (port_id == RTE_ETH_ALL) { + next_port = 0; + last_port = RTE_MAX_ETHPORTS - 1; + } else { + next_port = last_port = port_id; + } + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + do { + dev = &rte_eth_devices[next_port]; + + TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { + if (user_cb->cb_fn == cb_fn && + user_cb->cb_arg == cb_arg && + user_cb->event == event) { + break; + } + } + + /* create a new callback. */ + if (user_cb == NULL) { + user_cb = rte_zmalloc("INTR_USER_CALLBACK", + sizeof(struct rte_eth_dev_callback), 0); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), + user_cb, next); + } else { + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + rte_eth_dev_callback_unregister(port_id, event, + cb_fn, cb_arg); + return -ENOMEM; + } + + } + } while (++next_port <= last_port); + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return 0; +} + +int +rte_eth_dev_callback_unregister(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *cb, *next; + uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ + uint16_t last_port; + + if (!cb_fn) + return -EINVAL; + + if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { + RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id); + return -EINVAL; + } + + if (port_id == RTE_ETH_ALL) { + next_port = 0; + last_port = RTE_MAX_ETHPORTS - 1; + } else { + next_port = last_port = port_id; + } + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + do { + dev = &rte_eth_devices[next_port]; + ret = 0; + for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; + cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || + (cb->cb_arg != (void *)-1 && cb->cb_arg != cb_arg)) + continue; + + /* + * if this callback is not executing right now, + * then remove it. 
+ */ + if (cb->active == 0) { + TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); + rte_free(cb); + } else { + ret = -EAGAIN; + } + } + } while (++next_port <= last_port); + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return ret; +} + +int +_rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event, void *ret_param) +{ + struct rte_eth_dev_callback *cb_lst; + struct rte_eth_dev_callback dev_cb; + int rc = 0; + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { + if (cb_lst->cb_fn == NULL || cb_lst->event != event) + continue; + dev_cb = *cb_lst; + cb_lst->active = 1; + if (ret_param != NULL) + dev_cb.ret_param = ret_param; + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + rc = dev_cb.cb_fn(dev->data->port_id, dev_cb.event, + dev_cb.cb_arg, dev_cb.ret_param); + rte_spinlock_lock(&rte_eth_dev_cb_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return rc; +} + +void +rte_eth_dev_probing_finish(struct rte_eth_dev *dev) +{ + if (dev == NULL) + return; + + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_NEW, NULL); + + dev->state = RTE_ETH_DEV_ATTACHED; +} + +int +rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + uint16_t qid; + int rc; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + if (!dev->intr_handle) { + RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n"); + return -EPERM; + } + + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + vec = intr_handle->intr_vec[qid]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_ETHDEV_LOG(ERR, + "p %u q %u rx ctl error op %d epfd %d vec %u\n", + port_id, qid, op, epfd, vec); + } + } + + return 0; +} + +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, + uint16_t queue_id, size_t size, unsigned align, + int socket_id) +{ + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", + dev->device->driver->name, ring_name, + dev->data->port_id, queue_id); + + mz = rte_memzone_lookup(z_name); + if (mz) + return mz; + + return rte_memzone_reserve_aligned(z_name, size, socket_id, + RTE_MEMZONE_IOVA_CONTIG, align); +} + +int __rte_experimental +rte_eth_dev_create(struct rte_device *device, const char *name, + size_t priv_data_size, + ethdev_bus_specific_init ethdev_bus_specific_init, + void *bus_init_params, + ethdev_init_t ethdev_init, void *init_params) +{ + struct rte_eth_dev *ethdev; + int retval; + + RTE_FUNC_PTR_OR_ERR_RET(*ethdev_init, -EINVAL); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ethdev = rte_eth_dev_allocate(name); + if (!ethdev) { + retval = -ENODEV; + goto probe_failed; + } + + if (priv_data_size) { + ethdev->data->dev_private = rte_zmalloc_socket( + name, priv_data_size, RTE_CACHE_LINE_SIZE, + device->numa_node); + + if (!ethdev->data->dev_private) { + RTE_LOG(ERR, EAL, "failed to allocate private data"); + retval = -ENOMEM; + goto probe_failed; + } + } + } else { + ethdev = rte_eth_dev_attach_secondary(name); + if (!ethdev) { + RTE_LOG(ERR, EAL, "secondary process attach failed, " + "ethdev doesn't exist"); + retval = -ENODEV; + goto probe_failed; + } + } + + ethdev->device = 
device; + + if (ethdev_bus_specific_init) { + retval = ethdev_bus_specific_init(ethdev, bus_init_params); + if (retval) { + RTE_LOG(ERR, EAL, + "ethdev bus specific initialisation failed"); + goto probe_failed; + } + } + + retval = ethdev_init(ethdev, init_params); + if (retval) { + RTE_LOG(ERR, EAL, "ethdev initialisation failed"); + goto probe_failed; + } + + rte_eth_dev_probing_finish(ethdev); + + return retval; +probe_failed: + /* free ports private data if primary process */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(ethdev->data->dev_private); + + rte_eth_dev_release_port(ethdev); + + return retval; +} + +int __rte_experimental +rte_eth_dev_destroy(struct rte_eth_dev *ethdev, + ethdev_uninit_t ethdev_uninit) +{ + int ret; + + ethdev = rte_eth_dev_allocated(ethdev->data->name); + if (!ethdev) + return -ENODEV; + + RTE_FUNC_PTR_OR_ERR_RET(*ethdev_uninit, -EINVAL); + if (ethdev_uninit) { + ret = ethdev_uninit(ethdev); + if (ret) + return ret; + } + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(ethdev->data->dev_private); + + ethdev->data->dev_private = NULL; + + return rte_eth_dev_release_port(ethdev); +} + +int +rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, + int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + int rc; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + if (!dev->intr_handle) { + RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n"); + return -EPERM; + } + + vec = intr_handle->intr_vec[queue_id]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_ETHDEV_LOG(ERR, + "p %u q %u rx ctl error op %d epfd %d vec %u\n", + port_id, queue_id, op, epfd, vec); + return rc; + } + + return 0; +} + +int +rte_eth_dev_rx_intr_enable(uint16_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_enable)(dev, + queue_id)); +} + +int +rte_eth_dev_rx_intr_disable(uint16_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_disable)(dev, + queue_id)); +} + + +int +rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, + RTE_ETH_FILTER_NOP, NULL); +} + +int +rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->filter_ctrl)(dev, 
filter_type, + filter_op, arg)); +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return cb; +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + /* Add the callbacks at first position */ + cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + rte_smp_wmb(); + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return cb; +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.tx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_tx_cb_lock); + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + rte_spinlock_unlock(&rte_eth_tx_cb_lock); + + return cb; +} + +int +rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. 
*/ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) + return -EINVAL; + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb; + struct rte_eth_rxtx_callback **prev_cb; + int ret = -EINVAL; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + prev_cb = &dev->post_rx_burst_cbs[queue_id]; + for (; *prev_cb != NULL; prev_cb = &cb->next) { + cb = *prev_cb; + if (cb == user_cb) { + /* Remove the user cb from the callback list. */ + *prev_cb = cb->next; + ret = 0; + break; + } + } + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return ret; +} + +int +rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) + return -EINVAL; + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + int ret = -EINVAL; + struct rte_eth_rxtx_callback *cb; + struct rte_eth_rxtx_callback **prev_cb; + + rte_spinlock_lock(&rte_eth_tx_cb_lock); + prev_cb = &dev->pre_tx_burst_cbs[queue_id]; + for (; *prev_cb != NULL; prev_cb = &cb->next) { + cb = *prev_cb; + if (cb == user_cb) { + /* Remove the user cb from the callback list. */ + *prev_cb = cb->next; + ret = 0; + break; + } + } + rte_spinlock_unlock(&rte_eth_tx_cb_lock); + + return ret; +} + +int +rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->rxq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_tx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->txq_info_get(dev, queue_id, qinfo); + + return 0; +} + +int +rte_eth_dev_set_mc_addr_list(uint16_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP); + return eth_err(port_id, dev->dev_ops->set_mc_addr_list(dev, + mc_addr_set, nb_mc_addr)); +} + +int +rte_eth_timesync_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_enable)(dev)); +} + +int +rte_eth_timesync_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_disable)(dev)); +} + +int +rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp, + uint32_t flags) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_rx_timestamp) + (dev, timestamp, flags)); +} + +int +rte_eth_timesync_read_tx_timestamp(uint16_t port_id, + struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_tx_timestamp) + (dev, timestamp)); +} + +int +rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_adjust_time)(dev, + delta)); +} + +int +rte_eth_timesync_read_time(uint16_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_time)(dev, + timestamp)); +} + +int +rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_write_time)(dev, + timestamp)); +} + +int +rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_reg)(dev, info)); +} + +int +rte_eth_dev_get_eeprom_length(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_eeprom_length)(dev)); +} + +int +rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_eeprom)(dev, info)); +} + +int +rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->set_eeprom)(dev, info)); +} + +int __rte_experimental +rte_eth_dev_get_module_info(uint16_t port_id, + struct rte_eth_dev_module_info *modinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = 
&rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_info, -ENOTSUP); + return (*dev->dev_ops->get_module_info)(dev, modinfo); +} + +int __rte_experimental +rte_eth_dev_get_module_eeprom(uint16_t port_id, + struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_eeprom, -ENOTSUP); + return (*dev->dev_ops->get_module_eeprom)(dev, info); +} + +int +rte_eth_dev_get_dcb_info(uint16_t port_id, + struct rte_eth_dcb_info *dcb_info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_dcb_info)(dev, dcb_info)); +} + +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (l2_tunnel == NULL) { + RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_eth_type_conf, + -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->l2_tunnel_eth_type_conf)(dev, + l2_tunnel)); +} + +int +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (l2_tunnel == NULL) { + RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n"); + return -EINVAL; + } + + if (mask == 0) { + RTE_ETHDEV_LOG(ERR, "Mask should have a value\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_offload_set, + -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->l2_tunnel_offload_set)(dev, + l2_tunnel, mask, en)); +} + +static void +rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc, + const struct rte_eth_desc_lim *desc_lim) +{ + if (desc_lim->nb_align != 0) + *nb_desc = RTE_ALIGN_CEIL(*nb_desc, desc_lim->nb_align); + + if (desc_lim->nb_max != 0) + *nb_desc = RTE_MIN(*nb_desc, desc_lim->nb_max); + + *nb_desc = RTE_MAX(*nb_desc, desc_lim->nb_min); +} + +int +rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + if (nb_rx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_rx_desc, &dev_info.rx_desc_lim); + + if (nb_tx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_tx_desc, &dev_info.tx_desc_lim); + + return 0; +} + +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (pool == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + + if (*dev->dev_ops->pool_ops_supported == NULL) + return 1; /* all pools 
are supported */ + + return (*dev->dev_ops->pool_ops_supported)(dev, pool); +} + +/** + * A set of values to describe the possible states of a switch domain. + */ +enum rte_eth_switch_domain_state { + RTE_ETH_SWITCH_DOMAIN_UNUSED = 0, + RTE_ETH_SWITCH_DOMAIN_ALLOCATED +}; + +/** + * Array of switch domains available for allocation. Array is sized to + * RTE_MAX_ETHPORTS elements as there cannot be more active switch domains than + * ethdev ports in a single process. + */ +struct rte_eth_dev_switch { + enum rte_eth_switch_domain_state state; +} rte_eth_switch_domains[RTE_MAX_ETHPORTS]; + +int __rte_experimental +rte_eth_switch_domain_alloc(uint16_t *domain_id) +{ + unsigned int i; + + *domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; + + for (i = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID + 1; + i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_switch_domains[i].state == + RTE_ETH_SWITCH_DOMAIN_UNUSED) { + rte_eth_switch_domains[i].state = + RTE_ETH_SWITCH_DOMAIN_ALLOCATED; + *domain_id = i; + return 0; + } + } + + return -ENOSPC; +} + +int __rte_experimental +rte_eth_switch_domain_free(uint16_t domain_id) +{ + if (domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID || + domain_id >= RTE_MAX_ETHPORTS) + return -EINVAL; + + if (rte_eth_switch_domains[domain_id].state != + RTE_ETH_SWITCH_DOMAIN_ALLOCATED) + return -EINVAL; + + rte_eth_switch_domains[domain_id].state = RTE_ETH_SWITCH_DOMAIN_UNUSED; + + return 0; +} + +typedef int (*rte_eth_devargs_callback_t)(char *str, void *data); + +static int +rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in) +{ + int state; + struct rte_kvargs_pair *pair; + char *letter; + + arglist->str = strdup(str_in); + if (arglist->str == NULL) + return -ENOMEM; + + letter = arglist->str; + state = 0; + arglist->count = 0; + pair = &arglist->pairs[0]; + while (1) { + switch (state) { + case 0: /* Initial */ + if (*letter == '=') + return -EINVAL; + else if (*letter == '\0') + return 0; + + state = 1; + pair->key = letter; + /* fall-thru */ + + case 1: /* Parsing key */ + if (*letter == '=') { + *letter = '\0'; + pair->value = letter + 1; + state = 2; + } else if (*letter == ',' || *letter == '\0') + return -EINVAL; + break; + + + case 2: /* Parsing value */ + if (*letter == '[') + state = 3; + else if (*letter == ',') { + *letter = '\0'; + arglist->count++; + pair = &arglist->pairs[arglist->count]; + state = 0; + } else if (*letter == '\0') { + letter--; + arglist->count++; + pair = &arglist->pairs[arglist->count]; + state = 0; + } + break; + + case 3: /* Parsing list */ + if (*letter == ']') + state = 2; + else if (*letter == '\0') + return -EINVAL; + break; + } + letter++; + } +} + +static int +rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback, + void *data) +{ + char *str_start; + int state; + int result; + + if (*str != '[') + /* Single element, not a list */ + return callback(str, data); + + /* Sanity check, then strip the brackets */ + str_start = &str[strlen(str) - 1]; + if (*str_start != ']') { + RTE_LOG(ERR, EAL, "(%s): List does not end with ']'", str); + return -EINVAL; + } + str++; + *str_start = '\0'; + + /* Process list elements */ + state = 0; + while (1) { + if (state == 0) { + if (*str == '\0') + break; + if (*str != ',') { + str_start = str; + state = 1; + } + } else if (state == 1) { + if (*str == ',' || *str == '\0') { + if (str > str_start) { + /* Non-empty string fragment */ + *str = '\0'; + result = callback(str_start, data); + if (result < 0) + return result; + } + state = 0; + } + } + str++; + } + return 0; +} 
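[Editor's note — illustrative sketch only, not part of the upstream patch: the devargs tokeniser and list parser above feed the experimental rte_eth_devargs_parse() entry point defined below, which expands a device argument such as "representor=[0,2,4-7]" into a list of representor port ids. A minimal, hypothetical caller might look like this:]

/* Hypothetical usage of the "representor" devargs syntax handled below. */
struct rte_eth_devargs da;
if (rte_eth_devargs_parse("representor=[0,2,4-7]", &da) == 0) {
	/* da.nb_representor_ports == 6: ports 0, 2, 4, 5, 6 and 7 are listed. */
}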
+ +static int +rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list, + const uint16_t max_list) +{ + uint16_t lo, hi, val; + int result; + + result = sscanf(str, "%hu-%hu", &lo, &hi); + if (result == 1) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = lo; + } else if (result == 2) { + if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS) + return -EINVAL; + for (val = lo; val <= hi; val++) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = val; + } + } else + return -EINVAL; + return 0; +} + + +static int +rte_eth_devargs_parse_representor_ports(char *str, void *data) +{ + struct rte_eth_devargs *eth_da = data; + + return rte_eth_devargs_process_range(str, eth_da->representor_ports, + &eth_da->nb_representor_ports, RTE_MAX_ETHPORTS); +} + +int __rte_experimental +rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da) +{ + struct rte_kvargs args; + struct rte_kvargs_pair *pair; + unsigned int i; + int result = 0; + + memset(eth_da, 0, sizeof(*eth_da)); + + result = rte_eth_devargs_tokenise(&args, dargs); + if (result < 0) + goto parse_cleanup; + + for (i = 0; i < args.count; i++) { + pair = &args.pairs[i]; + if (strcmp("representor", pair->key) == 0) { + result = rte_eth_devargs_parse_list(pair->value, + rte_eth_devargs_parse_representor_ports, + eth_da); + if (result < 0) + goto parse_cleanup; + } + } + +parse_cleanup: + if (args.str) + free(args.str); + + return result; +} + +RTE_INIT(ethdev_init_log) +{ + rte_eth_dev_logtype = rte_log_register("lib.ethdev"); + if (rte_eth_dev_logtype >= 0) + rte_log_set_level(rte_eth_dev_logtype, RTE_LOG_INFO); +} diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h new file mode 100644 index 00000000..7070e9ab --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev.h @@ -0,0 +1,4298 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_H_ +#define _RTE_ETHDEV_H_ + +/** + * @file + * + * RTE Ethernet Device API + * + * The Ethernet Device API is composed of two parts: + * + * - The application-oriented Ethernet API that includes functions to setup + * an Ethernet device (configure it, setup its RX and TX queues and start it), + * to get its MAC address, the speed and the status of its physical link, + * to receive and to transmit packets, and so on. + * + * - The driver-oriented Ethernet API that exports functions allowing + * an Ethernet Poll Mode Driver (PMD) to allocate an Ethernet device instance, + * create memzone for HW rings and process registered callbacks, and so on. + * PMDs should include rte_ethdev_driver.h instead of this header. + * + * By default, all the functions of the Ethernet Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on + * different logical cores to work on the same target object. For instance, + * the receive function of a PMD cannot be invoked in parallel on two logical + * cores to poll the same RX queue [of the same port]. Of course, this function + * can be invoked in parallel by different logical cores on different RX queues. + * It is the responsibility of the upper level application to enforce this rule. + * + * If needed, parallel accesses by multiple logical cores to shared queues + * shall be explicitly protected by dedicated inline lock-aware functions + * built on top of their corresponding lock-free functions of the PMD API. 
+ * + * In all functions of the Ethernet API, the Ethernet device is + * designated by an integer >= 0 named the device port identifier. + * + * At the Ethernet driver level, Ethernet devices are represented by a generic + * data structure of type *rte_eth_dev*. + * + * Ethernet devices are dynamically registered during the PCI probing phase + * performed at EAL initialization time. + * When an Ethernet device is being probed, an *rte_eth_dev* structure and + * a new port identifier are allocated for that device. Then, the eth_dev_init() + * function supplied by the Ethernet driver matching the probed PCI + * device is invoked to properly initialize the device. + * + * The role of the device init function consists of resetting the hardware, + * checking access to Non-volatile Memory (NVM), reading the MAC address + * from NVM etc. + * + * If the device init operation is successful, the correspondence between + * the port identifier assigned to the new device and its associated + * *rte_eth_dev* structure is effectively registered. + * Otherwise, both the *rte_eth_dev* structure and the port identifier are + * freed. + * + * The functions exported by the application Ethernet API to setup a device + * designated by its port identifier must be invoked in the following order: + * - rte_eth_dev_configure() + * - rte_eth_tx_queue_setup() + * - rte_eth_rx_queue_setup() + * - rte_eth_dev_start() + * + * Then, the network application can invoke, in any order, the functions + * exported by the Ethernet API to get the MAC address of a given device, to + * get the speed and the status of a device physical link, to receive/transmit + * [burst of] packets, and so on. + * + * If the application wants to change the configuration (i.e. call + * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or + * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_eth_dev_start() + * again. The transmit and receive functions should not be invoked when the + * device is stopped. + * + * Please note that some configuration is not stored between calls to + * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will + * be retained: + * + * - flow control settings + * - receive mode configuration (promiscuous mode, hardware checksum mode, + * RSS/VMDQ settings etc.) + * - VLAN filtering configuration + * - MAC addresses supplied to MAC address array + * - flow director filtering mode (but not filtering rules) + * - NIC queue statistics mappings + * + * Any other configuration will not be stored and will need to be re-entered + * before a call to rte_eth_dev_start(). + * + * Finally, a network application can close an Ethernet device by invoking the + * rte_eth_dev_close() function. + * + * Each function of the application Ethernet API invokes a specific function + * of the PMD that controls the target device designated by its port + * identifier. + * For this purpose, all device-specific functions of an Ethernet driver are + * supplied through a set of pointers contained in a generic structure of type + * *eth_dev_ops*. + * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev* + * structure by the device init function of the Ethernet driver, which is + * invoked during the PCI probing phase, as explained earlier. 
+ * + * In other words, each function of the Ethernet API simply retrieves the + * *rte_eth_dev* structure associated with the device port identifier and + * performs an indirect invocation of the corresponding driver function + * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure. + * + * For performance reasons, the address of the burst-oriented RX and TX + * functions of the Ethernet driver are not contained in the *eth_dev_ops* + * structure. Instead, they are directly stored at the beginning of the + * *rte_eth_dev* structure to avoid an extra indirect memory access during + * their invocation. + * + * RTE ethernet device drivers do not use interrupts for transmitting or + * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit + * functions to applications. + * Both receive and transmit functions are packet-burst oriented to minimize + * their cost per packet through the following optimizations: + * + * - Sharing among multiple packets the incompressible cost of the + * invocation of receive/transmit functions. + * + * - Enabling receive/transmit functions to take advantage of burst-oriented + * hardware features (L1 cache, prefetch instructions, NIC head/tail + * registers) to minimize the number of CPU cycles per packet, for instance, + * by avoiding useless read memory accesses to ring descriptors, or by + * systematically using arrays of pointers that exactly fit L1 cache line + * boundaries and sizes. + * + * The burst-oriented receive function does not provide any error notification, + * to avoid the corresponding overhead. As a hint, the upper-level application + * might check the status of the device link once being systematically returned + * a 0 value by the receive function of the driver for a given number of tries. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Use this macro to check if LRO API is supported */ +#define RTE_ETHDEV_HAS_LRO_SUPPORT + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ether.h" +#include "rte_eth_ctrl.h" +#include "rte_dev_info.h" + +extern int rte_eth_dev_logtype; + +#define RTE_ETHDEV_LOG(level, ...) \ + rte_log(RTE_LOG_ ## level, rte_eth_dev_logtype, "" __VA_ARGS__) + +struct rte_mbuf; + +/** + * A structure used to retrieve statistics for an Ethernet port. + * Not all statistics fields in struct rte_eth_stats are supported + * by any type of network interface card (NIC). If any statistics + * field is not supported, its value is 0. + */ +struct rte_eth_stats { + uint64_t ipackets; /**< Total number of successfully received packets. */ + uint64_t opackets; /**< Total number of successfully transmitted packets.*/ + uint64_t ibytes; /**< Total number of successfully received bytes. */ + uint64_t obytes; /**< Total number of successfully transmitted bytes. */ + uint64_t imissed; + /**< Total of RX packets dropped by the HW, + * because there are no available buffer (i.e. RX queues are full). + */ + uint64_t ierrors; /**< Total number of erroneous received packets. */ + uint64_t oerrors; /**< Total number of failed transmitted packets. */ + uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */ + uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue RX packets. */ + uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue TX packets. */ + uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully received queue bytes. 
*/ + uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully transmitted queue bytes. */ + uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue packets received that are dropped. */ +}; + +/** + * Device supported speeds bitmap flags + */ +#define ETH_LINK_SPEED_AUTONEG (0 << 0) /**< Autonegotiate (all speeds) */ +#define ETH_LINK_SPEED_FIXED (1 << 0) /**< Disable autoneg (fixed speed) */ +#define ETH_LINK_SPEED_10M_HD (1 << 1) /**< 10 Mbps half-duplex */ +#define ETH_LINK_SPEED_10M (1 << 2) /**< 10 Mbps full-duplex */ +#define ETH_LINK_SPEED_100M_HD (1 << 3) /**< 100 Mbps half-duplex */ +#define ETH_LINK_SPEED_100M (1 << 4) /**< 100 Mbps full-duplex */ +#define ETH_LINK_SPEED_1G (1 << 5) /**< 1 Gbps */ +#define ETH_LINK_SPEED_2_5G (1 << 6) /**< 2.5 Gbps */ +#define ETH_LINK_SPEED_5G (1 << 7) /**< 5 Gbps */ +#define ETH_LINK_SPEED_10G (1 << 8) /**< 10 Gbps */ +#define ETH_LINK_SPEED_20G (1 << 9) /**< 20 Gbps */ +#define ETH_LINK_SPEED_25G (1 << 10) /**< 25 Gbps */ +#define ETH_LINK_SPEED_40G (1 << 11) /**< 40 Gbps */ +#define ETH_LINK_SPEED_50G (1 << 12) /**< 50 Gbps */ +#define ETH_LINK_SPEED_56G (1 << 13) /**< 56 Gbps */ +#define ETH_LINK_SPEED_100G (1 << 14) /**< 100 Gbps */ + +/** + * Ethernet numeric link speeds in Mbps + */ +#define ETH_SPEED_NUM_NONE 0 /**< Not defined */ +#define ETH_SPEED_NUM_10M 10 /**< 10 Mbps */ +#define ETH_SPEED_NUM_100M 100 /**< 100 Mbps */ +#define ETH_SPEED_NUM_1G 1000 /**< 1 Gbps */ +#define ETH_SPEED_NUM_2_5G 2500 /**< 2.5 Gbps */ +#define ETH_SPEED_NUM_5G 5000 /**< 5 Gbps */ +#define ETH_SPEED_NUM_10G 10000 /**< 10 Gbps */ +#define ETH_SPEED_NUM_20G 20000 /**< 20 Gbps */ +#define ETH_SPEED_NUM_25G 25000 /**< 25 Gbps */ +#define ETH_SPEED_NUM_40G 40000 /**< 40 Gbps */ +#define ETH_SPEED_NUM_50G 50000 /**< 50 Gbps */ +#define ETH_SPEED_NUM_56G 56000 /**< 56 Gbps */ +#define ETH_SPEED_NUM_100G 100000 /**< 100 Gbps */ + +/** + * A structure used to retrieve link-level information of an Ethernet port. + */ +__extension__ +struct rte_eth_link { + uint32_t link_speed; /**< ETH_SPEED_NUM_ */ + uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ + uint16_t link_autoneg : 1; /**< ETH_LINK_[AUTONEG/FIXED] */ + uint16_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */ +} __attribute__((aligned(8))); /**< aligned for atomic64 read/write */ + +/* Utility constants */ +#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection (see link_duplex). */ +#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection (see link_duplex). */ +#define ETH_LINK_DOWN 0 /**< Link is down (see link_status). */ +#define ETH_LINK_UP 1 /**< Link is up (see link_status). */ +#define ETH_LINK_FIXED 0 /**< No autonegotiation (see link_autoneg). */ +#define ETH_LINK_AUTONEG 1 /**< Autonegotiated (see link_autoneg). */ + +/** + * A structure used to configure the ring threshold registers of an RX/TX + * queue for an Ethernet port. + */ +struct rte_eth_thresh { + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. */ +}; + +/** + * Simple flags are used for rte_eth_conf.rxmode.mq_mode. + */ +#define ETH_MQ_RX_RSS_FLAG 0x1 +#define ETH_MQ_RX_DCB_FLAG 0x2 +#define ETH_MQ_RX_VMDQ_FLAG 0x4 + +/** + * A set of values to identify what method is to be used to route + * packets to multiple queues. 
+ */ +enum rte_eth_rx_mq_mode { + /** None of DCB,RSS or VMDQ mode */ + ETH_MQ_RX_NONE = 0, + + /** For RX side, only RSS is on */ + ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG, + /** For RX side,only DCB is on. */ + ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG, + /** Both DCB and RSS enable */ + ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG, + + /** Only VMDQ, no RSS nor DCB */ + ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG, + /** RSS mode with VMDQ */ + ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG, + /** Use VMDQ+DCB to route traffic to queues */ + ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG, + /** Enable both VMDQ and DCB in VMDq */ + ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG | + ETH_MQ_RX_VMDQ_FLAG, +}; + +/** + * for rx mq mode backward compatible + */ +#define ETH_RSS ETH_MQ_RX_RSS +#define VMDQ_DCB ETH_MQ_RX_VMDQ_DCB +#define ETH_DCB_RX ETH_MQ_RX_DCB + +/** + * A set of values to identify what method is to be used to transmit + * packets using multi-TCs. + */ +enum rte_eth_tx_mq_mode { + ETH_MQ_TX_NONE = 0, /**< It is in neither DCB nor VT mode. */ + ETH_MQ_TX_DCB, /**< For TX side,only DCB is on. */ + ETH_MQ_TX_VMDQ_DCB, /**< For TX side,both DCB and VT is on. */ + ETH_MQ_TX_VMDQ_ONLY, /**< Only VT on, no DCB */ +}; + +/** + * for tx mq mode backward compatible + */ +#define ETH_DCB_NONE ETH_MQ_TX_NONE +#define ETH_VMDQ_DCB_TX ETH_MQ_TX_VMDQ_DCB +#define ETH_DCB_TX ETH_MQ_TX_DCB + +/** + * A structure used to configure the RX features of an Ethernet port. + */ +struct rte_eth_rxmode { + /** The multi-queue packet distribution mode to be used, e.g. RSS. */ + enum rte_eth_rx_mq_mode mq_mode; + uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */ + uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + /** + * Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. + */ + uint64_t offloads; +}; + +/** + * VLAN types to indicate if it is for single VLAN, inner VLAN or outer VLAN. + * Note that single VLAN is treated the same as inner VLAN. + */ +enum rte_vlan_type { + ETH_VLAN_TYPE_UNKNOWN = 0, + ETH_VLAN_TYPE_INNER, /**< Inner VLAN. */ + ETH_VLAN_TYPE_OUTER, /**< Single VLAN, or outer VLAN. */ + ETH_VLAN_TYPE_MAX, +}; + +/** + * A structure used to describe a vlan filter. + * If the bit corresponding to a VID is set, such VID is on. + */ +struct rte_vlan_filter_conf { + uint64_t ids[64]; +}; + +/** + * A structure used to configure the Receive Side Scaling (RSS) feature + * of an Ethernet port. + * If not NULL, the *rss_key* pointer of the *rss_conf* structure points + * to an array holding the RSS key to use for hashing specific header + * fields of received packets. The length of this array should be indicated + * by *rss_key_len* below. Otherwise, a default random hash key is used by + * the device driver. + * + * The *rss_key_len* field of the *rss_conf* structure indicates the length + * in bytes of the array pointed by *rss_key*. To be compatible, this length + * will be checked in i40e only. Others assume 40 bytes to be used as before. + * + * The *rss_hf* field of the *rss_conf* structure indicates the different + * types of IPv4/IPv6 packets to which the RSS hashing must be applied. + * Supplying an *rss_hf* equal to zero disables the RSS feature. + */ +struct rte_eth_rss_conf { + uint8_t *rss_key; /**< If not NULL, 40-byte hash key. */ + uint8_t rss_key_len; /**< hash key length in bytes. 
*/ + uint64_t rss_hf; /**< Hash functions to apply - see below. */ +}; + +/* + * The RSS offload types are defined based on flow types which are defined + * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload + * types. The supported flow types or RSS offload types can be queried by + * rte_eth_dev_info_get(). + */ +#define ETH_RSS_IPV4 (1ULL << RTE_ETH_FLOW_IPV4) +#define ETH_RSS_FRAG_IPV4 (1ULL << RTE_ETH_FLOW_FRAG_IPV4) +#define ETH_RSS_NONFRAG_IPV4_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP) +#define ETH_RSS_NONFRAG_IPV4_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP) +#define ETH_RSS_NONFRAG_IPV4_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) +#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) +#define ETH_RSS_IPV6 (1ULL << RTE_ETH_FLOW_IPV6) +#define ETH_RSS_FRAG_IPV6 (1ULL << RTE_ETH_FLOW_FRAG_IPV6) +#define ETH_RSS_NONFRAG_IPV6_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP) +#define ETH_RSS_NONFRAG_IPV6_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP) +#define ETH_RSS_NONFRAG_IPV6_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) +#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) +#define ETH_RSS_L2_PAYLOAD (1ULL << RTE_ETH_FLOW_L2_PAYLOAD) +#define ETH_RSS_IPV6_EX (1ULL << RTE_ETH_FLOW_IPV6_EX) +#define ETH_RSS_IPV6_TCP_EX (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX) +#define ETH_RSS_IPV6_UDP_EX (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX) +#define ETH_RSS_PORT (1ULL << RTE_ETH_FLOW_PORT) +#define ETH_RSS_VXLAN (1ULL << RTE_ETH_FLOW_VXLAN) +#define ETH_RSS_GENEVE (1ULL << RTE_ETH_FLOW_GENEVE) +#define ETH_RSS_NVGRE (1ULL << RTE_ETH_FLOW_NVGRE) + +#define ETH_RSS_IP ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_IPV6_EX) + +#define ETH_RSS_UDP ( \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_UDP_EX) + +#define ETH_RSS_TCP ( \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_IPV6_TCP_EX) + +#define ETH_RSS_SCTP ( \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV6_SCTP) + +#define ETH_RSS_TUNNEL ( \ + ETH_RSS_VXLAN | \ + ETH_RSS_GENEVE | \ + ETH_RSS_NVGRE) + +/**< Mask of valid RSS hash protocols */ +#define ETH_RSS_PROTO_MASK ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_NONFRAG_IPV6_SCTP | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_L2_PAYLOAD | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX | \ + ETH_RSS_PORT | \ + ETH_RSS_VXLAN | \ + ETH_RSS_GENEVE | \ + ETH_RSS_NVGRE) + +/* + * Definitions used for redirection table entry size. + * Some RSS RETA sizes may not be supported by some drivers, check the + * documentation or the description of relevant functions for more details. + */ +#define ETH_RSS_RETA_SIZE_64 64 +#define ETH_RSS_RETA_SIZE_128 128 +#define ETH_RSS_RETA_SIZE_256 256 +#define ETH_RSS_RETA_SIZE_512 512 +#define RTE_RETA_GROUP_SIZE 64 + +/* Definitions used for VMDQ and DCB functionality */ +#define ETH_VMDQ_MAX_VLAN_FILTERS 64 /**< Maximum nb. of VMDQ vlan filters. */ +#define ETH_DCB_NUM_USER_PRIORITIES 8 /**< Maximum nb. of DCB priorities. */ +#define ETH_VMDQ_DCB_NUM_QUEUES 128 /**< Maximum nb. of VMDQ DCB queues. */ +#define ETH_DCB_NUM_QUEUES 128 /**< Maximum nb. of DCB queues. 
*/ + +/* DCB capability defines */ +#define ETH_DCB_PG_SUPPORT 0x00000001 /**< Priority Group(ETS) support. */ +#define ETH_DCB_PFC_SUPPORT 0x00000002 /**< Priority Flow Control support. */ + +/* Definitions used for VLAN Offload functionality */ +#define ETH_VLAN_STRIP_OFFLOAD 0x0001 /**< VLAN Strip On/Off */ +#define ETH_VLAN_FILTER_OFFLOAD 0x0002 /**< VLAN Filter On/Off */ +#define ETH_VLAN_EXTEND_OFFLOAD 0x0004 /**< VLAN Extend On/Off */ + +/* Definitions used for mask VLAN setting */ +#define ETH_VLAN_STRIP_MASK 0x0001 /**< VLAN Strip setting mask */ +#define ETH_VLAN_FILTER_MASK 0x0002 /**< VLAN Filter setting mask*/ +#define ETH_VLAN_EXTEND_MASK 0x0004 /**< VLAN Extend setting mask*/ +#define ETH_VLAN_ID_MAX 0x0FFF /**< VLAN ID is in lower 12 bits*/ + +/* Definitions used for receive MAC address */ +#define ETH_NUM_RECEIVE_MAC_ADDR 128 /**< Maximum nb. of receive mac addr. */ + +/* Definitions used for unicast hash */ +#define ETH_VMDQ_NUM_UC_HASH_ARRAY 128 /**< Maximum nb. of UC hash array. */ + +/* Definitions used for VMDQ pool rx mode setting */ +#define ETH_VMDQ_ACCEPT_UNTAG 0x0001 /**< accept untagged packets. */ +#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */ +#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */ +#define ETH_VMDQ_ACCEPT_BROADCAST 0x0008 /**< accept broadcast packets. */ +#define ETH_VMDQ_ACCEPT_MULTICAST 0x0010 /**< multicast promiscuous. */ + +/** Maximum nb. of vlan per mirror rule */ +#define ETH_MIRROR_MAX_VLANS 64 + +#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */ +#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */ +#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */ +#define ETH_MIRROR_VLAN 0x08 /**< VLAN Mirroring. */ +#define ETH_MIRROR_VIRTUAL_POOL_DOWN 0x10 /**< Virtual Pool downlink Mirroring. */ + +/** + * A structure used to configure VLAN traffic mirror of an Ethernet port. + */ +struct rte_eth_vlan_mirror { + uint64_t vlan_mask; /**< mask for valid VLAN ID. */ + /** VLAN ID list for vlan mirroring. */ + uint16_t vlan_id[ETH_MIRROR_MAX_VLANS]; +}; + +/** + * A structure used to configure traffic mirror of an Ethernet port. + */ +struct rte_eth_mirror_conf { + uint8_t rule_type; /**< Mirroring rule type */ + uint8_t dst_pool; /**< Destination pool for this mirror rule. */ + uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */ + /** VLAN ID setting for VLAN mirroring. */ + struct rte_eth_vlan_mirror vlan; +}; + +/** + * A structure used to configure 64 entries of Redirection Table of the + * Receive Side Scaling (RSS) feature of an Ethernet port. To configure + * more than 64 entries supported by hardware, an array of this structure + * is needed. + */ +struct rte_eth_rss_reta_entry64 { + uint64_t mask; + /**< Mask bits indicate which entries need to be updated/queried. */ + uint16_t reta[RTE_RETA_GROUP_SIZE]; + /**< Group of 64 redirection table entries. */ +}; + +/** + * This enum indicates the possible number of traffic classes + * in DCB configurations + */ +enum rte_eth_nb_tcs { + ETH_4_TCS = 4, /**< 4 TCs with DCB. */ + ETH_8_TCS = 8 /**< 8 TCs with DCB. */ +}; + +/** + * This enum indicates the possible number of queue pools + * in VMDQ configurations. + */ +enum rte_eth_nb_pools { + ETH_8_POOLS = 8, /**< 8 VMDq pools. */ + ETH_16_POOLS = 16, /**< 16 VMDq pools. */ + ETH_32_POOLS = 32, /**< 32 VMDq pools. */ + ETH_64_POOLS = 64 /**< 64 VMDq pools. */ +}; + +/* This structure may be extended in future. 
*/ +struct rte_eth_dcb_rx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_dcb_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_dcb_tx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */ +}; + +/** + * A structure used to configure the VMDQ+DCB feature + * of an Ethernet port. + * + * Using this feature, packets are routed to a pool of queues, based + * on the vlan id in the vlan tag, and then to a specific queue within + * that pool, using the user priority vlan tag field. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules. + */ +struct rte_eth_vmdq_dcb_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; + /**< Selects a queue in a pool */ +}; + +/** + * A structure used to configure the VMDQ feature of an Ethernet port when + * not combined with the DCB feature. + * + * Using this feature, packets are routed to a pool of queues. By default, + * the pool selection is based on the MAC address, the vlan id in the + * vlan tag as specified in the pool_map array. + * Passing the ETH_VMDQ_ACCEPT_UNTAG in the rx_mode field allows pool + * selection using only the MAC address. MAC address to pool mapping is done + * using the rte_eth_dev_mac_addr_add function, with the pool parameter + * corresponding to the pool id. + * + * Queue selection within the selected pool will be done using RSS when + * it is enabled or revert to the first queue of the pool if not. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules or any pool MAC address. + */ +struct rte_eth_vmdq_rx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t enable_loop_back; /**< Enable VT loop back */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ +}; + +/** + * A structure used to configure the TX features of an Ethernet port. + */ +struct rte_eth_txmode { + enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ + /** + * Per-port Tx offloads to be set using DEV_TX_OFFLOAD_* flags. + * Only offloads set on tx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. 
+ */ + uint64_t offloads; + + /* For i40e specifically */ + uint16_t pvid; + __extension__ + uint8_t hw_vlan_reject_tagged : 1, + /**< If set, reject sending out tagged pkts */ + hw_vlan_reject_untagged : 1, + /**< If set, reject sending out untagged pkts */ + hw_vlan_insert_pvid : 1; + /**< If set, enable port based VLAN insertion */ +}; + +/** + * A structure used to configure an RX ring of an Ethernet port. + */ +struct rte_eth_rxconf { + struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */ + uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */ + uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ + uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_queue_offload_capa or rx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; +}; + +/** + * A structure used to configure a TX ring of an Ethernet port. + */ +struct rte_eth_txconf { + struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */ + uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */ + uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are + less free descriptors than this value. */ + + uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags. + * Only offloads set on tx_queue_offload_capa or tx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; +}; + +/** + * A structure contains information about HW descriptor ring limitations. + */ +struct rte_eth_desc_lim { + uint16_t nb_max; /**< Max allowed number of descriptors. */ + uint16_t nb_min; /**< Min allowed number of descriptors. */ + uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** + * Max allowed number of segments per whole packet. + * + * - For TSO packet this is the total number of data descriptors allowed + * by device. + * + * @see nb_mtu_seg_max + */ + uint16_t nb_seg_max; + + /** + * Max number of segments per one MTU. + * + * - For non-TSO packet, this is the maximum allowed number of segments + * in a single transmit packet. + * + * - For TSO packet each segment within the TSO may span up to this + * value. + * + * @see nb_seg_max + */ + uint16_t nb_mtu_seg_max; +}; + +/** + * This enum indicates the flow control mode + */ +enum rte_eth_fc_mode { + RTE_FC_NONE = 0, /**< Disable flow control. */ + RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */ + RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */ + RTE_FC_FULL /**< Enable flow control on both side. */ +}; + +/** + * A structure used to configure Ethernet flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. 
+ */ +struct rte_eth_fc_conf { + uint32_t high_water; /**< High threshold value to trigger XOFF */ + uint32_t low_water; /**< Low threshold value to trigger XON */ + uint16_t pause_time; /**< Pause quota in the Pause frame */ + uint16_t send_xon; /**< Is XON frame need be sent */ + enum rte_eth_fc_mode mode; /**< Link flow control mode */ + uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ + uint8_t autoneg; /**< Use Pause autoneg */ +}; + +/** + * A structure used to configure Ethernet priority flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_pfc_conf { + struct rte_eth_fc_conf fc; /**< General flow control parameter. */ + uint8_t priority; /**< VLAN User Priority. */ +}; + +/** + * Memory space that can be configured to store Flow Director filters + * in the board memory. + */ +enum rte_fdir_pballoc_type { + RTE_FDIR_PBALLOC_64K = 0, /**< 64k. */ + RTE_FDIR_PBALLOC_128K, /**< 128k. */ + RTE_FDIR_PBALLOC_256K, /**< 256k. */ +}; + +/** + * Select report mode of FDIR hash information in RX descriptors. + */ +enum rte_fdir_status_mode { + RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */ + RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */ + RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */ +}; + +/** + * A structure used to configure the Flow Director (FDIR) feature + * of an Ethernet port. + * + * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored. + */ +struct rte_fdir_conf { + enum rte_fdir_mode mode; /**< Flow Director mode. */ + enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */ + enum rte_fdir_status_mode status; /**< How to report FDIR hash. */ + /** RX queue of packets matching a "drop" filter in perfect mode. */ + uint8_t drop_queue; + struct rte_eth_fdir_masks mask; + struct rte_eth_fdir_flex_conf flex_conf; + /**< Flex payload configuration. */ +}; + +/** + * UDP tunneling configuration. + * Used to config the UDP port for a type of tunnel. + * NICs need the UDP port to identify the tunnel type. + * Normally a type of tunnel has a default UDP port, this structure can be used + * in case if the users want to change or support more UDP port. + */ +struct rte_eth_udp_tunnel { + uint16_t udp_port; /**< UDP port used for the tunnel. */ + uint8_t prot_type; /**< Tunnel type. Defined in rte_eth_tunnel_type. */ +}; + +/** + * A structure used to enable/disable specific device interrupts. + */ +struct rte_intr_conf { + /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ + uint32_t lsc:1; + /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ + uint32_t rxq:1; + /** enable/disable rmv interrupt. 0 (default) - disable, 1 enable */ + uint32_t rmv:1; +}; + +/** + * A structure used to configure an Ethernet port. + * Depending upon the RX multi-queue mode, extra advanced + * configuration settings may be needed. + */ +struct rte_eth_conf { + uint32_t link_speeds; /**< bitmap of ETH_LINK_SPEED_XXX of speeds to be + used. ETH_LINK_SPEED_FIXED disables link + autonegotiation, and a unique speed shall be + set. Otherwise, the bitmap defines the set of + speeds to be advertised. If the special value + ETH_LINK_SPEED_AUTONEG (0) is used, all speeds + supported are advertised. */ + struct rte_eth_rxmode rxmode; /**< Port RX configuration. */ + struct rte_eth_txmode txmode; /**< Port TX configuration. 
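The rte_eth_fc_conf structure above is normally read back from the port, adjusted, and written again through rte_eth_dev_flow_ctrl_get()/rte_eth_dev_flow_ctrl_set() (declared elsewhere in this header); a minimal sketch, with illustrative settings since thresholds and pause quanta are NIC specific:

#include <rte_ethdev.h>

static int
example_enable_pause(uint16_t port_id)
{
	struct rte_eth_fc_conf fc;
	int ret = rte_eth_dev_flow_ctrl_get(port_id, &fc);

	if (ret != 0)
		return ret;
	fc.mode = RTE_FC_FULL;   /* generate and honour pause frames */
	fc.autoneg = 1;          /* negotiate pause with the link peer */
	return rte_eth_dev_flow_ctrl_set(port_id, &fc);
}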
*/ + uint32_t lpbk_mode; /**< Loopback operation mode. By default the value + is 0, meaning the loopback mode is disabled. + Read the datasheet of given ethernet controller + for details. The possible values of this field + are defined in implementation of each driver. */ + struct { + struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */ + struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf; + /**< Port vmdq+dcb configuration. */ + struct rte_eth_dcb_rx_conf dcb_rx_conf; + /**< Port dcb RX configuration. */ + struct rte_eth_vmdq_rx_conf vmdq_rx_conf; + /**< Port vmdq RX configuration. */ + } rx_adv_conf; /**< Port RX filtering configuration. */ + union { + struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; + /**< Port vmdq+dcb TX configuration. */ + struct rte_eth_dcb_tx_conf dcb_tx_conf; + /**< Port dcb TX configuration. */ + struct rte_eth_vmdq_tx_conf vmdq_tx_conf; + /**< Port vmdq TX configuration. */ + } tx_adv_conf; /**< Port TX DCB configuration (union). */ + /** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC + is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */ + uint32_t dcb_capability_en; + struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */ + struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */ +}; + +/** + * A structure used to retrieve the contextual information of + * an Ethernet device, such as the controlling driver of the device, + * its PCI context, etc... + */ + +/** + * RX offload capabilities of a device. + */ +#define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 +#define DEV_RX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_RX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_RX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 +#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 +#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 +#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 +#define DEV_RX_OFFLOAD_HEADER_SPLIT 0x00000100 +#define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200 +#define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400 +#define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800 +#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000 +#define DEV_RX_OFFLOAD_SCATTER 0x00002000 +#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000 +#define DEV_RX_OFFLOAD_SECURITY 0x00008000 + +/** + * Invalid to set both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC + * No DEV_RX_OFFLOAD_CRC_STRIP flag means keep CRC + */ +#define DEV_RX_OFFLOAD_KEEP_CRC 0x00010000 +#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM) +#define DEV_RX_OFFLOAD_VLAN (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_VLAN_FILTER | \ + DEV_RX_OFFLOAD_VLAN_EXTEND) + +/* + * If new Rx offload capabilities are defined, they also must be + * mentioned in rte_rx_offload_names in rte_ethdev.c file. + */ + +/** + * TX offload capabilities of a device. + */ +#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001 +#define DEV_TX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_TX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_TX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_TX_OFFLOAD_SCTP_CKSUM 0x00000010 +#define DEV_TX_OFFLOAD_TCP_TSO 0x00000020 +#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 +#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 +#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. 
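Since only offloads advertised in rx_offload_capa/tx_offload_capa may be enabled, applications usually mask their wish list against rte_eth_dev_info before configuring the port; a minimal sketch (the helper name and the chosen flags are illustrative):

#include <rte_ethdev.h>

static void
example_select_offloads(uint16_t port_id, struct rte_eth_conf *conf)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* Rx checksum and VLAN stripping, only if the device supports them. */
	conf->rxmode.offloads = (DEV_RX_OFFLOAD_CHECKSUM |
				 DEV_RX_OFFLOAD_VLAN_STRIP) &
				dev_info.rx_offload_capa;
	/* Tx L3/L4 checksum offload, only if supported. */
	conf->txmode.offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
				 DEV_TX_OFFLOAD_TCP_CKSUM) &
				dev_info.tx_offload_capa;
}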
*/ +#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 +#define DEV_TX_OFFLOAD_MT_LOCKFREE 0x00004000 +/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same + * tx queue without SW lock. + */ +#define DEV_TX_OFFLOAD_MULTI_SEGS 0x00008000 +/**< Device supports multi segment send. */ +#define DEV_TX_OFFLOAD_MBUF_FAST_FREE 0x00010000 +/**< Device supports optimization for fast release of mbufs. + * When set application must guarantee that per-queue all mbufs comes from + * the same mempool and has refcnt = 1. + */ +#define DEV_TX_OFFLOAD_SECURITY 0x00020000 +/** + * Device supports generic UDP tunneled packet TSO. + * Application must set PKT_TX_TUNNEL_UDP and other mbuf fields required + * for tunnel TSO. + */ +#define DEV_TX_OFFLOAD_UDP_TNL_TSO 0x00040000 +/** + * Device supports generic IP tunneled packet TSO. + * Application must set PKT_TX_TUNNEL_IP and other mbuf fields required + * for tunnel TSO. + */ +#define DEV_TX_OFFLOAD_IP_TNL_TSO 0x00080000 + +#define RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP 0x00000001 +/**< Device supports Rx queue setup after device started*/ +#define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002 +/**< Device supports Tx queue setup after device started*/ + +/* + * If new Tx offload capabilities are defined, they also must be + * mentioned in rte_tx_offload_names in rte_ethdev.c file. + */ + +/* + * Fallback default preferred Rx/Tx port parameters. + * These are used if an application requests default parameters + * but the PMD does not provide preferred values. + */ +#define RTE_ETH_DEV_FALLBACK_RX_RINGSIZE 512 +#define RTE_ETH_DEV_FALLBACK_TX_RINGSIZE 512 +#define RTE_ETH_DEV_FALLBACK_RX_NBQUEUES 1 +#define RTE_ETH_DEV_FALLBACK_TX_NBQUEUES 1 + +/** + * Preferred Rx/Tx port parameters. + * There are separate instances of this structure for transmission + * and reception respectively. + */ +struct rte_eth_dev_portconf { + uint16_t burst_size; /**< Device-preferred burst size */ + uint16_t ring_size; /**< Device-preferred size of queue rings */ + uint16_t nb_queues; /**< Device-preferred number of queues */ +}; + +/** + * Default values for switch domain id when ethdev does not support switch + * domain definitions. + */ +#define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID (0) + +/** + * Ethernet device associated switch information + */ +struct rte_eth_switch_info { + const char *name; /**< switch name */ + uint16_t domain_id; /**< switch domain id */ + uint16_t port_id; + /**< + * mapping to the devices physical switch port as enumerated from the + * perspective of the embedded interconnect/switch. For SR-IOV enabled + * device this may correspond to the VF_ID of each virtual function, + * but each driver should explicitly define the mapping of switch + * port identifier to that physical interconnect/switch + */ +}; + +/** + * Ethernet device information + */ +struct rte_eth_dev_info { + struct rte_device *device; /** Generic device information */ + const char *driver_name; /**< Device Driver name. */ + unsigned int if_index; /**< Index to bound host interface, or 0 if none. + Use if_indextoname() to translate into an interface name. */ + const uint32_t *dev_flags; /**< Device flags */ + uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */ + uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */ + uint16_t max_rx_queues; /**< Maximum number of RX queues. 
*/ + uint16_t max_tx_queues; /**< Maximum number of TX queues. */ + uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ + uint32_t max_hash_mac_addrs; + /** Maximum number of hash MAC addresses for MTA and UTA. */ + uint16_t max_vfs; /**< Maximum number of VFs. */ + uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ + uint64_t rx_offload_capa; + /**< All RX offload capabilities including all per-queue ones */ + uint64_t tx_offload_capa; + /**< All TX offload capabilities including all per-queue ones */ + uint64_t rx_queue_offload_capa; + /**< Device per-queue RX offload capabilities. */ + uint64_t tx_queue_offload_capa; + /**< Device per-queue TX offload capabilities. */ + uint16_t reta_size; + /**< Device redirection table size, the total number of entries. */ + uint8_t hash_key_size; /**< Hash key size in bytes */ + /** Bit mask of RSS offloads, the bit offset also means flow type */ + uint64_t flow_type_rss_offloads; + struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */ + struct rte_eth_txconf default_txconf; /**< Default TX configuration */ + uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */ + uint16_t vmdq_queue_num; /**< Queue number for VMDQ pools. */ + uint16_t vmdq_pool_base; /**< First ID of VMDQ pools. */ + struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */ + struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */ + uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). */ + /** Configured number of rx/tx queues */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + /** Rx parameter recommendations */ + struct rte_eth_dev_portconf default_rxportconf; + /** Tx parameter recommendations */ + struct rte_eth_dev_portconf default_txportconf; + /** Generic device capabilities (RTE_ETH_DEV_CAPA_). */ + uint64_t dev_capa; + /** + * Switching information for ports on a device with a + * embedded managed interconnect/switch. + */ + struct rte_eth_switch_info switch_info; +}; + +/** + * Ethernet device RX queue information structure. + * Used to retieve information about configured queue. + */ +struct rte_eth_rxq_info { + struct rte_mempool *mp; /**< mempool used by that queue. */ + struct rte_eth_rxconf conf; /**< queue config parameters. */ + uint8_t scattered_rx; /**< scattered packets RX supported. */ + uint16_t nb_desc; /**< configured number of RXDs. */ +} __rte_cache_min_aligned; + +/** + * Ethernet device TX queue information structure. + * Used to retrieve information about configured queue. + */ +struct rte_eth_txq_info { + struct rte_eth_txconf conf; /**< queue config parameters. */ + uint16_t nb_desc; /**< configured number of TXDs. */ +} __rte_cache_min_aligned; + +/** Maximum name length for extended statistics counters */ +#define RTE_ETH_XSTATS_NAME_SIZE 64 + +/** + * An Ethernet device extended statistic structure + * + * This structure is used by rte_eth_xstats_get() to provide + * statistics that are not provided in the generic *rte_eth_stats* + * structure. + * It maps a name id, corresponding to an index in the array returned + * by rte_eth_xstats_get_names(), to a statistic value. + */ +struct rte_eth_xstat { + uint64_t id; /**< The index in xstats name array. */ + uint64_t value; /**< The statistic counter value. */ +}; + +/** + * A name element for extended statistics. + * + * An array of this structure is returned by rte_eth_xstats_get_names(). + * It lists the names of extended statistics for a PMD. 
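The default_rxportconf/default_txportconf recommendations above are meant to be combined with the fallback constants when the PMD reports no preference; a minimal sketch (the helper name is illustrative):

#include <rte_ethdev.h>

static uint16_t
example_rx_ring_size(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	/* A value of 0 means the driver expressed no preference. */
	if (dev_info.default_rxportconf.ring_size != 0)
		return dev_info.default_rxportconf.ring_size;
	return RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
}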
The *rte_eth_xstat* + * structure references these names by their array index. + */ +struct rte_eth_xstat_name { + char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ +}; + +#define ETH_DCB_NUM_TCS 8 +#define ETH_MAX_VMDQ_POOL 64 + +/** + * A structure used to get the information of queue and + * TC mapping on both TX and RX paths. + */ +struct rte_eth_dcb_tc_queue_mapping { + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; +}; + +/** + * A structure used to get the information of DCB. + * It includes TC UP mapping and queue TC mapping. + */ +struct rte_eth_dcb_info { + uint8_t nb_tcs; /**< number of TCs */ + uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */ + uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */ + /** rx queues assigned to tc */ + struct rte_eth_dcb_tc_queue_mapping tc_queue; +}; + +/** + * RX/TX queue states + */ +#define RTE_ETH_QUEUE_STATE_STOPPED 0 +#define RTE_ETH_QUEUE_STATE_STARTED 1 + +#define RTE_ETH_ALL RTE_MAX_ETHPORTS + +/* Macros to check for valid port */ +#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \ + return retval; \ + } \ +} while (0) + +#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \ + return; \ + } \ +} while (0) + +/** + * l2 tunnel configuration. + */ + +/**< l2 tunnel enable mask */ +#define ETH_L2_TUNNEL_ENABLE_MASK 0x00000001 +/**< l2 tunnel insertion mask */ +#define ETH_L2_TUNNEL_INSERTION_MASK 0x00000002 +/**< l2 tunnel stripping mask */ +#define ETH_L2_TUNNEL_STRIPPING_MASK 0x00000004 +/**< l2 tunnel forwarding mask */ +#define ETH_L2_TUNNEL_FORWARDING_MASK 0x00000008 + +/** + * Function type used for RX packet processing packet callbacks. + * + * The callback function is called on RX with a burst of packets that have + * been received on the given port and queue. + * + * @param port_id + * The Ethernet port on which RX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to receive the packets. + * @param pkts + * The burst of packets that have just been received. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param max_pkts + * The max number of packets that can be stored in the "pkts" array. + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets returned to the user. + */ +typedef uint16_t (*rte_rx_callback_fn)(uint16_t port_id, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, + void *user_param); + +/** + * Function type used for TX packet processing packet callbacks. + * + * The callback function is called on TX with a burst of packets immediately + * before the packets are put onto the hardware queue for transmission. + * + * @param port_id + * The Ethernet port on which TX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to transmit the packets. + * @param pkts + * The burst of packets that are about to be transmitted. 
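A function matching rte_rx_callback_fn could look as follows (a minimal sketch that only counts packets; it would typically be attached per queue with rte_eth_add_rx_callback(port_id, queue_id, example_count_rx_cb, &counter)):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
example_count_rx_cb(uint16_t port_id __rte_unused, uint16_t queue __rte_unused,
		struct rte_mbuf *pkts[] __rte_unused, uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused, void *user_param)
{
	uint64_t *counter = user_param;

	*counter += nb_pkts;
	return nb_pkts;	/* hand all packets back to the caller unchanged */
}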
+ * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets to be written to the NIC. + */ +typedef uint16_t (*rte_tx_callback_fn)(uint16_t port_id, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); + +/** + * Possible states of an ethdev port. + */ +enum rte_eth_dev_state { + /** Device is unused before being probed. */ + RTE_ETH_DEV_UNUSED = 0, + /** Device is attached when allocated in probing. */ + RTE_ETH_DEV_ATTACHED, + /** The deferred state is useless and replaced by ownership. */ + RTE_ETH_DEV_DEFERRED, + /** Device is in removed state when plug-out is detected. */ + RTE_ETH_DEV_REMOVED, +}; + +struct rte_eth_dev_sriov { + uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ + uint8_t nb_q_per_pool; /**< rx queue number per pool */ + uint16_t def_vmdq_idx; /**< Default pool num used for PF */ + uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ +}; +#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) + +#define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN + +#define RTE_ETH_DEV_NO_OWNER 0 + +#define RTE_ETH_MAX_OWNER_NAME_LEN 64 + +struct rte_eth_dev_owner { + uint64_t id; /**< The owner unique identifier. */ + char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */ +}; + +/** Device supports link state interrupt */ +#define RTE_ETH_DEV_INTR_LSC 0x0002 +/** Device is a bonded slave */ +#define RTE_ETH_DEV_BONDED_SLAVE 0x0004 +/** Device supports device removal interrupt */ +#define RTE_ETH_DEV_INTR_RMV 0x0008 +/** Device is port representor */ +#define RTE_ETH_DEV_REPRESENTOR 0x0010 + +/** + * Iterates over valid ethdev ports owned by a specific owner. + * + * @param port_id + * The id of the next possible valid owned port. + * @param owner_id + * The owner identifier. + * RTE_ETH_DEV_NO_OWNER means iterate over all valid ownerless ports. + * @return + * Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none. + */ +uint64_t rte_eth_find_next_owned_by(uint16_t port_id, + const uint64_t owner_id); + +/** + * Macro to iterate over all enabled ethdev ports owned by a specific owner. + */ +#define RTE_ETH_FOREACH_DEV_OWNED_BY(p, o) \ + for (p = rte_eth_find_next_owned_by(0, o); \ + (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \ + p = rte_eth_find_next_owned_by(p + 1, o)) + +/** + * Iterates over valid ethdev ports. + * + * @param port_id + * The id of the next possible valid port. + * @return + * Next valid port id, RTE_MAX_ETHPORTS if there is none. + */ +uint16_t rte_eth_find_next(uint16_t port_id); + +/** + * Macro to iterate over all enabled and ownerless ethdev ports. + */ +#define RTE_ETH_FOREACH_DEV(p) \ + RTE_ETH_FOREACH_DEV_OWNED_BY(p, RTE_ETH_DEV_NO_OWNER) + + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get a new unique owner identifier. + * An owner identifier is used to owns Ethernet devices by only one DPDK entity + * to avoid multiple management of device by different entities. + * + * @param owner_id + * Owner identifier pointer. + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental rte_eth_dev_owner_new(uint64_t *owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Set an Ethernet device owner. + * + * @param port_id + * The identifier of the port to own. 
+ * @param owner + * The owner pointer. + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental rte_eth_dev_owner_set(const uint16_t port_id, + const struct rte_eth_dev_owner *owner); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Unset Ethernet device owner to make the device ownerless. + * + * @param port_id + * The identifier of port to make ownerless. + * @param owner_id + * The owner identifier. + * @return + * 0 on success, negative errno value on error. + */ +int __rte_experimental rte_eth_dev_owner_unset(const uint16_t port_id, + const uint64_t owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Remove owner from all Ethernet devices owned by a specific owner. + * + * @param owner_id + * The owner identifier. + */ +void __rte_experimental rte_eth_dev_owner_delete(const uint64_t owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get the owner of an Ethernet device. + * + * @param port_id + * The port identifier. + * @param owner + * The owner structure pointer to fill. + * @return + * 0 on success, negative errno value on error.. + */ +int __rte_experimental rte_eth_dev_owner_get(const uint16_t port_id, + struct rte_eth_dev_owner *owner); + +/** + * Get the total number of Ethernet devices that have been successfully + * initialized by the matching Ethernet driver during the PCI probing phase + * and that are available for applications to use. These devices must be + * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with + * non-contiguous ranges of devices. + * These non-contiguous ranges can be created by calls to hotplug functions or + * by some PMDs. + * + * @return + * - The total number of usable Ethernet devices. + */ +__rte_deprecated +uint16_t rte_eth_dev_count(void); + +/** + * Get the number of ports which are usable for the application. + * + * These devices must be iterated by using the macro + * ``RTE_ETH_FOREACH_DEV`` or ``RTE_ETH_FOREACH_DEV_OWNED_BY`` + * to deal with non-contiguous ranges of devices. + * + * @return + * The count of available Ethernet devices. + */ +uint16_t rte_eth_dev_count_avail(void); + +/** + * Get the total number of ports which are allocated. + * + * Some devices may not be available for the application. + * + * @return + * The total count of Ethernet devices. + */ +uint16_t __rte_experimental rte_eth_dev_count_total(void); + +/** + * Attach a new Ethernet device specified by arguments. + * + * @param devargs + * A pointer to a strings array describing the new device + * to be attached. The strings should be a pci address like + * '0000:01:00.0' or virtual device name like 'net_pcap0'. + * @param port_id + * A pointer to a port identifier actually attached. + * @return + * 0 on success and port_id is filled, negative on error + */ +__rte_deprecated +int rte_eth_dev_attach(const char *devargs, uint16_t *port_id); + +/** + * Detach a Ethernet device specified by port identifier. + * This function must be called when the device is in the + * closed state. + * + * @param port_id + * The port identifier of the device to detach. + * @param devname + * A pointer to a buffer that will be filled with the device name. + * This buffer must be at least RTE_DEV_NAME_MAX_LEN long. 
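Taken together, the experimental ownership API above is typically used as follows: allocate an identifier, then claim ownerless ports while iterating them (a minimal sketch; the owner name is illustrative and error handling is abbreviated):

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

static int
example_own_all_ports(void)
{
	struct rte_eth_dev_owner owner;
	uint16_t port_id;
	int ret;

	memset(&owner, 0, sizeof(owner));
	ret = rte_eth_dev_owner_new(&owner.id);
	if (ret != 0)
		return ret;
	snprintf(owner.name, sizeof(owner.name), "example-app");

	RTE_ETH_FOREACH_DEV(port_id) {
		ret = rte_eth_dev_owner_set(port_id, &owner);
		if (ret != 0)
			return ret;
	}
	return 0;
}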
+ * @return + * 0 on success and devname is filled, negative on error + */ +__rte_deprecated +int rte_eth_dev_detach(uint16_t port_id, char *devname); + +/** + * Convert a numerical speed in Mbps to a bitmap flag that can be used in + * the bitmap link_speeds of the struct rte_eth_conf + * + * @param speed + * Numerical speed value in Mbps + * @param duplex + * ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds) + * @return + * 0 if the speed cannot be mapped + */ +uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get DEV_RX_OFFLOAD_* flag name. + * + * @param offload + * Offload flag. + * @return + * Offload name or 'UNKNOWN' if the flag cannot be recognised. + */ +const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get DEV_TX_OFFLOAD_* flag name. + * + * @param offload + * Offload flag. + * @return + * Offload name or 'UNKNOWN' if the flag cannot be recognised. + */ +const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload); + +/** + * Configure an Ethernet device. + * This function must be invoked first before any other function in the + * Ethernet API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param port_id + * The port identifier of the Ethernet device to configure. + * @param nb_rx_queue + * The number of receive queues to set up for the Ethernet device. + * @param nb_tx_queue + * The number of transmit queues to set up for the Ethernet device. + * @param eth_conf + * The pointer to the configuration data to be used for the Ethernet device. + * The *rte_eth_conf* structure includes: + * - the hardware offload features to activate, with dedicated fields for + * each statically configurable offload hardware feature provided by + * Ethernet devices, such as IP checksum or VLAN tag stripping for + * example. + * The Rx offload bitfield API is obsolete and will be deprecated. + * Applications should set the ignore_bitfield_offloads bit on *rxmode* + * structure and use offloads field to set per-port offloads instead. + * - Any offloading set in eth_conf->[rt]xmode.offloads must be within + * the [rt]x_offload_capa returned from rte_eth_dev_infos_get(). + * Any type of device supported offloading set in the input argument + * eth_conf->[rt]xmode.offloads to rte_eth_dev_configure() is enabled + * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup() + * - the Receive Side Scaling (RSS) configuration when using multiple RX + * queues per port. Any RSS hash function set in eth_conf->rss_conf.rss_hf + * must be within the flow_type_rss_offloads provided by drivers via + * rte_eth_dev_infos_get() API. + * + * Embedding all configuration information in a single data structure + * is the more flexible method that allows the addition of new features + * without changing the syntax of the API. + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue, + uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Check if an Ethernet device was physically removed. + * + * @param port_id + * The port identifier of the Ethernet device. 
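rte_eth_speed_bitflag() is usually combined with ETH_LINK_SPEED_FIXED when a single, non-negotiated speed is wanted in link_speeds; a minimal sketch (queue counts are illustrative):

#include <rte_ethdev.h>

static int
example_configure_fixed_10g(uint16_t port_id)
{
	struct rte_eth_conf conf = { 0 };

	/* Disable autonegotiation and request exactly 10G full duplex. */
	conf.link_speeds = ETH_LINK_SPEED_FIXED |
		rte_eth_speed_bitflag(ETH_SPEED_NUM_10G, ETH_LINK_FULL_DUPLEX);
	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}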
+ * @return + * 1 when the Ethernet device is removed, otherwise 0. + */ +int __rte_experimental +rte_eth_dev_is_removed(uint16_t port_id); + +/** + * Allocate and set up a receive queue for an Ethernet device. + * + * The function allocates a contiguous block of memory for *nb_rx_desc* + * receive descriptors from a memory zone associated with *socket_id* + * and initializes each receive descriptor with a network buffer allocated + * from the memory pool *mb_pool*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue to set up. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_rx_desc + * The number of receive descriptors to allocate for the receive ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the receive descriptors of the ring. + * @param rx_conf + * The pointer to the configuration data to be used for the receive queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *rx_conf* structure contains an *rx_thresh* structure with the values + * of the Prefetch, Host, and Write-Back threshold registers of the receive + * ring. + * In addition it contains the hardware offloads features to activate using + * the DEV_RX_OFFLOAD_* flags. + * If an offloading set in rx_conf->offloads + * hasn't been set in the input argument eth_conf->rxmode.offloads + * to rte_eth_dev_configure(), it is a new added offloading, it must be + * per-queue type and it is enabled for the queue. + * No need to repeat any bit in rx_conf->offloads which has already been + * enabled in rte_eth_dev_configure() at port level. An offloading enabled + * at port level can't be disabled at queue level. + * @param mb_pool + * The pointer to the memory pool from which to allocate *rte_mbuf* network + * memory buffers to populate each descriptor of the receive ring. + * @return + * - 0: Success, receive queue correctly set up. + * - -EIO: if device is removed. + * - -EINVAL: The size of network buffers which can be allocated from the + * memory pool does not fit the various buffer sizes allowed by the + * device controller. + * - -ENOMEM: Unable to allocate the receive ring descriptors or to + * allocate network memory buffers from the memory pool when + * initializing receive descriptors. + */ +int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +/** + * Allocate and set up a transmit queue for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue to set up. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_tx_desc + * The number of transmit descriptors to allocate for the transmit ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the transmit descriptors of the ring. + * @param tx_conf + * The pointer to the configuration data to be used for the transmit queue. 
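A typical rte_eth_rx_queue_setup() call allocates the mbuf pool on the port's NUMA socket and reuses the PMD's default_rxconf; a minimal sketch (pool and ring sizes are illustrative, and port_id is assumed valid so the socket id is non-negative):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
example_setup_rx_queue(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_mempool *mp;
	int socket_id = rte_eth_dev_socket_id(port_id);

	mp = rte_pktmbuf_pool_create("example_rx_pool", 8192, 256, 0,
			RTE_MBUF_DEFAULT_BUF_SIZE, socket_id);
	if (mp == NULL)
		return -1;

	rte_eth_dev_info_get(port_id, &dev_info);
	/* Per-queue offloads need not repeat what was enabled at port level. */
	return rte_eth_rx_queue_setup(port_id, 0, 1024, socket_id,
			&dev_info.default_rxconf, mp);
}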
+ * NULL value is allowed, in which case default TX configuration + * will be used. + * The *tx_conf* structure contains the following data: + * - The *tx_thresh* structure with the values of the Prefetch, Host, and + * Write-Back threshold registers of the transmit ring. + * When setting Write-Back threshold to the value greater then zero, + * *tx_rs_thresh* value should be explicitly set to one. + * - The *tx_free_thresh* value indicates the [minimum] number of network + * buffers that must be pending in the transmit ring to trigger their + * [implicit] freeing by the driver transmit function. + * - The *tx_rs_thresh* value indicates the [minimum] number of transmit + * descriptors that must be pending in the transmit ring before setting the + * RS bit on a descriptor by the driver transmit function. + * The *tx_rs_thresh* value should be less or equal then + * *tx_free_thresh* value, and both of them should be less then + * *nb_tx_desc* - 3. + * - The *offloads* member contains Tx offloads to be enabled. + * If an offloading set in tx_conf->offloads + * hasn't been set in the input argument eth_conf->txmode.offloads + * to rte_eth_dev_configure(), it is a new added offloading, it must be + * per-queue type and it is enabled for the queue. + * No need to repeat any bit in tx_conf->offloads which has already been + * enabled in rte_eth_dev_configure() at port level. An offloading enabled + * at port level can't be disabled at queue level. + * + * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces + * the transmit function to use default values. + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -ENOMEM: Unable to allocate the transmit ring descriptors. + */ +int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +/** + * Return the NUMA socket to which an Ethernet device is connected + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * The NUMA socket id to which the Ethernet device is connected or + * a default of zero if the socket could not be determined. + * -1 is returned is the port_id value is out of range. + */ +int rte_eth_dev_socket_id(uint16_t port_id); + +/** + * Check if port_id of device is attached + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * - 0 if port is out of range or not attached + * - 1 if device is attached + */ +int rte_eth_dev_is_valid_port(uint16_t port_id); + +/** + * Start specified RX queue of a port. It is used when rx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the receive queue is started. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); + +/** + * Stop specified RX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). 
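The tx_deferred_start flag documented earlier keeps a queue stopped across rte_eth_dev_start(); a minimal sketch of setting up a Tx queue that way (ring size illustrative), after which it is started explicitly with rte_eth_dev_tx_queue_start():

#include <rte_ethdev.h>

static int
example_setup_deferred_tx_queue(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	txconf = dev_info.default_txconf;   /* start from the PMD defaults    */
	txconf.tx_deferred_start = 1;       /* skip this queue in dev_start() */

	return rte_eth_tx_queue_setup(port_id, 0, 1024,
			rte_eth_dev_socket_id(port_id), &txconf);
}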
+ * @return + * - 0: Success, the receive queue is stopped. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); + +/** + * Start TX for specified queue of a port. It is used when tx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is started. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); + +/** + * Stop specified TX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is stopped. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); + +/** + * Start an Ethernet device. + * + * The device start step is the last one and consists of setting the configured + * offload features and in starting the transmit and the receive units of the + * device. + * On success, all basic functions exported by the Ethernet API (link status, + * receive/transmit, and so on) can be invoked. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device started. + * - <0: Error code of the driver device start function. + */ +int rte_eth_dev_start(uint16_t port_id); + +/** + * Stop an Ethernet device. The device can be restarted with a call to + * rte_eth_dev_start() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_stop(uint16_t port_id); + +/** + * Link up an Ethernet device. + * + * Set device link up will re-enable the device rx/tx + * functionality after it is previously set device linked down. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device linked up. + * - <0: Error code of the driver device link up function. + */ +int rte_eth_dev_set_link_up(uint16_t port_id); + +/** + * Link down an Ethernet device. + * The device rx/tx functionality will be disabled if success, + * and it can be re-enabled with a call to + * rte_eth_dev_set_link_up() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +int rte_eth_dev_set_link_down(uint16_t port_id); + +/** + * Close a stopped Ethernet device. The device cannot be restarted! + * The function frees all resources except for needed by the + * closed state. To free these resources, call rte_eth_dev_detach(). + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_close(uint16_t port_id); + +/** + * Reset a Ethernet device and keep its port id. + * + * When a port has to be reset passively, the DPDK application can invoke + * this function. 
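Putting the calls above together, the usual lifecycle once queues are set up is start, run the datapath, then stop and close; a minimal sketch with error handling abbreviated:

#include <rte_ethdev.h>

static int
example_port_lifecycle(uint16_t port_id)
{
	int ret = rte_eth_dev_start(port_id);

	if (ret < 0)
		return ret;

	/* ... rte_eth_rx_burst()/rte_eth_tx_burst() loops run here ... */

	rte_eth_dev_stop(port_id);   /* can be restarted later if needed    */
	rte_eth_dev_close(port_id);  /* final: the port cannot be restarted */
	return 0;
}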
For example when a PF is reset, all its VFs should also + * be reset. Normally a DPDK application can invoke this function when + * RTE_ETH_EVENT_INTR_RESET event is detected, but can also use it to start + * a port reset in other circumstances. + * + * When this function is called, it first stops the port and then calls the + * PMD specific dev_uninit( ) and dev_init( ) to return the port to initial + * state, in which no Tx and Rx queues are setup, as if the port has been + * reset and not started. The port keeps the port id it had before the + * function call. + * + * After calling rte_eth_dev_reset( ), the application should use + * rte_eth_dev_configure( ), rte_eth_rx_queue_setup( ), + * rte_eth_tx_queue_setup( ), and rte_eth_dev_start( ) + * to reconfigure the device as appropriate. + * + * Note: To avoid unexpected behavior, the application should stop calling + * Tx and Rx functions before calling rte_eth_dev_reset( ). For thread + * safety, all these controlling functions should be called from the same + * thread. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - (0) if successful. + * - (-EINVAL) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support this function. + * - (-EPERM) if not ran from the primary process. + * - (-EIO) if re-initialisation failed or device is removed. + * - (-ENOMEM) if the reset failed due to OOM. + * - (-EAGAIN) if the reset temporarily failed and should be retried later. + */ +int rte_eth_dev_reset(uint16_t port_id); + +/** + * Enable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_enable(uint16_t port_id); + +/** + * Disable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_disable(uint16_t port_id); + +/** + * Return the value of promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if promiscuous is enabled + * - (0) if promiscuous is disabled. + * - (-1) on error + */ +int rte_eth_promiscuous_get(uint16_t port_id); + +/** + * Enable the receipt of any multicast frame by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_enable(uint16_t port_id); + +/** + * Disable the receipt of all multicast frames by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_disable(uint16_t port_id); + +/** + * Return the value of allmulticast mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if allmulticast is enabled + * - (0) if allmulticast is disabled. + * - (-1) on error + */ +int rte_eth_allmulticast_get(uint16_t port_id); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need + * to wait up to 9 seconds in it. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. 
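The passive reset flow described above is normally driven from an event callback registered with rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RESET, ...); a minimal sketch, with the post-reset reconfiguration left out:

#include <rte_ethdev.h>

static int
example_reset_event_cb(uint16_t port_id, enum rte_eth_event_type event,
		void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
	if (event != RTE_ETH_EVENT_INTR_RESET)
		return 0;

	/* Datapath threads must already have stopped calling Rx/Tx here. */
	if (rte_eth_dev_reset(port_id) == 0) {
		/* rte_eth_dev_configure(), the queue setups and
		 * rte_eth_dev_start() would be repeated here. */
	}
	return 0;
}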
+ */ +void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait + * version of rte_eth_link_get(). + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +void rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats); + +/** + * Reset the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if device notified to reset stats. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_stats_reset(uint16_t port_id); + +/** + * Retrieve names of extended statistics of an Ethernet device. + * + * There is an assumption that 'xstat_names' and 'xstats' arrays are matched + * by array index: + * xstats_names[i].name => xstats[i].value + * + * And the array index is same with id field of 'struct rte_eth_xstat': + * xstats[i].id == i + * + * This assumption makes key-value pair matching less flexible but simpler. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. + * @param size + * The size of the xstats_names array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get_names(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, + unsigned int size); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * There is an assumption that 'xstat_names' and 'xstats' arrays are matched + * by array index: + * xstats_names[i].name => xstats[i].value + * + * And the array index is same with id field of 'struct rte_eth_xstat': + * xstats[i].id == i + * + * This assumption makes key-value pair matching less flexible but simpler. + * + * @param port_id + * The port identifier of the Ethernet device. 
+ * @param xstats + * A pointer to a table of structure of type *rte_eth_xstat* + * to be filled with device statistics ids and values. + * This parameter can be set to NULL if n is 0. + * @param n + * The size of the xstats array (number of elements). + * @return + * - A positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than n: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + unsigned int n); + +/** + * Retrieve names of extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. + * @param ids + * IDs array given by app to retrieve specific statistics + * @param size + * The size of the xstats_names array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int +rte_eth_xstats_get_names_by_id(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param ids + * A pointer to an ids array passed by application. This tells which + * statistics values function should retrieve. This parameter + * can be set to NULL if size is 0. In this case function will retrieve + * all avalible statistics. + * @param values + * A pointer to a table to be filled with device statistics values. + * @param size + * The size of the ids array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size); + +/** + * Gets the ID of a statistic from its name. + * + * This function searches for the statistics using string compares, and + * as such should not be used on the fast-path. For fast-path retrieval of + * specific statistics, store the ID as provided in *id* from this function, + * and pass the ID to rte_eth_xstats_get() + * + * @param port_id The port to look up statistics from + * @param xstat_name The name of the statistic to return + * @param[out] id A pointer to an app-supplied uint64_t which should be + * set to the ID of the stat if the stat exists. 
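The index-matched xstats arrays described above are usually fetched with a two-pass pattern: query the count with a NULL array, then retrieve names and values of that size; a minimal sketch:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
example_dump_xstats(uint16_t port_id)
{
	int i, nb = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names;
	struct rte_eth_xstat *values;

	if (nb <= 0)
		return;
	names = calloc(nb, sizeof(*names));
	values = calloc(nb, sizeof(*values));
	if (names != NULL && values != NULL &&
	    rte_eth_xstats_get_names(port_id, names, nb) == nb &&
	    rte_eth_xstats_get(port_id, values, nb) == nb) {
		for (i = 0; i < nb; i++)
			printf("%s: %" PRIu64 "\n",
					names[i].name, values[i].value);
	}
	free(names);
	free(values);
}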
+ * @return + * 0 on success + * -ENODEV for invalid port_id, + * -EIO if device is removed, + * -EINVAL if the xstat_name doesn't exist in port_id + */ +int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, + uint64_t *id); + +/** + * Reset extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_xstats_reset(uint16_t port_id); + +/** + * Set a mapping for the specified transmit queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the transmit + * queue is to be assigned. + * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, + uint16_t tx_queue_id, uint8_t stat_idx); + +/** + * Set a mapping for the specified receive queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the receive + * queue is to be assigned. + * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, + uint16_t rx_queue_id, + uint8_t stat_idx); + +/** + * Retrieve the Ethernet address of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * A pointer to a structure of type *ether_addr* to be filled with + * the Ethernet address of the Ethernet device. + */ +void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr); + +/** + * Retrieve the contextual information of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev_info + * A pointer to a structure of type *rte_eth_dev_info* to be filled with + * the contextual information of the Ethernet device. + */ +void rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info); + +/** + * Retrieve the firmware version of a device. + * + * @param port_id + * The port identifier of the device. + * @param fw_version + * A pointer to a string array storing the firmware version of a device, + * the string includes terminating null. This pointer is allocated by caller. + * @param fw_size + * The size of the string array pointed by fw_version, which should be + * large enough to store firmware version of the device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (>0) if *fw_size* is not enough to store firmware version, return + * the size of the non truncated string. + */ +int rte_eth_dev_fw_version_get(uint16_t port_id, + char *fw_version, size_t fw_size); + +/** + * Retrieve the supported packet types of an Ethernet device. 
+ *
+ * When a packet type is announced as supported, it *must* be recognized by
+ * the PMD. For instance, if RTE_PTYPE_L2_ETHER, RTE_PTYPE_L2_ETHER_VLAN
+ * and RTE_PTYPE_L3_IPV4 are announced, the PMD must return the following
+ * packet types for these packets:
+ * - Ether/IPv4 -> RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4
+ * - Ether/Vlan/IPv4 -> RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4
+ * - Ether/[anything else] -> RTE_PTYPE_L2_ETHER
+ * - Ether/Vlan/[anything else] -> RTE_PTYPE_L2_ETHER_VLAN
+ *
+ * When a packet is received by a PMD, the most precise type must be
+ * returned among the ones supported. However, a PMD is allowed to set
+ * a packet type that is not in the supported list, on the condition that it
+ * is more precise. Therefore, a PMD announcing no supported packet types
+ * can still set a matching packet type in a received packet.
+ *
+ * @note
+ * It is better to invoke this API after the device has been started or the
+ * Rx burst function has been chosen, in order to obtain the correct
+ * supported ptypes.
+ * @note
+ * If a given PMD does not report what ptypes it supports, then the supported
+ * ptype count is reported as 0.
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param ptype_mask
+ * A hint of what kind of packet type the caller is interested in.
+ * @param ptypes
+ * An array pointer to store the matching packet types, allocated by the
+ * caller.
+ * @param num
+ * Size of the array pointed to by *ptypes*.
+ * @return
+ * - (>=0) Number of supported ptypes. If the number of types exceeds num,
+ * only num entries will be filled into the ptypes array, but the full
+ * count of supported ptypes will be returned.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask,
+ uint32_t *ptypes, int num);
+
+/**
+ * Retrieve the MTU of an Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param mtu
+ * A pointer to a uint16_t where the retrieved MTU is to be stored.
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu);
+
+/**
+ * Change the MTU of an Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param mtu
+ * A uint16_t for the MTU to be applied.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if operation is not supported.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ * - (-EINVAL) if *mtu* invalid.
+ * - (-EBUSY) if operation is not allowed when the port is running.
+ */
+int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu);
+
+/**
+ * Enable/Disable hardware filtering by an Ethernet device of received
+ * VLAN packets tagged with a given VLAN Tag Identifier.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param vlan_id
+ * The VLAN Tag Identifier whose filtering must be enabled or disabled.
+ * @param on
+ * If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ * Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware-assisted VLAN filtering not configured.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ * - (-ENOSYS) if VLAN filtering on *port_id* disabled.
+ * - (-EINVAL) if *vlan_id* > 4095.
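+ *
+ * A minimal usage sketch (illustrative only; it assumes *port_id* refers to a
+ * configured port whose driver supports VLAN filtering, and error handling is
+ * shortened):
+ *
+ * @code{.c}
+ * uint16_t port_id = 0;  // example port, adjust for the actual setup
+ * int ret;
+ *
+ * // accept packets tagged with VLAN ID 100 on this port
+ * ret = rte_eth_dev_vlan_filter(port_id, 100, 1);
+ * if (ret != 0)
+ *     printf("VLAN filter setup failed: %d\n", ret);
+ * @endcode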
+ */
+int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on);
+
+/**
+ * Enable/Disable hardware VLAN stripping on an Rx queue of an Ethernet
+ * device. 82599/X540/X550 NICs support VLAN stripping at the Rx queue level.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ * The index of the receive queue on which VLAN stripping is to be enabled
+ * or disabled.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param on
+ * If 1, enable VLAN stripping on the receive queue of the Ethernet port.
+ * If 0, disable VLAN stripping on the receive queue of the Ethernet port.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware-assisted VLAN stripping not configured.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if *rx_queue_id* invalid.
+ */
+int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
+ int on);
+
+/**
+ * Set the outer VLAN Ether Type used by an Ethernet device; it can be
+ * inserted into the VLAN header. This is a register setup available on some
+ * Intel NICs, but not all; please check the data sheet for availability.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param vlan_type
+ * The VLAN type.
+ * @param tag_type
+ * The Tag Protocol ID.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware-assisted VLAN TPID setup is not supported.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ */
+int rte_eth_dev_set_vlan_ether_type(uint16_t port_id,
+ enum rte_vlan_type vlan_type,
+ uint16_t tag_type);
+
+/**
+ * Set the VLAN offload configuration of an Ethernet device.
+ * Enabling/disabling extended VLAN is a register setup available on some
+ * Intel NICs, but not all; please check the data sheet for availability.
+ * Enabling/disabling VLAN stripping can be done per Rx queue on certain NICs,
+ * but here the configuration is applied at the port level.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param offload_mask
+ * The VLAN offload bit mask; the following flags can be combined with
+ * bitwise OR:
+ * ETH_VLAN_STRIP_OFFLOAD
+ * ETH_VLAN_FILTER_OFFLOAD
+ * ETH_VLAN_EXTEND_OFFLOAD
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware-assisted VLAN filtering not configured.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ */
+int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask);
+
+/**
+ * Read the VLAN offload configuration from an Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (>0) if successful. Bit mask composed of
+ * ETH_VLAN_STRIP_OFFLOAD
+ * ETH_VLAN_FILTER_OFFLOAD
+ * ETH_VLAN_EXTEND_OFFLOAD
+ * indicating which offloads are currently enabled.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_vlan_offload(uint16_t port_id);
+
+/**
+ * Set port based TX VLAN insertion on or off.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param pvid
+ * Port based TX VLAN identifier together with user priority.
+ * @param on
+ * Turn on or off the port based TX VLAN insertion.
+ *
+ * @return
+ * - (0) if successful.
+ * - negative if failed.
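+ *
+ * For the VLAN offload functions above, a minimal sketch of combining the
+ * offload flags (illustrative only; it assumes *port_id* is a valid,
+ * configured port):
+ *
+ * @code{.c}
+ * int mask;
+ *
+ * // read the current VLAN offload configuration
+ * mask = rte_eth_dev_get_vlan_offload(port_id);
+ * if (mask >= 0) {
+ *     // enable VLAN stripping and filtering, keep the other bits unchanged
+ *     mask |= ETH_VLAN_STRIP_OFFLOAD | ETH_VLAN_FILTER_OFFLOAD;
+ *     if (rte_eth_dev_set_vlan_offload(port_id, mask) != 0)
+ *         printf("could not update VLAN offloads\n");
+ * }
+ * @endcode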
+ */
+int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on);
+
+typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
+ void *userdata);
+
+/**
+ * Structure used to buffer packets for future TX.
+ * Used by the rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs.
+ */
+struct rte_eth_dev_tx_buffer {
+ buffer_tx_error_fn error_callback;
+ void *error_userdata;
+ uint16_t size; /**< Size of buffer for buffered tx */
+ uint16_t length; /**< Number of packets in the array */
+ struct rte_mbuf *pkts[];
+ /**< Pending packets to be sent on explicit flush or when full */
+};
+
+/**
+ * Calculate the size of the tx buffer.
+ *
+ * @param sz
+ * Number of stored packets.
+ */
+#define RTE_ETH_TX_BUFFER_SIZE(sz) \
+ (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf *))
+
+/**
+ * Initialize default values for buffered transmitting.
+ *
+ * @param buffer
+ * Tx buffer to be initialized.
+ * @param size
+ * Buffer size.
+ * @return
+ * 0 if no error.
+ */
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
+
+/**
+ * Configure a callback for buffered packets which cannot be sent.
+ *
+ * Register a specific callback to be called when an attempt is made to send
+ * all packets buffered on an Ethernet port, but not all packets can
+ * successfully be sent. The callback registered here will be called only
+ * from calls to rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs.
+ * The default callback configured for each queue simply frees the
+ * packets back to their mempool. If additional behaviour is required,
+ * for example, to count dropped packets, or to retry transmission of packets
+ * which cannot be sent, this function should be used to register a suitable
+ * callback function to implement the desired behaviour.
+ * The example callback "rte_eth_tx_buffer_count_callback()" is also
+ * provided as reference.
+ *
+ * @param buffer
+ * The tx buffer structure on which the callback is to be configured.
+ * @param callback
+ * The function to be used as the callback.
+ * @param userdata
+ * Arbitrary parameter to be passed to the callback function.
+ * @return
+ * 0 on success, or -1 on error with rte_errno set appropriately.
+ */
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+ buffer_tx_error_fn callback, void *userdata);
+
+/**
+ * Callback function for silently dropping unsent buffered packets.
+ *
+ * This function can be passed to rte_eth_tx_buffer_set_err_callback() to
+ * adjust the default behavior when buffered packets cannot be sent. This
+ * function drops any unsent packets silently and is used by tx buffered
+ * operations as default behavior.
+ *
+ * NOTE: this function should not be called directly; instead it should be
+ * used as a callback for packet buffering.
+ *
+ * @param pkts
+ * The previously buffered packets which could not be sent
+ * @param unsent
+ * The number of unsent packets in the pkts array
+ * @param userdata
+ * Not used
+ */
+void
+rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
+ void *userdata);
+
+/**
+ * Callback function for tracking unsent buffered packets.
+ *
+ * This function can be passed to rte_eth_tx_buffer_set_err_callback() to
+ * adjust the default behavior when buffered packets cannot be sent. This
+ * function drops any unsent packets, but also updates a user-supplied counter
+ * to track the overall number of packets dropped. The counter should be a
+ * uint64_t variable.
+ *
+ * NOTE: this function should not be called directly; instead it should be
+ * used as a callback for packet buffering.
+ *
+ * NOTE: when configuring this function as a callback with
+ * rte_eth_tx_buffer_set_err_callback(), the final userdata parameter
+ * should point to a uint64_t value.
+ *
+ * @param pkts
+ * The previously buffered packets which could not be sent
+ * @param unsent
+ * The number of unsent packets in the pkts array
+ * @param userdata
+ * Pointer to a uint64_t value, which will be incremented by unsent
+ */
+void
+rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
+ void *userdata);
+
+/**
+ * Request the driver to free mbufs currently cached by the driver. The
+ * driver will only free the mbuf if it is no longer in use. It is the
+ * application's responsibility to ensure rte_eth_tx_buffer_flush() is
+ * called if needed.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the transmit queue through which output packets must be
+ * sent.
+ * The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param free_cnt
+ * Maximum number of packets to free. Use 0 to indicate all possible packets
+ * should be freed. Note that a packet may be using multiple mbufs.
+ * @return
+ * Failure: < 0
+ * -ENODEV: Invalid interface
+ * -EIO: device is removed
+ * -ENOTSUP: Driver does not support function
+ * Success: >= 0
+ * 0-n: Number of packets freed. More packets may still remain in the ring
+ * that are in use.
+ */
+int
+rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt);
+
+/**
+ * Subtypes for IPsec offload event (@ref RTE_ETH_EVENT_IPSEC) raised by
+ * eth device.
+ */
+enum rte_eth_event_ipsec_subtype {
+ RTE_ETH_EVENT_IPSEC_UNKNOWN = 0,
+ /**< Unknown event type */
+ RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW,
+ /**< Sequence number overflow */
+ RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY,
+ /**< Soft time expiry of SA */
+ RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY,
+ /**< Soft byte expiry of SA */
+ RTE_ETH_EVENT_IPSEC_MAX
+ /**< Max value of this enum */
+};
+
+/**
+ * Descriptor for @ref RTE_ETH_EVENT_IPSEC event. Used by eth dev to send extra
+ * information of the IPsec offload event.
+ */
+struct rte_eth_event_ipsec_desc {
+ enum rte_eth_event_ipsec_subtype subtype;
+ /**< Type of RTE_ETH_EVENT_IPSEC_* event */
+ uint64_t metadata;
+ /**< Event specific metadata
+ *
+ * For the following events, *userdata* registered
+ * with the *rte_security_session* would be returned
+ * as metadata:
+ *
+ * - @ref RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW
+ * - @ref RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY
+ * - @ref RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY
+ *
+ * @see struct rte_security_session_conf
+ *
+ */
+};
+
+/**
+ * The eth device event type for interrupt, and maybe others in the future.
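+ *
+ * Event types are typically consumed through the callback registration API
+ * documented below; a minimal sketch (illustrative only, the handler name and
+ * its body are placeholders):
+ *
+ * @code{.c}
+ * static int
+ * link_event_handler(uint16_t port_id, enum rte_eth_event_type event,
+ *                    void *cb_arg, void *ret_param)
+ * {
+ *     RTE_SET_USED(cb_arg);
+ *     RTE_SET_USED(ret_param);
+ *     printf("event %d on port %u\n", (int)event, (unsigned)port_id);
+ *     return 0;
+ * }
+ *
+ * // during initialization, after the port has been probed:
+ * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
+ *                               link_event_handler, NULL);
+ * @endcode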
+ */ +enum rte_eth_event_type { + RTE_ETH_EVENT_UNKNOWN, /**< unknown event type */ + RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */ + RTE_ETH_EVENT_QUEUE_STATE, + /**< queue state event (enabled/disabled) */ + RTE_ETH_EVENT_INTR_RESET, + /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ + RTE_ETH_EVENT_MACSEC, /**< MACsec offload related event */ + RTE_ETH_EVENT_INTR_RMV, /**< device removal event */ + RTE_ETH_EVENT_NEW, /**< port is probed */ + RTE_ETH_EVENT_DESTROY, /**< port is released */ + RTE_ETH_EVENT_IPSEC, /**< IPsec offload related event */ + RTE_ETH_EVENT_MAX /**< max value of this enum */ +}; + +typedef int (*rte_eth_dev_cb_fn)(uint16_t port_id, + enum rte_eth_event_type event, void *cb_arg, void *ret_param); +/**< user application callback to be registered for interrupts */ + +/** + * Register a callback function for port event. + * + * @param port_id + * Port id. + * RTE_ETH_ALL means register the event for all port ids. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_register(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * Unregister a callback function for port event. + * + * @param port_id + * Port id. + * RTE_ETH_ALL means unregister the event for all port ids. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. -1 means to + * remove all for the same callback address and same event. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_unregister(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * When there is no rx packet coming in Rx Queue for a long time, we can + * sleep lcore related to RX Queue for power saving, and enable rx interrupt + * to be triggered when Rx packet arrives. + * + * The rte_eth_dev_rx_intr_enable() function enables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id); + +/** + * When lcore wakes up from rx interrupt indicating packet coming, disable rx + * interrupt and returns to polling mode. + * + * The rte_eth_dev_rx_intr_disable() function disables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. 
+ * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id); + +/** + * RX Interrupt control per port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data); + +/** + * RX Interrupt control per queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, + int epfd, int op, void *data); + +/** + * Turn on the LED on the Ethernet device. + * This function turns on the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_led_on(uint16_t port_id); + +/** + * Turn off the LED on the Ethernet device. + * This function turns off the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_led_off(uint16_t port_id); + +/** + * Get current status of the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure where to store the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_flow_ctrl_get(uint16_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure of the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure or device is removed. + */ +int rte_eth_dev_flow_ctrl_set(uint16_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet priority flow control under DCB environment + * for Ethernet device. 
+ * + * @param port_id + * The port identifier of the Ethernet device. + * @param pfc_conf + * The pointer to the structure of the priority flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support priority flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure or device is removed. + */ +int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, + struct rte_eth_pfc_conf *pfc_conf); + +/** + * Add a MAC address to an internal array of addresses used to enable whitelist + * filtering to accept packets only if the destination MAC address matches. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * The MAC address to add. + * @param pool + * VMDq pool index to associate address with (if VMDq is enabled). If VMDq is + * not enabled, this should be set to 0. + * @return + * - (0) if successfully added or *mac_addr* was already added. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* is invalid. + * - (-EIO) if device is removed. + * - (-ENOSPC) if no more MAC addresses can be added. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *mac_addr, + uint32_t pool); + +/** + * Remove a MAC address from the internal array of addresses. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * MAC address to remove. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EADDRINUSE) if attempting to remove the default MAC address + */ +int rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *mac_addr); + +/** + * Set the default MAC address. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * New default MAC address. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_default_mac_addr_set(uint16_t port_id, + struct ether_addr *mac_addr); + +/** + * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to update. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rss_reta_update(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to query. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. 
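+ *
+ * A sketch of spreading a full redirection table over *nb_queues* Rx queues
+ * with rte_eth_dev_rss_reta_update() (illustrative only; *reta_size* would
+ * normally be taken from rte_eth_dev_info_get() and error handling is
+ * omitted):
+ *
+ * @code{.c}
+ * struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
+ *                                           RTE_RETA_GROUP_SIZE];
+ * uint16_t reta_size = 128;  // assumed value, query the device instead
+ * uint16_t nb_queues = 4;    // example queue count
+ * uint16_t i;
+ *
+ * memset(reta_conf, 0, sizeof(reta_conf));
+ * for (i = 0; i < reta_size; i++) {
+ *     reta_conf[i / RTE_RETA_GROUP_SIZE].mask |=
+ *         1ULL << (i % RTE_RETA_GROUP_SIZE);
+ *     reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
+ *         i % nb_queues;
+ * }
+ * rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size);
+ * @endcode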
+ */ +int rte_eth_dev_rss_reta_query(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Updates unicast hash table for receiving packet with the given destination + * MAC address, and the packet is routed to all VFs for which the RX mode is + * accept packets that match the unicast hash table. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param addr + * Unicast MAC address. + * @param on + * 1 - Set an unicast hash bit for receiving packets with the MAC address. + * 0 - Clear an unicast hash bit. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, + uint8_t on); + + /** + * Updates all unicast hash bitmaps for receiving packet with any Unicast + * Ethernet MAC addresses,the packet is routed to all VFs for which the RX + * mode is accept packets that match the unicast hash table. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param on + * 1 - Set all unicast hash bitmaps for receiving all the Ethernet + * MAC addresses + * 0 - Clear all unicast hash bitmaps + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on); + +/** + * Set a traffic mirroring rule on an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mirror_conf + * The pointer to the traffic mirroring structure describing the mirroring rule. + * The *rte_eth_vm_mirror_conf* structure includes the type of mirroring rule, + * destination pool and the value of rule if enable vlan or pool mirroring. + * + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @param on + * 1 - Enable a mirroring rule. + * 0 - Disable a mirroring rule. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if the mr_conf information is not correct. + */ +int rte_eth_mirror_rule_set(uint16_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); + +/** + * Reset a traffic mirroring rule on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_mirror_rule_reset(uint16_t port_id, + uint8_t rule_id); + +/** + * Set the rate limitation for a queue on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_idx + * The queue id. + * @param tx_rate + * The tx rate in Mbps. Allocated from the total port link speed. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. 
+ */ +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, + uint16_t tx_rate); + + /** + * Configuration of Receive Side Scaling hash computation of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * The new configuration to use for RSS hash computation on the port. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_hash_update(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Retrieve current configuration of Receive Side Scaling hash computation + * of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * Where to store the current RSS hash configuration of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support RSS. + */ +int +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Add UDP tunneling port for a specific type of tunnel. + * The packets with this UDP port will be identified as this type of tunnel. + * Before enabling any offloading function for a tunnel, users can call this API + * to change or add more UDP port for the tunnel. So the offloading function + * can take effect on the packets with the specific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + + /** + * Delete UDP tunneling port a specific type of tunnel. + * The packets with this UDP port will not be identified as this type of tunnel + * any more. + * Before enabling any offloading function for a tunnel, users can call this API + * to delete a UDP port for the tunnel. So the offloading function will not take + * effect on the packets with the specific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + +/** + * Check whether the filter type is supported on an Ethernet device. + * All the supported filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this filter type. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type); + +/** + * Take operations to assigned filter type on an Ethernet device. + * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. 
+ * @param filter_op + * Type of operation. + * @param arg + * A pointer to arguments defined specifically for the operation. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg); + +/** + * Get DCB information on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dcb_info + * dcb information. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support. + */ +int rte_eth_dev_get_dcb_info(uint16_t port_id, + struct rte_eth_dcb_info *dcb_info); + +struct rte_eth_rxtx_callback; + +/** + * Add a callback to be called on packet RX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +const struct rte_eth_rxtx_callback * +rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback that must be called first on packet RX on a given port + * and queue. + * + * This API configures a first function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +const struct rte_eth_rxtx_callback * +rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback to be called on packet TX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets sent on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_tx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. 
+ * @param fn
+ * The callback function.
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+const struct rte_eth_rxtx_callback *
+rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
+ rte_tx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -ENOTSUP: Callback support is not available.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback
+ * is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
+ const struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -ENOTSUP: Callback support is not available.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback
+ * is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
+ const struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Retrieve information about a given port's RX queue.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The RX queue on the Ethernet device for which information
+ * will be retrieved.
+ * @param qinfo
+ * A pointer to a structure of type *rte_eth_rxq_info* to be filled with
+ * the information of the Ethernet device.
+ *
+ * @return
+ * - 0: Success
+ * - -ENOTSUP: routine is not supported by the device PMD.
+ * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); + +/** + * Retrieve information about given port's TX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The TX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_txq_info_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +/** + * Retrieve device registers and register attributes (number of registers and + * register size) + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * Pointer to rte_dev_reg_info structure to fill in. If info->data is + * NULL the function fills in the width and length fields. If non-NULL + * the registers are put into the buffer pointed at by the data field. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info); + +/** + * Retrieve size of device EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) EEPROM size if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom_length(uint16_t port_id); + +/** + * Retrieve EEPROM and EEPROM attribute + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for return EEPROM data and + * EEPROM attributes to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Program EEPROM with provided data + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes EEPROM data for programming and + * EEPROM attributes to be filled + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Retrieve the type and size of plugin module EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @param modinfo + * The type and size of plugin module EEPROM. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. 
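+ *
+ * A sketch of reading the plugin module EEPROM via this function and
+ * rte_eth_dev_get_module_eeprom() (illustrative only; the buffer size is an
+ * assumption and error handling is shortened):
+ *
+ * @code{.c}
+ * struct rte_eth_dev_module_info modinfo;
+ * struct rte_dev_eeprom_info eeprom_info;
+ * uint8_t buf[1024];  // assumed to be large enough for the module
+ *
+ * if (rte_eth_dev_get_module_info(port_id, &modinfo) == 0) {
+ *     memset(&eeprom_info, 0, sizeof(eeprom_info));
+ *     eeprom_info.data = buf;
+ *     eeprom_info.offset = 0;
+ *     eeprom_info.length = RTE_MIN(modinfo.eeprom_len,
+ *                                  (uint32_t)sizeof(buf));
+ *     rte_eth_dev_get_module_eeprom(port_id, &eeprom_info);
+ * }
+ * @endcode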
+ */ +int __rte_experimental +rte_eth_dev_get_module_info(uint16_t port_id, + struct rte_eth_dev_module_info *modinfo); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Retrieve the data of plugin module EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes the plugin module EEPROM attributes, and the + * buffer for return plugin module EEPROM data. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int __rte_experimental +rte_eth_dev_get_module_eeprom(uint16_t port_id, + struct rte_dev_eeprom_info *info); + +/** + * Set the list of multicast addresses to filter on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mc_addr_set + * The array of multicast addresses to set. Equal to NULL when the function + * is invoked to flush the set of filtered addresses. + * @param nb_mc_addr + * The number of multicast addresses in the *mc_addr_set* array. Equal to 0 + * when the function is invoked to flush the set of filtered addresses. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. + * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. + */ +int rte_eth_dev_set_mc_addr_list(uint16_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); + +/** + * Enable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_enable(uint16_t port_id); + +/** + * Disable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_disable(uint16_t port_id); + +/** + * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * @param flags + * Device specific flags. Used to pass the RX timesync register index to + * i40e. Unused in igb/ixgbe, pass 0 instead. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_read_rx_timestamp(uint16_t port_id, + struct timespec *timestamp, uint32_t flags); + +/** + * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. 
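+ *
+ * A sketch of polling for the transmit timestamp of a previously sent
+ * IEEE1588 packet (illustrative only; the retry policy is an application
+ * choice and timesync must have been enabled beforehand):
+ *
+ * @code{.c}
+ * struct timespec ts;
+ * int ret;
+ *
+ * do {
+ *     ret = rte_eth_timesync_read_tx_timestamp(port_id, &ts);
+ * } while (ret == -EINVAL);  // -EINVAL: timestamp not yet available
+ *
+ * if (ret == 0)
+ *     printf("TX timestamp: %ld.%09ld\n", (long)ts.tv_sec, (long)ts.tv_nsec);
+ * @endcode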
+ */ +int rte_eth_timesync_read_tx_timestamp(uint16_t port_id, + struct timespec *timestamp); + +/** + * Adjust the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param delta + * The adjustment in nanoseconds. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta); + +/** + * Read the time from the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + */ +int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time); + +/** + * Set the time of the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time); + +/** + * Config l2 tunnel ether type of an Ethernet device for filtering specific + * tunnel packets by ether type. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel); + +/** + * Enable/disable l2 tunnel offload functions. Include, + * 1, The ability of parsing a type of l2 tunnel of an Ethernet device. + * Filtering, forwarding and offloading this type of tunnel packets depend on + * this ability. + * 2, Stripping the l2 tunnel tag. + * 3, Insertion of the l2 tunnel tag. + * 4, Forwarding the packets based on the l2 tunnel tag. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel parameters. + * @param mask + * Indicate the offload function. + * @param en + * Enable or disable this function. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); + +/** +* Get the port id from device name. 
The device name should be specified +* as below: +* - PCIe address (Domain:Bus:Device.Function), for example- 0000:2:00.0 +* - SoC device name, for example- fsl-gmac0 +* - vdev dpdk name, for example- net_[pcap0|null0|tap0] +* +* @param name +* pci address or name of the device +* @param port_id +* pointer to port identifier of the device +* @return +* - (0) if successful and port_id is filled. +* - (-ENODEV or -EINVAL) on failure. +*/ +int +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id); + +/** +* Get the device name from port id. The device name is specified as below: +* - PCIe address (Domain:Bus:Device.Function), for example- 0000:02:00.0 +* - SoC device name, for example- fsl-gmac0 +* - vdev dpdk name, for example- net_[pcap0|null0|tun0|tap0] +* +* @param port_id +* Port identifier of the device. +* @param name +* Buffer of size RTE_ETH_NAME_MAX_LEN to store the name. +* @return +* - (0) if successful. +* - (-EINVAL) on failure. +*/ +int +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name); + +/** + * Check that numbers of Rx and Tx descriptors satisfy descriptors limits from + * the ethernet device information, otherwise adjust them to boundaries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param nb_rx_desc + * A pointer to a uint16_t where the number of receive + * descriptors stored. + * @param nb_tx_desc + * A pointer to a uint16_t where the number of transmit + * descriptors stored. + * @return + * - (0) if successful. + * - (-ENOTSUP, -ENODEV or -EINVAL) on failure. + */ +int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc); + +/** + * Test if a port supports specific mempool ops. + * + * @param port_id + * Port identifier of the Ethernet device. + * @param [in] pool + * The name of the pool operations to test. + * @return + * - 0: best mempool ops choice for this port. + * - 1: mempool ops are supported for this port. + * - -ENOTSUP: mempool ops not supported for this port. + * - -ENODEV: Invalid port Identifier. + * - -EINVAL: Pool param is null. + */ +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool); + +/** + * Get the security context for the Ethernet device. + * + * @param port_id + * Port identifier of the Ethernet device + * @return + * - NULL on error. + * - pointer to security context on success. + */ +void * +rte_eth_dev_get_sec_ctx(uint16_t port_id); + + +#include + +/** + * + * Retrieve a burst of input packets from a receive queue of an Ethernet + * device. The retrieved packets are stored in *rte_mbuf* structures whose + * pointers are supplied in the *rx_pkts* array. + * + * The rte_eth_rx_burst() function loops, parsing the RX ring of the + * receive queue, up to *nb_pkts* packets, and for each completed RX + * descriptor in the ring, it performs the following operations: + * + * - Initialize the *rte_mbuf* data structure associated with the + * RX descriptor according to the information provided by the NIC into + * that RX descriptor. + * + * - Store the *rte_mbuf* data structure into the next entry of the + * *rx_pkts* array. + * + * - Replenish the RX descriptor with a new *rte_mbuf* buffer + * allocated from the memory pool associated with the receive queue at + * initialization time. + * + * When retrieving an input packet that was scattered by the controller + * into multiple receive descriptors, the rte_eth_rx_burst() function + * appends the associated *rte_mbuf* buffers to the first buffer of the + * packet. 
+ * + * The rte_eth_rx_burst() function returns the number of packets + * actually retrieved, which is the number of *rte_mbuf* data structures + * effectively supplied into the *rx_pkts* array. + * A return value equal to *nb_pkts* indicates that the RX queue contained + * at least *rx_pkts* packets, and this is likely to signify that other + * received packets remain in the input queue. Applications implementing + * a "retrieve as much received packets as possible" policy can check this + * specific case and keep invoking the rte_eth_rx_burst() function until + * a value less than *nb_pkts* is returned. + * + * This receive method has the following advantages: + * + * - It allows a run-to-completion network stack engine to retrieve and + * to immediately process received packets in a fast burst-oriented + * approach, avoiding the overhead of unnecessary intermediate packet + * queue/dequeue operations. + * + * - Conversely, it also allows an asynchronous-oriented processing + * method to retrieve bursts of received packets and to immediately + * queue them for further parallel processing by another logical core, + * for instance. However, instead of having received packets being + * individually queued by the driver, this approach allows the caller + * of the rte_eth_rx_burst() function to queue a burst of retrieved + * packets at a time and therefore dramatically reduce the cost of + * enqueue/dequeue operations per packet. + * + * - It allows the rte_eth_rx_burst() function of the driver to take + * advantage of burst-oriented hardware features (CPU cache, + * prefetch instructions, and so on) to minimize the number of CPU + * cycles per packet. + * + * To summarize, the proposed receive API enables many + * burst-oriented optimizations in both synchronous and asynchronous + * packet processing environments with no overhead in both cases. + * + * The rte_eth_rx_burst() function does not provide any error + * notification to avoid the corresponding overhead. As a hint, the + * upper-level application might check the status of the device link once + * being systematically returned a 0 value for a given number of tries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param rx_pkts + * The address of an array of pointers to *rte_mbuf* structures that + * must be large enough to store *nb_pkts* pointers in it. + * @param nb_pkts + * The maximum number of packets to retrieve. + * @return + * The number of packets actually retrieved, which is the number + * of pointers to *rte_mbuf* structures effectively supplied to the + * *rx_pkts* array. 
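+ *
+ * A typical polling loop looks roughly as follows (an illustrative sketch;
+ * the burst size of 32 and the processing policy are placeholders, and
+ * *port_id* and *queue_id* are assumed to identify a started queue):
+ *
+ * @code{.c}
+ * struct rte_mbuf *pkts[32];
+ * uint16_t nb_rx, i;
+ *
+ * for (;;) {
+ *     nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
+ *     for (i = 0; i < nb_rx; i++) {
+ *         // process pkts[i] here, then release it
+ *         rte_pktmbuf_free(pkts[i]);
+ *     }
+ * }
+ * @endcode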
+ */ +static inline uint16_t +rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, + struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + uint16_t nb_rx; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id); + return 0; + } +#endif + nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], + rx_pkts, nb_pkts); + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + if (unlikely(dev->post_rx_burst_cbs[queue_id] != NULL)) { + struct rte_eth_rxtx_callback *cb = + dev->post_rx_burst_cbs[queue_id]; + + do { + nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, + nb_pkts, cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return nb_rx; +} + +/** + * Get the number of used descriptors of a rx queue + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @return + * The number of used descriptors in the specific queue, or: + * (-EINVAL) if *port_id* or *queue_id* is invalid + * (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); + if (queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + return (int)(*dev->dev_ops->rx_queue_count)(dev, queue_id); +} + +/** + * Check if the DD bit of the specific RX descriptor in the queue has been set + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @param offset + * The offset of the descriptor ID from tail. + * @return + * - (1) if the specific DD bit is set. + * - (0) if the specific DD bit is not set. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP); + return (*dev->dev_ops->rx_descriptor_done)( \ + dev->data->rx_queues[queue_id], offset); +} + +#define RTE_ETH_RX_DESC_AVAIL 0 /**< Desc available for hw. */ +#define RTE_ETH_RX_DESC_DONE 1 /**< Desc done, filled by hw. */ +#define RTE_ETH_RX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Rx descriptor in the queue + * + * It should be called in a similar context than the Rx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing to a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device which. + * @param queue_id + * A valid Rx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the next + * packet to be received by the driver). 
+ * + * @return + * - (RTE_ETH_RX_DESC_AVAIL): Descriptor is available for the hardware to + * receive a packet. + * - (RTE_ETH_RX_DESC_DONE): Descriptor is done, it is filled by hw, but + * not yet processed by the driver (i.e. in the receive queue). + * - (RTE_ETH_RX_DESC_UNAVAIL): Descriptor is unavailable, either hold by + * the driver and not yet returned to hw, or reserved by the hw. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int +rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id, + uint16_t offset) +{ + struct rte_eth_dev *dev; + void *rxq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_rx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_status, -ENOTSUP); + rxq = dev->data->rx_queues[queue_id]; + + return (*dev->dev_ops->rx_descriptor_status)(rxq, offset); +} + +#define RTE_ETH_TX_DESC_FULL 0 /**< Desc filled for hw, waiting xmit. */ +#define RTE_ETH_TX_DESC_DONE 1 /**< Desc done, packet is transmitted. */ +#define RTE_ETH_TX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Tx descriptor in the queue. + * + * It should be called in a similar context than the Tx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing to a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device which. + * @param queue_id + * A valid Tx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the place where + * the next packet will be send). + * + * @return + * - (RTE_ETH_TX_DESC_FULL) Descriptor is being processed by the hw, i.e. + * in the transmit queue. + * - (RTE_ETH_TX_DESC_DONE) Hardware is done with this descriptor, it can + * be reused by the driver. + * - (RTE_ETH_TX_DESC_UNAVAIL): Descriptor is unavailable, reserved by the + * driver or the hardware. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int rte_eth_tx_descriptor_status(uint16_t port_id, + uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev; + void *txq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_descriptor_status, -ENOTSUP); + txq = dev->data->tx_queues[queue_id]; + + return (*dev->dev_ops->tx_descriptor_status)(txq, offset); +} + +/** + * Send a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_burst() function is invoked to transmit output packets + * on the output queue *queue_id* of the Ethernet device designated by its + * *port_id*. 
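Before the transmit path is covered, a hedged sketch of how an application might use the descriptor status helpers documented above; the port, queue and offset values are assumptions supplied by the caller, and the helper name is illustrative only:

#include <rte_ethdev.h>

/*
 * Returns 1 if the Rx descriptor at "offset" from the tail already holds a
 * received packet not yet handed to the application, 0 otherwise, or a
 * negative error code (-ENOTSUP, -EINVAL, ...) passed through from the API.
 */
static int
rx_ring_has_work(uint16_t port_id, uint16_t queue_id, uint16_t offset)
{
	int status = rte_eth_rx_descriptor_status(port_id, queue_id, offset);

	if (status < 0)
		return status;

	return status == RTE_ETH_RX_DESC_DONE;
}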
+ * The *nb_pkts* parameter is the number of packets to send which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets, + * up to the number of transmit descriptors available in the TX ring of the + * transmit queue. + * For each packet to send, the rte_eth_tx_burst() function performs + * the following operations: + * + * - Pick up the next available descriptor in the transmit ring. + * + * - Free the network buffer previously sent with that descriptor, if any. + * + * - Initialize the transmit descriptor with the information provided + * in the *rte_mbuf data structure. + * + * In the case of a segmented packet composed of a list of *rte_mbuf* buffers, + * the rte_eth_tx_burst() function uses several transmit descriptors + * of the ring. + * + * The rte_eth_tx_burst() function returns the number of packets it + * actually sent. A return value equal to *nb_pkts* means that all packets + * have been sent, and this is likely to signify that other output packets + * could be immediately transmitted again. Applications that implement a + * "send as many packets to transmit as possible" policy can check this + * specific case and keep invoking the rte_eth_tx_burst() function until + * a value less than *nb_pkts* is returned. + * + * It is the responsibility of the rte_eth_tx_burst() function to + * transparently free the memory buffers of packets previously sent. + * This feature is driven by the *tx_free_thresh* value supplied to the + * rte_eth_dev_configure() function at device configuration time. + * When the number of free TX descriptors drops below this threshold, the + * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers + * of those packets whose transmission was effectively completed. + * + * If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can + * invoke this function concurrently on the same tx queue without SW lock. + * @see rte_eth_dev_info_get, struct rte_eth_txconf::offloads + * + * @see rte_eth_tx_prepare to perform some prior checks or adjustments + * for offloads. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to transmit. + * @return + * The number of output packets actually stored in transmit descriptors of + * the transmit ring. The return value can be less than the value of the + * *tx_pkts* parameter when the transmit ring is full or has been filled up. 
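A hedged sketch of the "send as many packets as possible" policy mentioned above; the packet array is assumed to be owned by the caller, and dropping whatever the driver finally refuses is an application policy choice, not an API requirement:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
send_burst(uint16_t port_id, uint16_t queue_id,
	   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = 0;

	/* Keep calling rte_eth_tx_burst() until everything is accepted
	 * or the driver stops making progress (ring full). */
	while (sent < nb_pkts) {
		uint16_t nb_tx = rte_eth_tx_burst(port_id, queue_id,
						  &pkts[sent], nb_pkts - sent);
		if (nb_tx == 0)
			break;
		sent += nb_tx;
	}

	/* Free the packets the driver did not accept. */
	for (uint16_t i = sent; i < nb_pkts; i++)
		rte_pktmbuf_free(pkts[i]);
}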
+ */
+static inline uint16_t
+rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
+		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
+
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
+		return 0;
+	}
+#endif
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+	struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+	if (unlikely(cb != NULL)) {
+		do {
+			nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts,
+					cb->param);
+			cb = cb->next;
+		} while (cb != NULL);
+	}
+#endif
+
+	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prepare() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prepare() function performs
+ * the following operations:
+ *
+ * - Check if the packet meets the device's requirements for Tx offloads.
+ *
+ * - Check limitations on the number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset the required checksums when a Tx offload is set
+ *   for the packet.
+ *
+ * Since this function can modify packet data, the provided mbufs must be
+ * safely writable (e.g. modified data must not reside in a shared segment).
+ *
+ * The rte_eth_tx_prepare() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent; otherwise it stops processing at the first invalid packet
+ * and leaves the remaining packets untouched.
+ *
+ * When this functionality is not implemented in the driver, all packets are
+ * returned untouched.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ *   The value must be a valid port id.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to process.
+ * @return
+ *   The number of packets correct and ready to be sent. The return value can
+ *   be less than the value of the *tx_pkts* parameter when a packet does not
+ *   meet the device's requirements; in that case rte_errno is set
+ *   appropriately:
+ *   - -EINVAL: offload flags are not correctly set
+ *   - -ENOTSUP: the offload feature is not supported by the hardware
+ */
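A minimal sketch of the intended rte_eth_tx_prepare()/rte_eth_tx_burst() pairing, assuming a valid, started port; the helper name and the decision to transmit only the successfully prepared prefix of the burst are illustrative, not mandated by the API:

#include <stdio.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
prepare_and_send(uint16_t port_id, uint16_t queue_id,
		 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	/* Validate and fix up offload metadata before transmission. */
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

	if (nb_prep != nb_pkts) {
		/* pkts[nb_prep] failed the device requirements. */
		printf("tx_prepare: packet %u rejected, rte_errno=%d\n",
		       nb_prep, rte_errno);
	}

	/* Transmit only the packets that passed the checks. */
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}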
+
+#ifndef RTE_ETHDEV_TX_PREPARE_NOOP
+
+static inline uint16_t
+rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	if (!dev->tx_pkt_prepare)
+		return nb_pkts;
+
+	return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
+			tx_pkts, nb_pkts);
+}
+
+#else
+
+/*
+ * Native NOOP operation for compilation targets which do not require any
+ * preparation steps, and where a functional NOOP would only introduce an
+ * unnecessary performance drop.
+ *
+ * It is generally not a good idea to turn this on globally, and it should
+ * not be used if the behavior of tx_prepare can change.
+ */
+
+static inline uint16_t
+rte_eth_tx_prepare(__rte_unused uint16_t port_id,
+		__rte_unused uint16_t queue_id,
+		__rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	return nb_pkts;
+}
+
+#endif
+
+/**
+ * Send any packets queued up for transmission on a port and HW queue
+ *
+ * This causes an explicit flush of packets previously buffered via the
+ * rte_eth_tx_buffer() function. It returns the number of packets successfully
+ * sent to the NIC, and calls the error callback for any unsent packets. Unless
+ * explicitly set up otherwise, the default callback simply frees the unsent
+ * packets back to the owning mempool.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param buffer
+ *   Buffer of packets to be transmitted.
+ * @return
+ *   The number of packets successfully sent to the Ethernet device. The error
+ *   callback is called for any packets which could not be sent.
+ */
+static inline uint16_t
+rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_dev_tx_buffer *buffer)
+{
+	uint16_t sent;
+	uint16_t to_send = buffer->length;
+
+	if (to_send == 0)
+		return 0;
+
+	sent = rte_eth_tx_burst(port_id, queue_id, buffer->pkts, to_send);
+
+	buffer->length = 0;
+
+	/* All packets sent, or to be dealt with by callback below */
+	if (unlikely(sent != to_send))
+		buffer->error_callback(&buffer->pkts[sent],
+				(uint16_t)(to_send - sent),
+				buffer->error_userdata);
+
+	return sent;
+}
+
+/**
+ * Buffer a single packet for future transmission on a port and queue
+ *
+ * This function takes a single mbuf/packet and buffers it for later
+ * transmission on the particular port and queue specified. Once the buffer is
+ * full of packets, an attempt will be made to transmit all the buffered
+ * packets. In case of error, where not all packets can be transmitted, a
+ * callback is called with the unsent packets as a parameter.
If no callback + * is explicitly set up, the unsent packets are just freed back to the owning + * mempool. The function returns the number of packets actually sent i.e. + * 0 if no buffer flush occurred, otherwise the number of packets successfully + * flushed + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param buffer + * Buffer used to collect packets to be sent. + * @param tx_pkt + * Pointer to the packet mbuf to be sent. + * @return + * 0 = packet has been buffered for later transmission + * N > 0 = packet has been buffered, and the buffer was subsequently flushed, + * causing N packets to be sent, and the error callback to be called for + * the rest. + */ +static __rte_always_inline uint16_t +rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id, + struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) +{ + buffer->pkts[buffer->length++] = tx_pkt; + if (buffer->length < buffer->size) + return 0; + + return rte_eth_tx_buffer_flush(port_id, queue_id, buffer); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h new file mode 100644 index 00000000..33d12b3a --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_core.h @@ -0,0 +1,625 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_CORE_H_ +#define _RTE_ETHDEV_CORE_H_ + +/** + * @file + * + * RTE Ethernet Device internal header. + * + * This header contains internal data types. But they are still part of the + * public API because they are used by inline functions in the published API. + * + * Applications should not use these directly. + * + */ + +struct rte_eth_dev_callback; +/** @internal Structure to keep track of registered callbacks */ +TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); + +/* + * Definitions of all functions exported by an Ethernet driver through the + * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev* + * structure associated with an Ethernet device. + */ +struct rte_eth_dev; + +typedef int (*eth_dev_configure_t)(struct rte_eth_dev *dev); +/**< @internal Ethernet device configuration. */ + +typedef int (*eth_dev_start_t)(struct rte_eth_dev *dev); +/**< @internal Function used to start a configured Ethernet device. */ + +typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev); +/**< @internal Function used to stop a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link up a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link down a configured Ethernet device. */ + +typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); +/**< @internal Function used to close a configured Ethernet device. */ + +typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev); +/** <@internal Function used to reset a configured Ethernet device. */ + +typedef int (*eth_is_removed_t)(struct rte_eth_dev *dev); +/**< @internal Function used to detect an Ethernet device removal. */ + +typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. 
*/ + +typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev); +/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */ + +typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev); +/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */ + +typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, + int wait_to_complete); +/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ + +typedef int (*eth_stats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_stats *igb_stats); +/**< @internal Get global I/O statistics of an Ethernet device. */ + +typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset global I/O statistics of an Ethernet device to 0. */ + +typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat *stats, unsigned n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev, + const uint64_t *ids, + uint64_t *values, + unsigned int n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, unsigned size); +/**< @internal Get names of extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, + unsigned int size); +/**< @internal Get names of extended stats of an Ethernet device. */ + +typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, + uint16_t queue_id, + uint8_t stat_idx, + uint8_t is_rx); +/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */ + +typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +/**< @internal Get specific informations of an Ethernet device. */ + +typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev); +/**< @internal Get supported ptypes of an Ethernet device. */ + +typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Start rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Stop rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); +/**< @internal Set up a receive queue of an Ethernet device. */ + +typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +/**< @internal Setup a transmit queue of an Ethernet device. */ + +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Enable interrupt of a receive queue of an Ethernet device. 
*/ + +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */ + +typedef void (*eth_queue_release_t)(void *queue); +/**< @internal Release memory resources allocated by given RX/TX queue. */ + +typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Get number of used descriptors on a receive queue. */ + +typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); +/**< @internal Check DD bit of specific RX descriptor */ + +typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset); +/**< @internal Check the status of a Rx descriptor */ + +typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset); +/**< @internal Check the status of a Tx descriptor */ + +typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +/**< @internal Get firmware information of an Ethernet device. */ + +typedef int (*eth_tx_done_cleanup_t)(void *txq, uint32_t free_cnt); +/**< @internal Force mbufs to be from TX ring. */ + +typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); + +typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo); + +typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu); +/**< @internal Set MTU. */ + +typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */ + +typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev, + enum rte_vlan_type type, uint16_t tpid); +/**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */ + +typedef int (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); +/**< @internal set VLAN offload function by an Ethernet device. */ + +typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal set port based TX VLAN insertion by an Ethernet device. */ + +typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + int on); +/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */ + +typedef uint16_t (*eth_rx_burst_t)(void *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_burst_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Send output packets on a transmit queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + +typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Get current flow control parameter on an Ethernet device */ + +typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Setup flow control parameter on an Ethernet device */ + +typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_pfc_conf *pfc_conf); +/**< @internal Setup priority flow control parameter on an Ethernet device */ + +typedef int (*reta_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Update RSS redirection table on an Ethernet device */ + +typedef int (*reta_query_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Query RSS redirection table on an Ethernet device */ + +typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Update RSS hash configuration of an Ethernet device */ + +typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Get current RSS hash configuration of an Ethernet device */ + +typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev); +/**< @internal Turn on SW controllable LED on an Ethernet device */ + +typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev); +/**< @internal Turn off SW controllable LED on an Ethernet device */ + +typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); +/**< @internal Remove MAC address from receive address register */ + +typedef int (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, + uint32_t vmdq); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef int (*eth_mac_addr_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint8_t on); +/**< @internal Set a Unicast Hash bitmap */ + +typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, + uint8_t on); +/**< @internal Set all Unicast Hash bitmap */ + +typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t tx_rate); +/**< @internal Set queue TX rate */ + +typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); +/**< @internal Add a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev, + uint8_t rule_id); +/**< @internal Remove a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_udp_tunnel_port_add_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Add tunneling UDP port */ + +typedef int (*eth_udp_tunnel_port_del_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Delete tunneling UDP port */ + +typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +/**< @internal set the list of multicast addresses on an Ethernet device */ + +typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable IEEE1588/802.1AS timestamping. 
*/ + +typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t flags); +/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t); +/**< @internal Function used to adjust the device clock */ + +typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to get time from the device clock. */ + +typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev, + const struct timespec *timestamp); +/**< @internal Function used to get time from the device clock */ + +typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev, + struct rte_dev_reg_info *info); +/**< @internal Retrieve registers */ + +typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve eeprom size */ + +typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve eeprom data */ + +typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Program eeprom data */ + +typedef int (*eth_get_module_info_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_module_info *modinfo); +/**< @internal Retrieve type and size of plugin module eeprom */ + +typedef int (*eth_get_module_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve plugin module eeprom data */ + +typedef int (*eth_l2_tunnel_eth_type_conf_t) + (struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel); +/**< @internal config l2 tunnel ether type */ + +typedef int (*eth_l2_tunnel_offload_set_t) + (struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); +/**< @internal enable/disable the l2 tunnel offload functions */ + + +typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg); +/**< @internal Take operations to assigned filter type on an Ethernet device */ + +typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Traffic Management (TM) operations on an Ethernet device */ + +typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Trafffic Metering and Policing (MTR) operations */ + +typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, + struct rte_eth_dcb_info *dcb_info); +/**< @internal Get dcb information on an Ethernet device */ + +typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev, + const char *pool); +/**< @internal Test if a port supports specific mempool ops */ + +/** + * @internal A structure containing the functions exported by an Ethernet driver. + */ +struct eth_dev_ops { + eth_dev_configure_t dev_configure; /**< Configure device. */ + eth_dev_start_t dev_start; /**< Start device. */ + eth_dev_stop_t dev_stop; /**< Stop device. */ + eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ + eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ + eth_dev_close_t dev_close; /**< Close device. 
*/ + eth_dev_reset_t dev_reset; /**< Reset device. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_is_removed_t is_removed; + /**< Check if the device was physically removed. */ + + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ + eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ + eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ + eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ + mtu_set_t mtu_set; /**< Set MTU. */ + + eth_stats_get_t stats_get; /**< Get generic device statistics. */ + eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ + eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ + eth_xstats_reset_t xstats_reset; /**< Reset extended device statistics. */ + eth_xstats_get_names_t xstats_get_names; + /**< Get names of extended statistics. */ + eth_queue_stats_mapping_set_t queue_stats_mapping_set; + /**< Configure per queue stat counter mapping. */ + + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ + eth_fw_version_get_t fw_version_get; /**< Get firmware version. */ + eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; + /**< Get packet types supported and identified by device. */ + + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ + vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ + vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ + + eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ + eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ + eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ + eth_queue_release_t rx_queue_release; /**< Release RX queue. */ + eth_rx_queue_count_t rx_queue_count; + /**< Get the number of used RX descriptors. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ + eth_rx_descriptor_status_t rx_descriptor_status; + /**< Check the status of a Rx descriptor. */ + eth_tx_descriptor_status_t tx_descriptor_status; + /**< Check the status of a Tx descriptor. */ + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ + eth_queue_release_t tx_queue_release; /**< Release TX queue. */ + eth_tx_done_cleanup_t tx_done_cleanup;/**< Free tx ring mbufs */ + + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ + eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ + flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. 
*/ + + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ + + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. */ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ + + eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Config ether type of l2 tunnel. */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Enable/disable l2 tunnel offload functions. */ + + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ + + rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ + rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ + reta_update_t reta_update; /** Update redirection table. */ + reta_query_t reta_query; /** Query redirection table. */ + + eth_get_reg_t get_reg; /**< Get registers. */ + eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ + eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ + eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ + + eth_get_module_info_t get_module_info; + /** Get plugin module eeprom attribute. */ + eth_get_module_eeprom_t get_module_eeprom; + /** Get plugin module eeprom data. */ + + eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ + + eth_get_dcb_info get_dcb_info; /** Get DCB information. */ + + eth_timesync_enable_t timesync_enable; + /** Turn IEEE1588/802.1AS timestamping on. */ + eth_timesync_disable_t timesync_disable; + /** Turn IEEE1588/802.1AS timestamping off. */ + eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; + /** Read the IEEE1588/802.1AS RX timestamp. */ + eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ + eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ + eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ + + eth_xstats_get_by_id_t xstats_get_by_id; + /**< Get extended device statistic values by ID. */ + eth_xstats_get_names_by_id_t xstats_get_names_by_id; + /**< Get name of extended device statistics by ID. */ + + eth_tm_ops_get_t tm_ops_get; + /**< Get Traffic Management (TM) operations. */ + + eth_mtr_ops_get_t mtr_ops_get; + /**< Get Traffic Metering and Policing (MTR) operations. */ + + eth_pool_ops_supported_t pool_ops_supported; + /**< Test if a port supports specific mempool ops */ +}; + +/** + * @internal + * Structure used to hold information about the callbacks to be called for a + * queue on RX and TX. + */ +struct rte_eth_rxtx_callback { + struct rte_eth_rxtx_callback *next; + union{ + rte_rx_callback_fn rx; + rte_tx_callback_fn tx; + } fn; + void *param; +}; + +/** + * @internal + * The generic data structure associated with each ethernet device. + * + * Pointers to burst-oriented packet receive and transmit functions are + * located at the beginning of the structure, along with the pointer to + * where all the data elements for the particular device are stored in shared + * memory. This split allows the function pointer and driver data to be per- + * process, while the actual configuration data for the device is shared. 
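A hedged illustration of the per-process/shared split described above, as a driver might wire it up at init time; every mypmd_* symbol is a hypothetical driver-side name, and only a few eth_dev_ops entries are shown:

#include <rte_ethdev_driver.h>

/* Hypothetical driver entry points (prototypes only, for illustration). */
static int mypmd_dev_configure(struct rte_eth_dev *dev);
static int mypmd_dev_start(struct rte_eth_dev *dev);
static void mypmd_dev_stop(struct rte_eth_dev *dev);
static int mypmd_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static uint16_t mypmd_recv_pkts(void *rxq, struct rte_mbuf **pkts, uint16_t n);
static uint16_t mypmd_xmit_pkts(void *txq, struct rte_mbuf **pkts, uint16_t n);

static const struct eth_dev_ops mypmd_dev_ops = {
	.dev_configure = mypmd_dev_configure,
	.dev_start = mypmd_dev_start,
	.dev_stop = mypmd_dev_stop,
	.link_update = mypmd_link_update,
};

static int
mypmd_eth_dev_init(struct rte_eth_dev *eth_dev)
{
	/* Per-process pointers: set in every process that uses the port. */
	eth_dev->dev_ops = &mypmd_dev_ops;
	eth_dev->rx_pkt_burst = mypmd_recv_pkts;
	eth_dev->tx_pkt_burst = mypmd_xmit_pkts;

	/* Shared configuration lives behind eth_dev->data (dev_private,
	 * dev_conf, MAC addresses, ...), allocated once by the primary
	 * process, e.g. via the bus-specific allocation helpers. */
	return 0;
}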
+ */ +struct rte_eth_dev { + eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ + eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ + struct rte_eth_dev_data *data; /**< Pointer to device data */ + const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_device *device; /**< Backing device */ + struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ + /** User application callbacks for NIC interrupts */ + struct rte_eth_dev_cb_list link_intr_cbs; + /** + * User-supplied functions called from rx_burst to post-process + * received packets before passing them to the user + */ + struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + /** + * User-supplied functions called from tx_burst to pre-process + * received packets before passing them to the driver for transmission. + */ + struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + enum rte_eth_dev_state state; /**< Flag indicating the port state */ + void *security_ctx; /**< Context for security ops */ +} __rte_cache_aligned; + +struct rte_eth_dev_sriov; +struct rte_eth_dev_owner; + +/** + * @internal + * The data part, with no function pointers, associated with each ethernet device. + * + * This structure is safe to place in shared memory to be common among different + * processes in a multi-process configuration. + */ +struct rte_eth_dev_data { + char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */ + + void **rx_queues; /**< Array of pointers to RX queues. */ + void **tx_queues; /**< Array of pointers to TX queues. */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + + struct rte_eth_dev_sriov sriov; /**< SRIOV data */ + + void *dev_private; /**< PMD-specific private data */ + + struct rte_eth_link dev_link; + /**< Link-level information & status */ + + struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ + uint16_t mtu; /**< Maximum Transmission Unit. */ + + uint32_t min_rx_buf_size; + /**< Common rx buffer size handled by all queues */ + + uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ + struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ + uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; + /** bitmap array of associating Ethernet MAC addresses to pools */ + struct ether_addr* hash_mac_addrs; + /** Device Ethernet MAC addresses of hash filtering. */ + uint16_t port_id; /**< Device [external] port identifier. */ + __extension__ + uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ + scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ + all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ + dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ + lro : 1; /**< RX LRO is ON(1) / OFF(0) */ + uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint32_t dev_flags; /**< Capabilities */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + int numa_node; /**< NUMA node connection */ + struct rte_vlan_filter_conf vlan_filter_conf; + /**< VLAN filter configuration. */ + struct rte_eth_dev_owner owner; /**< The port owner. */ +} __rte_cache_aligned; + +/** + * @internal + * The pool of *rte_eth_dev* structures. 
The size of the pool + * is configured at compile-time in the file. + */ +extern struct rte_eth_dev rte_eth_devices[]; + +#endif /* _RTE_ETHDEV_CORE_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h new file mode 100644 index 00000000..c6d9bc1a --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_driver.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_DRIVER_H_ +#define _RTE_ETHDEV_DRIVER_H_ + +/** + * @file + * + * RTE Ethernet Device PMD API + * + * These APIs for the use from Ethernet drivers, user applications shouldn't + * use them. + * + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @internal + * Returns a ethdev slot specified by the unique identifier name. + * + * @param name + * The pointer to the Unique identifier name for each Ethernet device + * @return + * - The pointer to the ethdev slot, on success. NULL on error + */ +struct rte_eth_dev *rte_eth_dev_allocated(const char *name); + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param name Unique identifier name for each Ethernet device + * @return + * - Slot in the rte_dev_devices array for a new device; + */ +struct rte_eth_dev *rte_eth_dev_allocate(const char *name); + +/** + * @internal + * Attach to the ethdev already initialized by the primary + * process. + * + * @param name Ethernet device's name. + * @return + * - Success: Slot in the rte_dev_devices array for attached + * device. + * - Error: Null pointer. + */ +struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name); + +/** + * @internal + * Release the specified ethdev port. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @return + * - 0 on success, negative on error + */ +int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Release device queues and clear its configuration to force the user + * application to reconfigure it. It is for internal use only. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * void + */ +void _rte_eth_dev_reset(struct rte_eth_dev *dev); + +/** + * @internal Executes all the user application registered callbacks for + * the specific device. It is for DPDK internal user only. User + * application should not call it directly. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param event + * Eth device interrupt event type. + * @param ret_param + * To pass data back to user application. + * This allows the user application to decide if a particular function + * is permitted or not. + * + * @return + * int + */ +int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event, void *ret_param); + +/** + * @internal + * This is the last step of device probing. + * It must be called after a port is allocated and initialized successfully. + * + * The notification RTE_ETH_EVENT_NEW is sent to other entities + * (libraries and applications). + * The state is set as RTE_ETH_DEV_ATTACHED. + * + * @param dev + * New ethdev port. + */ +void rte_eth_dev_probing_finish(struct rte_eth_dev *dev); + +/** + * Create memzone for HW rings. + * malloc can't be used as the physical address is needed. + * If the memzone is already created, then this function returns a ptr + * to the old one. 
+ * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * @param name + * The name of the memory zone + * @param queue_id + * The index of the queue to add to name + * @param size + * The sizeof of the memory area + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + */ +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, + uint16_t queue_id, size_t size, + unsigned align, int socket_id); + +/** + * @internal + * Atomically set the link status for the specific device. + * It is for use by DPDK device driver use only. + * User applications should not call it + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param link + * New link status value. + * @return + * Same convention as eth_link_update operation. + * 0 if link up status has changed + * -1 if link up status was unchanged + */ +static inline int +rte_eth_linkstatus_set(struct rte_eth_dev *dev, + const struct rte_eth_link *new_link) +{ + volatile uint64_t *dev_link + = (volatile uint64_t *)&(dev->data->dev_link); + union { + uint64_t val64; + struct rte_eth_link link; + } orig; + + RTE_BUILD_BUG_ON(sizeof(*new_link) != sizeof(uint64_t)); + + orig.val64 = rte_atomic64_exchange(dev_link, + *(const uint64_t *)new_link); + + return (orig.link.link_status == new_link->link_status) ? -1 : 0; +} + +/** + * @internal + * Atomically get the link speed and status. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param link + * link status value. + */ +static inline void +rte_eth_linkstatus_get(const struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + volatile uint64_t *src = (uint64_t *)&(dev->data->dev_link); + uint64_t *dst = (uint64_t *)link; + + RTE_BUILD_BUG_ON(sizeof(*link) != sizeof(uint64_t)); + +#ifdef __LP64__ + /* if cpu arch has 64 bit unsigned lon then implicitly atomic */ + *dst = *src; +#else + /* can't use rte_atomic64_read because it returns signed int */ + do { + *dst = *src; + } while (!rte_atomic64_cmpset(src, *dst, *dst)); +#endif +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Allocate an unique switch domain identifier. + * + * A pool of switch domain identifiers which can be allocated on request. This + * will enabled devices which support the concept of switch domains to request + * a switch domain id which is guaranteed to be unique from other devices + * running in the same process. + * + * @param domain_id + * switch domain identifier parameter to pass back to application + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_switch_domain_alloc(uint16_t *domain_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Free switch domain. + * + * Return a switch domain identifier to the pool of free identifiers after it is + * no longer in use by device. + * + * @param domain_id + * switch domain identifier to free + * + * @return + * Negative errno value on error, 0 on success. 
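Returning to the rte_eth_linkstatus_set() helper defined earlier in this file, a hedged sketch of a driver link_update callback; mypmd_hw_link_is_up() and the reported speed/duplex values are placeholders for hardware-specific code:

#include <string.h>
#include <rte_ethdev_driver.h>

/* Hypothetical hardware query, not part of the ethdev API. */
static int mypmd_hw_link_is_up(struct rte_eth_dev *dev);

static int
mypmd_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
{
	struct rte_eth_link link;

	memset(&link, 0, sizeof(link));
	link.link_status = mypmd_hw_link_is_up(dev) ?
			ETH_LINK_UP : ETH_LINK_DOWN;
	link.link_speed = ETH_SPEED_NUM_10G;
	link.link_duplex = ETH_LINK_FULL_DUPLEX;
	link.link_autoneg = ETH_LINK_AUTONEG;

	/* Atomically publish the new status; returns 0 if it changed. */
	return rte_eth_linkstatus_set(dev, &link);
}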
+ */ +int __rte_experimental +rte_eth_switch_domain_free(uint16_t domain_id); + +/** Generic Ethernet device arguments */ +struct rte_eth_devargs { + uint16_t ports[RTE_MAX_ETHPORTS]; + /** port/s number to enable on a multi-port single function */ + uint16_t nb_ports; + /** number of ports in ports field */ + uint16_t representor_ports[RTE_MAX_ETHPORTS]; + /** representor port/s identifier to enable on device */ + uint16_t nb_representor_ports; + /** number of ports in representor port field */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function to parse ethdev arguments + * + * @param devargs + * device arguments + * @param eth_devargs + * parsed ethdev specific arguments. + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_devargs_parse(const char *devargs, struct rte_eth_devargs *eth_devargs); + + +typedef int (*ethdev_init_t)(struct rte_eth_dev *ethdev, void *init_params); +typedef int (*ethdev_bus_specific_init)(struct rte_eth_dev *ethdev, + void *bus_specific_init_params); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function for the creation of a new ethdev ports. + * + * @param device + * rte_device handle. + * @param name + * port name. + * @param priv_data_size + * size of private data required for port. + * @param bus_specific_init + * port bus specific initialisation callback function + * @param bus_init_params + * port bus specific initialisation parameters + * @param ethdev_init + * device specific port initialization callback function + * @param init_params + * port initialisation parameters + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_dev_create(struct rte_device *device, const char *name, + size_t priv_data_size, + ethdev_bus_specific_init bus_specific_init, void *bus_init_params, + ethdev_init_t ethdev_init, void *init_params); + + +typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function for cleaing up the resources of a ethdev port on it's + * destruction. + * + * @param ethdev + * ethdev handle of port. + * @param ethdev_uninit + * device specific port un-initialise callback function + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit); + +/** + * PMD helper function to check if keeping CRC is requested + * + * @note + * When CRC_STRIP offload flag is removed and default behavior switch to + * strip CRC, as planned, this helper function is not that useful and will be + * removed. 
In PMDs this function will be replaced with check: + * if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) + * + * @param rx_offloads + * offload bits to be applied + * + * @return + * Return positive if keeping CRC is requested, + * zero if stripping CRC is requested + */ +static inline int +rte_eth_dev_must_keep_crc(uint64_t rx_offloads) +{ + if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP) + return 0; + + /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */ + return 1; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_DRIVER_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h new file mode 100644 index 00000000..f652596f --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_pci.h @@ -0,0 +1,210 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Brocade Communications Systems, Inc. + * Author: Jan Blunck + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_PCI_H_ +#define _RTE_ETHDEV_PCI_H_ + +#include +#include +#include +#include +#include + +/** + * Copy pci device info to the Ethernet device data. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @param pci_dev + * The *pci_dev* pointer is the address of the *rte_pci_device* structure. 
+ */ +static inline void +rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, + struct rte_pci_device *pci_dev) +{ + if ((eth_dev == NULL) || (pci_dev == NULL)) { + RTE_ETHDEV_LOG(ERR, "NULL pointer eth_dev=%p pci_dev=%p", + (void *)eth_dev, (void *)pci_dev); + return; + } + + eth_dev->intr_handle = &pci_dev->intr_handle; + + eth_dev->data->dev_flags = 0; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; + + eth_dev->data->kdrv = pci_dev->kdrv; + eth_dev->data->numa_node = pci_dev->device.numa_node; +} + +static inline int +eth_dev_pci_specific_init(struct rte_eth_dev *eth_dev, void *bus_device) { + struct rte_pci_device *pci_dev = bus_device; + + if (!pci_dev) + return -ENODEV; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + return 0; +} + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param dev + * Pointer to the PCI device + * + * @param private_data_size + * Size of private data structure + * + * @return + * A pointer to a rte_eth_dev or NULL if allocation failed. + */ +static inline struct rte_eth_dev * +rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name; + + if (!dev) + return NULL; + + name = dev->device.name; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) + return NULL; + + if (private_data_size) { + eth_dev->data->dev_private = rte_zmalloc_socket(name, + private_data_size, RTE_CACHE_LINE_SIZE, + dev->device.numa_node); + if (!eth_dev->data->dev_private) { + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + } else { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) + return NULL; + } + + eth_dev->device = &dev->device; + rte_eth_copy_pci_info(eth_dev, dev); + return eth_dev; +} + +static inline void +rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->data->dev_private = NULL; + + /* + * Secondary process will check the name to attach. + * Clear this field to avoid attaching a released ports. + */ + eth_dev->data->name[0] = '\0'; + + eth_dev->device = NULL; + eth_dev->intr_handle = NULL; + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); +} + +typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Wrapper for use by pci drivers in a .probe function to attach to a ethdev + * interface. + */ +static inline int +rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev, + size_t private_data_size, eth_dev_pci_callback_t dev_init) +{ + struct rte_eth_dev *eth_dev; + int ret; + + eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size); + if (!eth_dev) + return -ENOMEM; + + RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL); + ret = dev_init(eth_dev); + if (ret) + rte_eth_dev_pci_release(eth_dev); + else + rte_eth_dev_probing_finish(eth_dev); + + return ret; +} + +/** + * @internal + * Wrapper for use by pci drivers in a .remove function to detach a ethdev + * interface. 
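A hedged sketch of how a PCI PMD typically plugs these generic probe/remove wrappers into its rte_pci_driver registration; all mypmd_* names, the PCI IDs and the private data structure are hypothetical:

#include <rte_bus_pci.h>
#include <rte_ethdev_pci.h>

/* Hypothetical per-port private data and init/uninit callbacks. */
struct mypmd_adapter { int placeholder; };
static int mypmd_eth_dev_init(struct rte_eth_dev *eth_dev);
static int mypmd_eth_dev_uninit(struct rte_eth_dev *eth_dev);

static const struct rte_pci_id mypmd_pci_id_map[] = {
	{ RTE_PCI_DEVICE(0x8086, 0x1234) },	/* hypothetical IDs */
	{ .vendor_id = 0 },			/* sentinel */
};

static int
mypmd_pci_probe(__rte_unused struct rte_pci_driver *pci_drv,
		struct rte_pci_device *pci_dev)
{
	/* Allocates the ethdev, copies PCI info and calls the init callback. */
	return rte_eth_dev_pci_generic_probe(pci_dev,
			sizeof(struct mypmd_adapter), mypmd_eth_dev_init);
}

static int
mypmd_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, mypmd_eth_dev_uninit);
}

static struct rte_pci_driver mypmd_pmd = {
	.id_table = mypmd_pci_id_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = mypmd_pci_probe,
	.remove = mypmd_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_mypmd, mypmd_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_mypmd, mypmd_pci_id_map);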
+ */ +static inline int +rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev, + eth_dev_pci_callback_t dev_uninit) +{ + struct rte_eth_dev *eth_dev; + int ret; + + eth_dev = rte_eth_dev_allocated(pci_dev->device.name); + if (!eth_dev) + return -ENODEV; + + if (dev_uninit) { + ret = dev_uninit(eth_dev); + if (ret) + return ret; + } + + rte_eth_dev_pci_release(eth_dev); + return 0; +} + +#endif /* _RTE_ETHDEV_PCI_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_vdev.h b/lib/librte_ethdev/rte_ethdev_vdev.h new file mode 100644 index 00000000..259feda3 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_vdev.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Brocade Communications Systems, Inc. + * Author: Jan Blunck + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_VDEV_H_ +#define _RTE_ETHDEV_VDEV_H_ + +#include +#include +#include +#include + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param dev + * Pointer to virtual device + * + * @param private_data_size + * Size of private data structure + * + * @return + * A pointer to a rte_eth_dev or NULL if allocation failed. 
+ */ +static inline struct rte_eth_dev * +rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name = rte_vdev_device_name(dev); + + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) + return NULL; + + if (private_data_size) { + eth_dev->data->dev_private = rte_zmalloc_socket(name, + private_data_size, RTE_CACHE_LINE_SIZE, + dev->device.numa_node); + if (!eth_dev->data->dev_private) { + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + + eth_dev->device = &dev->device; + eth_dev->intr_handle = NULL; + + eth_dev->data->kdrv = RTE_KDRV_NONE; + eth_dev->data->numa_node = dev->device.numa_node; + return eth_dev; +} + +#endif /* _RTE_ETHDEV_VDEV_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map new file mode 100644 index 00000000..38f117f0 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -0,0 +1,255 @@ +DPDK_2.2 { + global: + + rte_eth_add_rx_callback; + rte_eth_add_tx_callback; + rte_eth_allmulticast_disable; + rte_eth_allmulticast_enable; + rte_eth_allmulticast_get; + rte_eth_dev_allocate; + rte_eth_dev_allocated; + rte_eth_dev_attach; + rte_eth_dev_callback_register; + rte_eth_dev_callback_unregister; + rte_eth_dev_close; + rte_eth_dev_configure; + rte_eth_dev_count; + rte_eth_dev_default_mac_addr_set; + rte_eth_dev_detach; + rte_eth_dev_filter_supported; + rte_eth_dev_flow_ctrl_get; + rte_eth_dev_flow_ctrl_set; + rte_eth_dev_get_dcb_info; + rte_eth_dev_get_eeprom; + rte_eth_dev_get_eeprom_length; + rte_eth_dev_get_mtu; + rte_eth_dev_get_reg_info; + rte_eth_dev_get_vlan_offload; + rte_eth_devices; + rte_eth_dev_info_get; + rte_eth_dev_is_valid_port; + rte_eth_dev_mac_addr_add; + rte_eth_dev_mac_addr_remove; + rte_eth_dev_priority_flow_ctrl_set; + rte_eth_dev_release_port; + rte_eth_dev_rss_hash_conf_get; + rte_eth_dev_rss_hash_update; + rte_eth_dev_rss_reta_query; + rte_eth_dev_rss_reta_update; + rte_eth_dev_rx_intr_ctl; + rte_eth_dev_rx_intr_ctl_q; + rte_eth_dev_rx_intr_disable; + rte_eth_dev_rx_intr_enable; + rte_eth_dev_rx_queue_start; + rte_eth_dev_rx_queue_stop; + rte_eth_dev_set_eeprom; + rte_eth_dev_set_link_down; + rte_eth_dev_set_link_up; + rte_eth_dev_set_mc_addr_list; + rte_eth_dev_set_mtu; + rte_eth_dev_set_rx_queue_stats_mapping; + rte_eth_dev_set_tx_queue_stats_mapping; + rte_eth_dev_set_vlan_offload; + rte_eth_dev_set_vlan_pvid; + rte_eth_dev_set_vlan_strip_on_queue; + rte_eth_dev_socket_id; + rte_eth_dev_start; + rte_eth_dev_stop; + rte_eth_dev_tx_queue_start; + rte_eth_dev_tx_queue_stop; + rte_eth_dev_uc_all_hash_table_set; + rte_eth_dev_uc_hash_table_set; + rte_eth_dev_vlan_filter; + rte_eth_dma_zone_reserve; + rte_eth_led_off; + rte_eth_led_on; + rte_eth_link; + rte_eth_link_get; + rte_eth_link_get_nowait; + rte_eth_macaddr_get; + rte_eth_mirror_rule_reset; + rte_eth_mirror_rule_set; + rte_eth_promiscuous_disable; + rte_eth_promiscuous_enable; + rte_eth_promiscuous_get; + rte_eth_remove_rx_callback; + rte_eth_remove_tx_callback; + rte_eth_rx_queue_info_get; + rte_eth_rx_queue_setup; + rte_eth_set_queue_rate_limit; + rte_eth_stats; + rte_eth_stats_get; + rte_eth_stats_reset; + rte_eth_timesync_adjust_time; + rte_eth_timesync_disable; + rte_eth_timesync_enable; + rte_eth_timesync_read_rx_timestamp; + rte_eth_timesync_read_time; + rte_eth_timesync_read_tx_timestamp; + rte_eth_timesync_write_time; + rte_eth_tx_queue_info_get; + rte_eth_tx_queue_setup; + rte_eth_xstats_get; + rte_eth_xstats_reset; + + local: *; +}; + 
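rte_eth_vdev_allocate() above is the virtual-device counterpart of rte_eth_dev_pci_allocate(): a single call that allocates the port and its private data and fills in the kdrv and NUMA fields from the rte_vdev_device. A minimal sketch of a vdev PMD built on it follows; it is illustrative only, the net_myvdev name and myvdev_* symbols are hypothetical, and secondary-process and devargs handling are left out.

#include <errno.h>
#include <rte_bus_vdev.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>

/* Hypothetical per-port private data. */
struct myvdev_private {
	unsigned int pkt_size;
};

static int
myvdev_probe(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;

	eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct myvdev_private));
	if (eth_dev == NULL)
		return -ENOMEM;

	/* eth_dev->dev_ops, burst functions and the MAC address go here. */

	rte_eth_dev_probing_finish(eth_dev);
	return 0;
}

static int
myvdev_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;

	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return -ENODEV;

	rte_free(eth_dev->data->dev_private);
	rte_eth_dev_release_port(eth_dev);
	return 0;
}

static struct rte_vdev_driver myvdev_drv = {
	.probe = myvdev_probe,
	.remove = myvdev_remove,
};

RTE_PMD_REGISTER_VDEV(net_myvdev, myvdev_drv);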
+DPDK_16.04 { + global: + + rte_eth_dev_get_supported_ptypes; + rte_eth_dev_l2_tunnel_eth_type_conf; + rte_eth_dev_l2_tunnel_offload_set; + rte_eth_dev_set_vlan_ether_type; + rte_eth_dev_udp_tunnel_port_add; + rte_eth_dev_udp_tunnel_port_delete; + rte_eth_speed_bitflag; + rte_eth_tx_buffer_count_callback; + rte_eth_tx_buffer_drop_callback; + rte_eth_tx_buffer_init; + rte_eth_tx_buffer_set_err_callback; + +} DPDK_2.2; + +DPDK_16.07 { + global: + + rte_eth_add_first_rx_callback; + rte_eth_dev_get_name_by_port; + rte_eth_dev_get_port_by_name; + rte_eth_xstats_get_names; + +} DPDK_16.04; + +DPDK_17.02 { + global: + + _rte_eth_dev_reset; + rte_eth_dev_fw_version_get; + +} DPDK_16.07; + +DPDK_17.05 { + global: + + rte_eth_dev_attach_secondary; + rte_eth_find_next; + rte_eth_tx_done_cleanup; + rte_eth_xstats_get_by_id; + rte_eth_xstats_get_id_by_name; + rte_eth_xstats_get_names_by_id; + +} DPDK_17.02; + +DPDK_17.08 { + global: + + _rte_eth_dev_callback_process; + rte_eth_dev_adjust_nb_rx_tx_desc; + rte_tm_capabilities_get; + rte_tm_get_number_of_leaf_nodes; + rte_tm_hierarchy_commit; + rte_tm_level_capabilities_get; + rte_tm_mark_ip_dscp; + rte_tm_mark_ip_ecn; + rte_tm_mark_vlan_dei; + rte_tm_node_add; + rte_tm_node_capabilities_get; + rte_tm_node_cman_update; + rte_tm_node_delete; + rte_tm_node_parent_update; + rte_tm_node_resume; + rte_tm_node_shaper_update; + rte_tm_node_shared_shaper_update; + rte_tm_node_shared_wred_context_update; + rte_tm_node_stats_read; + rte_tm_node_stats_update; + rte_tm_node_suspend; + rte_tm_node_type_get; + rte_tm_node_wfq_weight_mode_update; + rte_tm_node_wred_context_update; + rte_tm_shaper_profile_add; + rte_tm_shaper_profile_delete; + rte_tm_shared_shaper_add_update; + rte_tm_shared_shaper_delete; + rte_tm_shared_wred_context_add_update; + rte_tm_shared_wred_context_delete; + rte_tm_wred_profile_add; + rte_tm_wred_profile_delete; + +} DPDK_17.05; + +DPDK_17.11 { + global: + + rte_eth_dev_get_sec_ctx; + rte_eth_dev_pool_ops_supported; + rte_eth_dev_reset; + +} DPDK_17.08; + +DPDK_18.02 { + global: + + rte_eth_dev_filter_ctrl; + +} DPDK_17.11; + +DPDK_18.05 { + global: + + rte_eth_dev_count_avail; + rte_eth_dev_probing_finish; + rte_eth_find_next_owned_by; + rte_flow_copy; + rte_flow_create; + rte_flow_destroy; + rte_flow_error_set; + rte_flow_flush; + rte_flow_isolate; + rte_flow_query; + rte_flow_validate; + +} DPDK_18.02; + +DPDK_18.08 { + global: + + rte_eth_dev_logtype; + +} DPDK_18.05; + +EXPERIMENTAL { + global: + + rte_eth_devargs_parse; + rte_eth_dev_count_total; + rte_eth_dev_create; + rte_eth_dev_destroy; + rte_eth_dev_get_module_eeprom; + rte_eth_dev_get_module_info; + rte_eth_dev_is_removed; + rte_eth_dev_owner_delete; + rte_eth_dev_owner_get; + rte_eth_dev_owner_new; + rte_eth_dev_owner_set; + rte_eth_dev_owner_unset; + rte_eth_dev_rx_offload_name; + rte_eth_dev_tx_offload_name; + rte_eth_switch_domain_alloc; + rte_eth_switch_domain_free; + rte_flow_expand_rss; + rte_mtr_capabilities_get; + rte_mtr_create; + rte_mtr_destroy; + rte_mtr_meter_disable; + rte_mtr_meter_dscp_table_update; + rte_mtr_meter_enable; + rte_mtr_meter_profile_add; + rte_mtr_meter_profile_delete; + rte_mtr_meter_profile_update; + rte_mtr_policer_actions_update; + rte_mtr_stats_read; + rte_mtr_stats_update; +}; diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c new file mode 100644 index 00000000..cff4b520 --- /dev/null +++ b/lib/librte_ethdev/rte_flow.c @@ -0,0 +1,635 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. 
+ * Copyright 2016 Mellanox Technologies, Ltd + */ + +#include +#include +#include +#include + +#include +#include +#include +#include "rte_ethdev.h" +#include "rte_flow_driver.h" +#include "rte_flow.h" + +/** + * Flow elements description tables. + */ +struct rte_flow_desc_data { + const char *name; + size_t size; +}; + +/** Generate flow_item[] entry. */ +#define MK_FLOW_ITEM(t, s) \ + [RTE_FLOW_ITEM_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow pattern items. */ +static const struct rte_flow_desc_data rte_flow_desc_item[] = { + MK_FLOW_ITEM(END, 0), + MK_FLOW_ITEM(VOID, 0), + MK_FLOW_ITEM(INVERT, 0), + MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)), + MK_FLOW_ITEM(PF, 0), + MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)), + MK_FLOW_ITEM(PHY_PORT, sizeof(struct rte_flow_item_phy_port)), + MK_FLOW_ITEM(PORT_ID, sizeof(struct rte_flow_item_port_id)), + MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), + MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)), + MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)), + MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)), + MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)), + MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)), + MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)), + MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)), + MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)), + MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)), + MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), + MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), + MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)), + MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)), + MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)), + MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)), + MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)), + MK_FLOW_ITEM(IPV6_EXT, sizeof(struct rte_flow_item_ipv6_ext)), + MK_FLOW_ITEM(ICMP6, sizeof(struct rte_flow_item_icmp6)), + MK_FLOW_ITEM(ICMP6_ND_NS, sizeof(struct rte_flow_item_icmp6_nd_ns)), + MK_FLOW_ITEM(ICMP6_ND_NA, sizeof(struct rte_flow_item_icmp6_nd_na)), + MK_FLOW_ITEM(ICMP6_ND_OPT, sizeof(struct rte_flow_item_icmp6_nd_opt)), + MK_FLOW_ITEM(ICMP6_ND_OPT_SLA_ETH, + sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)), + MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH, + sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)), +}; + +/** Generate flow_action[] entry. */ +#define MK_FLOW_ACTION(t, s) \ + [RTE_FLOW_ACTION_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow actions. 
*/ +static const struct rte_flow_desc_data rte_flow_desc_action[] = { + MK_FLOW_ACTION(END, 0), + MK_FLOW_ACTION(VOID, 0), + MK_FLOW_ACTION(PASSTHRU, 0), + MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)), + MK_FLOW_ACTION(FLAG, 0), + MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)), + MK_FLOW_ACTION(DROP, 0), + MK_FLOW_ACTION(COUNT, sizeof(struct rte_flow_action_count)), + MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), + MK_FLOW_ACTION(PF, 0), + MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)), + MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)), + MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)), + MK_FLOW_ACTION(OF_SET_MPLS_TTL, + sizeof(struct rte_flow_action_of_set_mpls_ttl)), + MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0), + MK_FLOW_ACTION(OF_SET_NW_TTL, + sizeof(struct rte_flow_action_of_set_nw_ttl)), + MK_FLOW_ACTION(OF_DEC_NW_TTL, 0), + MK_FLOW_ACTION(OF_COPY_TTL_OUT, 0), + MK_FLOW_ACTION(OF_COPY_TTL_IN, 0), + MK_FLOW_ACTION(OF_POP_VLAN, 0), + MK_FLOW_ACTION(OF_PUSH_VLAN, + sizeof(struct rte_flow_action_of_push_vlan)), + MK_FLOW_ACTION(OF_SET_VLAN_VID, + sizeof(struct rte_flow_action_of_set_vlan_vid)), + MK_FLOW_ACTION(OF_SET_VLAN_PCP, + sizeof(struct rte_flow_action_of_set_vlan_pcp)), + MK_FLOW_ACTION(OF_POP_MPLS, + sizeof(struct rte_flow_action_of_pop_mpls)), + MK_FLOW_ACTION(OF_PUSH_MPLS, + sizeof(struct rte_flow_action_of_push_mpls)), +}; + +static int +flow_err(uint16_t port_id, int ret, struct rte_flow_error *error) +{ + if (ret == 0) + return 0; + if (rte_eth_dev_is_removed(port_id)) + return rte_flow_error_set(error, EIO, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(EIO)); + return ret; +} + +/* Get generic flow operations structure from a port. */ +const struct rte_flow_ops * +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops; + int code; + + if (unlikely(!rte_eth_dev_is_valid_port(port_id))) + code = ENODEV; + else if (unlikely(!dev->dev_ops->filter_ctrl || + dev->dev_ops->filter_ctrl(dev, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_GET, + &ops) || + !ops)) + code = ENOSYS; + else + return ops; + rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(code)); + return NULL; +} + +/* Check whether a flow rule can be created on a given port. */ +int +rte_flow_validate(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->validate)) + return flow_err(port_id, ops->validate(dev, attr, pattern, + actions, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Create a flow rule on a given port. 
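rte_flow_ops_get() above is internal plumbing; applications drive this API through rte_flow_validate() and rte_flow_create() (defined just below). A short usage sketch, illustrative only and not part of the patch, with the port, queue index and UDP port as arbitrary example values:

#include <stdio.h>
#include <stdint.h>
#include <rte_errno.h>
#include <rte_flow.h>

/* Steer ingress IPv4/UDP packets with destination port 4789 to Rx queue 1. */
static struct rte_flow *
install_udp_rule(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_udp udp_spec = {
		.hdr = { .dst_port = RTE_BE16(4789) },
	};
	struct rte_flow_item_udp udp_mask = {
		.hdr = { .dst_port = RTE_BE16(0xffff) },
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },	/* NULL spec: match any */
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
		  .spec = &udp_spec, .mask = &udp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error = { 0 };

	if (rte_flow_validate(port_id, &attr, pattern, actions, &error) != 0) {
		printf("rule not supported: %s\n",
		       error.message ? error.message : rte_strerror(rte_errno));
		return NULL;
	}
	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}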
*/ +struct rte_flow * +rte_flow_create(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_flow *flow; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return NULL; + if (likely(!!ops->create)) { + flow = ops->create(dev, attr, pattern, actions, error); + if (flow == NULL) + flow_err(port_id, -rte_errno, error); + return flow; + } + rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); + return NULL; +} + +/* Destroy a flow rule on a given port. */ +int +rte_flow_destroy(uint16_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->destroy)) + return flow_err(port_id, ops->destroy(dev, flow, error), + error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Destroy all flow rules associated with a port. */ +int +rte_flow_flush(uint16_t port_id, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->flush)) + return flow_err(port_id, ops->flush(dev, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Query an existing flow rule. */ +int +rte_flow_query(uint16_t port_id, + struct rte_flow *flow, + const struct rte_flow_action *action, + void *data, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->query)) + return flow_err(port_id, ops->query(dev, flow, action, data, + error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Restrict ingress traffic to the defined flow rules. */ +int +rte_flow_isolate(uint16_t port_id, + int set, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->isolate)) + return flow_err(port_id, ops->isolate(dev, set, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Initialize flow error structure. */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return -code; +} + +/** Pattern item specification types. */ +enum item_spec_type { + ITEM_SPEC, + ITEM_LAST, + ITEM_MASK, +}; + +/** Compute storage space needed by item specification and copy it. */ +static size_t +flow_item_spec_copy(void *buf, const struct rte_flow_item *item, + enum item_spec_type type) +{ + size_t size = 0; + const void *data = + type == ITEM_SPEC ? item->spec : + type == ITEM_LAST ? 
item->last : + type == ITEM_MASK ? item->mask : + NULL; + + if (!item->spec || !data) + goto empty; + switch (item->type) { + union { + const struct rte_flow_item_raw *raw; + } spec; + union { + const struct rte_flow_item_raw *raw; + } last; + union { + const struct rte_flow_item_raw *raw; + } mask; + union { + const struct rte_flow_item_raw *raw; + } src; + union { + struct rte_flow_item_raw *raw; + } dst; + size_t off; + + case RTE_FLOW_ITEM_TYPE_RAW: + spec.raw = item->spec; + last.raw = item->last ? item->last : item->spec; + mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask; + src.raw = data; + dst.raw = buf; + off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw), + sizeof(*src.raw->pattern)); + if (type == ITEM_SPEC || + (type == ITEM_MASK && + ((spec.raw->length & mask.raw->length) >= + (last.raw->length & mask.raw->length)))) + size = spec.raw->length & mask.raw->length; + else + size = last.raw->length & mask.raw->length; + size = off + size * sizeof(*src.raw->pattern); + if (dst.raw) { + memcpy(dst.raw, src.raw, sizeof(*src.raw)); + dst.raw->pattern = memcpy((uint8_t *)dst.raw + off, + src.raw->pattern, + size - off); + } + break; + default: + size = rte_flow_desc_item[item->type].size; + if (buf) + memcpy(buf, data, size); + break; + } +empty: + return RTE_ALIGN_CEIL(size, sizeof(double)); +} + +/** Compute storage space needed by action configuration and copy it. */ +static size_t +flow_action_conf_copy(void *buf, const struct rte_flow_action *action) +{ + size_t size = 0; + + if (!action->conf) + goto empty; + switch (action->type) { + union { + const struct rte_flow_action_rss *rss; + } src; + union { + struct rte_flow_action_rss *rss; + } dst; + size_t off; + + case RTE_FLOW_ACTION_TYPE_RSS: + src.rss = action->conf; + dst.rss = buf; + off = 0; + if (dst.rss) + *dst.rss = (struct rte_flow_action_rss){ + .func = src.rss->func, + .level = src.rss->level, + .types = src.rss->types, + .key_len = src.rss->key_len, + .queue_num = src.rss->queue_num, + }; + off += sizeof(*src.rss); + if (src.rss->key_len) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + size = sizeof(*src.rss->key) * src.rss->key_len; + if (dst.rss) + dst.rss->key = memcpy + ((void *)((uintptr_t)dst.rss + off), + src.rss->key, size); + off += size; + } + if (src.rss->queue_num) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + size = sizeof(*src.rss->queue) * src.rss->queue_num; + if (dst.rss) + dst.rss->queue = memcpy + ((void *)((uintptr_t)dst.rss + off), + src.rss->queue, size); + off += size; + } + size = off; + break; + default: + size = rte_flow_desc_action[action->type].size; + if (buf) + memcpy(buf, action->conf, size); + break; + } +empty: + return RTE_ALIGN_CEIL(size, sizeof(double)); +} + +/** Store a full rte_flow description. 
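rte_flow_copy() below follows the usual "ask for the size first, then copy" convention: when the destination is NULL or too small it only returns the number of bytes required. A sketch of that two-pass pattern, illustrative only, with a hypothetical save_rule() wrapper and minimal error handling:

#include <rte_flow.h>
#include <rte_malloc.h>

/* Duplicate a rule description, e.g. so it can be replayed after a restart. */
static struct rte_flow_desc *
save_rule(const struct rte_flow_attr *attr,
	  const struct rte_flow_item *pattern,
	  const struct rte_flow_action *actions)
{
	struct rte_flow_desc *fd;
	size_t size;

	/* First pass: no destination, only the required size is computed. */
	size = rte_flow_copy(NULL, 0, attr, pattern, actions);
	if (size == 0)
		return NULL;	/* unknown item/action type, rte_errno is set */

	fd = rte_malloc(NULL, size, 0);
	if (fd == NULL)
		return NULL;

	/* Second pass: same arguments, now with a large enough buffer. */
	rte_flow_copy(fd, size, attr, pattern, actions);
	return fd;
}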
*/ +size_t +rte_flow_copy(struct rte_flow_desc *desc, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions) +{ + struct rte_flow_desc *fd = NULL; + size_t tmp; + size_t off1 = 0; + size_t off2 = 0; + size_t size = 0; + +store: + if (items) { + const struct rte_flow_item *item; + + item = items; + if (fd) + fd->items = (void *)&fd->data[off1]; + do { + struct rte_flow_item *dst = NULL; + + if ((size_t)item->type >= + RTE_DIM(rte_flow_desc_item) || + !rte_flow_desc_item[item->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, item, + sizeof(*item)); + off1 += sizeof(*item); + if (item->spec) { + if (fd) + dst->spec = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_SPEC); + } + if (item->last) { + if (fd) + dst->last = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_LAST); + } + if (item->mask) { + if (fd) + dst->mask = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_MASK); + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END); + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + } + if (actions) { + const struct rte_flow_action *action; + + action = actions; + if (fd) + fd->actions = (void *)&fd->data[off1]; + do { + struct rte_flow_action *dst = NULL; + + if ((size_t)action->type >= + RTE_DIM(rte_flow_desc_action) || + !rte_flow_desc_action[action->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, action, + sizeof(*action)); + off1 += sizeof(*action); + if (action->conf) { + if (fd) + dst->conf = fd->data + off2; + off2 += flow_action_conf_copy + (fd ? fd->data + off2 : NULL, action); + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END); + } + if (fd != NULL) + return size; + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data), + sizeof(double)); + size = tmp + off1 + off2; + if (size > len) + return size; + fd = desc; + if (fd != NULL) { + *fd = (const struct rte_flow_desc) { + .size = size, + .attr = *attr, + }; + tmp -= offsetof(struct rte_flow_desc, data); + off2 = tmp + off1; + off1 = tmp; + goto store; + } + return 0; +} + +/** + * Expand RSS flows into several possible flows according to the RSS hash + * fields requested and the driver capabilities. 
+ */ +int __rte_experimental +rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size, + const struct rte_flow_item *pattern, uint64_t types, + const struct rte_flow_expand_node graph[], + int graph_root_index) +{ + const int elt_n = 8; + const struct rte_flow_item *item; + const struct rte_flow_expand_node *node = &graph[graph_root_index]; + const int *next_node; + const int *stack[elt_n]; + int stack_pos = 0; + struct rte_flow_item flow_items[elt_n]; + unsigned int i; + size_t lsize; + size_t user_pattern_size = 0; + void *addr = NULL; + + lsize = offsetof(struct rte_flow_expand_rss, entry) + + elt_n * sizeof(buf->entry[0]); + if (lsize <= size) { + buf->entry[0].priority = 0; + buf->entry[0].pattern = (void *)&buf->entry[elt_n]; + buf->entries = 0; + addr = buf->entry[0].pattern; + } + for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + const struct rte_flow_expand_node *next = NULL; + + for (i = 0; node->next && node->next[i]; ++i) { + next = &graph[node->next[i]]; + if (next->type == item->type) + break; + } + if (next) + node = next; + user_pattern_size += sizeof(*item); + } + user_pattern_size += sizeof(*item); /* Handle END item. */ + lsize += user_pattern_size; + /* Copy the user pattern in the first entry of the buffer. */ + if (lsize <= size) { + rte_memcpy(addr, pattern, user_pattern_size); + addr = (void *)(((uintptr_t)addr) + user_pattern_size); + buf->entries = 1; + } + /* Start expanding. */ + memset(flow_items, 0, sizeof(flow_items)); + user_pattern_size -= sizeof(*item); + next_node = node->next; + stack[stack_pos] = next_node; + node = next_node ? &graph[*next_node] : NULL; + while (node) { + flow_items[stack_pos].type = node->type; + if (node->rss_types & types) { + /* + * compute the number of items to copy from the + * expansion and copy it. + * When the stack_pos is 0, there are 1 element in it, + * plus the addition END item. + */ + int elt = stack_pos + 2; + + flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; + lsize += elt * sizeof(*item) + user_pattern_size; + if (lsize <= size) { + size_t n = elt * sizeof(*item); + + buf->entry[buf->entries].priority = + stack_pos + 1; + buf->entry[buf->entries].pattern = addr; + buf->entries++; + rte_memcpy(addr, buf->entry[0].pattern, + user_pattern_size); + addr = (void *)(((uintptr_t)addr) + + user_pattern_size); + rte_memcpy(addr, flow_items, n); + addr = (void *)(((uintptr_t)addr) + n); + } + } + /* Go deeper. */ + if (node->next) { + next_node = node->next; + if (stack_pos++ == elt_n) { + rte_errno = E2BIG; + return -rte_errno; + } + stack[stack_pos] = next_node; + } else if (*(next_node + 1)) { + /* Follow up with the next possibility. */ + ++next_node; + } else { + /* Move to the next path. */ + if (stack_pos) + next_node = stack[--stack_pos]; + next_node++; + stack[stack_pos] = next_node; + } + node = *next_node ? &graph[*next_node] : NULL; + }; + return lsize; +} diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h new file mode 100644 index 00000000..f8ba71cd --- /dev/null +++ b/lib/librte_ethdev/rte_flow.h @@ -0,0 +1,2208 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox Technologies, Ltd + */ + +#ifndef RTE_FLOW_H_ +#define RTE_FLOW_H_ + +/** + * @file + * RTE generic flow API + * + * This interface provides the ability to program packet matching and + * associated actions in hardware through flow rules. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Flow rule attributes. + * + * Priorities are set on a per rule based within groups. + * + * Lower values denote higher priority, the highest priority for a flow rule + * is 0, so that a flow that matches for than one rule, the rule with the + * lowest priority value will always be matched. + * + * Although optional, applications are encouraged to group similar rules as + * much as possible to fully take advantage of hardware capabilities + * (e.g. optimized matching) and work around limitations (e.g. a single + * pattern type possibly allowed in a given group). Applications should be + * aware that groups are not linked by default, and that they must be + * explicitly linked by the application using the JUMP action. + * + * Priority levels are arbitrary and up to the application, they + * do not need to be contiguous nor start from 0, however the maximum number + * varies between devices and may be affected by existing flow rules. + * + * If a packet is matched by several rules of a given group for a given + * priority level, the outcome is undefined. It can take any path, may be + * duplicated or even cause unrecoverable errors. + * + * Note that support for more than a single group and priority level is not + * guaranteed. + * + * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). + * + * Several pattern items and actions are valid and can be used in both + * directions. Those valid for only one direction are described as such. + * + * At least one direction must be specified. + * + * Specifying both directions at once for a given rule is not recommended + * but may be valid in a few cases (e.g. shared counter). + */ +struct rte_flow_attr { + uint32_t group; /**< Priority group. */ + uint32_t priority; /**< Rule priority level within group. */ + uint32_t ingress:1; /**< Rule applies to ingress traffic. */ + uint32_t egress:1; /**< Rule applies to egress traffic. */ + /** + * Instead of simply matching the properties of traffic as it would + * appear on a given DPDK port ID, enabling this attribute transfers + * a flow rule to the lowest possible level of any device endpoints + * found in the pattern. + * + * When supported, this effectively enables an application to + * re-route traffic not necessarily intended for it (e.g. coming + * from or addressed to different physical ports, VFs or + * applications) at the device level. + * + * It complements the behavior of some pattern items such as + * RTE_FLOW_ITEM_TYPE_PHY_PORT and is meaningless without them. + * + * When transferring flow rules, ingress and egress attributes keep + * their original meaning, as if processing traffic emitted or + * received by the application. + */ + uint32_t transfer:1; + uint32_t reserved:29; /**< Reserved, must be zero. */ +}; + +/** + * Matching pattern item types. + * + * Pattern items fall in two categories: + * + * - Matching protocol headers and packet data, usually associated with a + * specification structure. These must be stacked in the same order as the + * protocol layers to match inside packets, starting from the lowest. + * + * - Matching meta-data or affecting pattern processing, often without a + * specification structure. Since they do not match packet contents, their + * position in the list is usually not relevant. + * + * See the description of individual types for more information. 
Those + * marked with [META] fall into the second category. + */ +enum rte_flow_item_type { + /** + * [META] + * + * End marker for item lists. Prevents further processing of items, + * thereby ending the pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_VOID, + + /** + * [META] + * + * Inverted matching, i.e. process packets that do not match the + * pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_INVERT, + + /** + * Matches any protocol in place of the current layer, a single ANY + * may also stand for several protocol layers. + * + * See struct rte_flow_item_any. + */ + RTE_FLOW_ITEM_TYPE_ANY, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) + * the physical function of the current device. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_PF, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * given virtual function of the current device. + * + * See struct rte_flow_item_vf. + */ + RTE_FLOW_ITEM_TYPE_VF, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * physical port of the underlying device. + * + * See struct rte_flow_item_phy_port. + */ + RTE_FLOW_ITEM_TYPE_PHY_PORT, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * given DPDK port ID. + * + * See struct rte_flow_item_port_id. + */ + RTE_FLOW_ITEM_TYPE_PORT_ID, + + /** + * Matches a byte string of a given length at a given offset. + * + * See struct rte_flow_item_raw. + */ + RTE_FLOW_ITEM_TYPE_RAW, + + /** + * Matches an Ethernet header. + * + * See struct rte_flow_item_eth. + */ + RTE_FLOW_ITEM_TYPE_ETH, + + /** + * Matches an 802.1Q/ad VLAN tag. + * + * See struct rte_flow_item_vlan. + */ + RTE_FLOW_ITEM_TYPE_VLAN, + + /** + * Matches an IPv4 header. + * + * See struct rte_flow_item_ipv4. + */ + RTE_FLOW_ITEM_TYPE_IPV4, + + /** + * Matches an IPv6 header. + * + * See struct rte_flow_item_ipv6. + */ + RTE_FLOW_ITEM_TYPE_IPV6, + + /** + * Matches an ICMP header. + * + * See struct rte_flow_item_icmp. + */ + RTE_FLOW_ITEM_TYPE_ICMP, + + /** + * Matches a UDP header. + * + * See struct rte_flow_item_udp. + */ + RTE_FLOW_ITEM_TYPE_UDP, + + /** + * Matches a TCP header. + * + * See struct rte_flow_item_tcp. + */ + RTE_FLOW_ITEM_TYPE_TCP, + + /** + * Matches a SCTP header. + * + * See struct rte_flow_item_sctp. + */ + RTE_FLOW_ITEM_TYPE_SCTP, + + /** + * Matches a VXLAN header. + * + * See struct rte_flow_item_vxlan. + */ + RTE_FLOW_ITEM_TYPE_VXLAN, + + /** + * Matches a E_TAG header. + * + * See struct rte_flow_item_e_tag. + */ + RTE_FLOW_ITEM_TYPE_E_TAG, + + /** + * Matches a NVGRE header. + * + * See struct rte_flow_item_nvgre. + */ + RTE_FLOW_ITEM_TYPE_NVGRE, + + /** + * Matches a MPLS header. + * + * See struct rte_flow_item_mpls. + */ + RTE_FLOW_ITEM_TYPE_MPLS, + + /** + * Matches a GRE header. + * + * See struct rte_flow_item_gre. + */ + RTE_FLOW_ITEM_TYPE_GRE, + + /** + * [META] + * + * Fuzzy pattern match, expect faster than default. + * + * This is for device that support fuzzy matching option. + * Usually a fuzzy matching is fast but the cost is accuracy. + * + * See struct rte_flow_item_fuzzy. + */ + RTE_FLOW_ITEM_TYPE_FUZZY, + + /** + * Matches a GTP header. 
+ * + * Configure flow for GTP packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTP, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-C packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPC, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-U packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPU, + + /** + * Matches a ESP header. + * + * See struct rte_flow_item_esp. + */ + RTE_FLOW_ITEM_TYPE_ESP, + + /** + * Matches a GENEVE header. + * + * See struct rte_flow_item_geneve. + */ + RTE_FLOW_ITEM_TYPE_GENEVE, + + /** + * Matches a VXLAN-GPE header. + * + * See struct rte_flow_item_vxlan_gpe. + */ + RTE_FLOW_ITEM_TYPE_VXLAN_GPE, + + /** + * Matches an ARP header for Ethernet/IPv4. + * + * See struct rte_flow_item_arp_eth_ipv4. + */ + RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4, + + /** + * Matches the presence of any IPv6 extension header. + * + * See struct rte_flow_item_ipv6_ext. + */ + RTE_FLOW_ITEM_TYPE_IPV6_EXT, + + /** + * Matches any ICMPv6 header. + * + * See struct rte_flow_item_icmp6. + */ + RTE_FLOW_ITEM_TYPE_ICMP6, + + /** + * Matches an ICMPv6 neighbor discovery solicitation. + * + * See struct rte_flow_item_icmp6_nd_ns. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS, + + /** + * Matches an ICMPv6 neighbor discovery advertisement. + * + * See struct rte_flow_item_icmp6_nd_na. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA, + + /** + * Matches the presence of any ICMPv6 neighbor discovery option. + * + * See struct rte_flow_item_icmp6_nd_opt. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT, + + /** + * Matches an ICMPv6 neighbor discovery source Ethernet link-layer + * address option. + * + * See struct rte_flow_item_icmp6_nd_opt_sla_eth. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH, + + /** + * Matches an ICMPv6 neighbor discovery target Ethernet link-layer + * address option. + * + * See struct rte_flow_item_icmp6_nd_opt_tla_eth. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH, + + /** + * Matches specified mark field. + * + * See struct rte_flow_item_mark. + */ + RTE_FLOW_ITEM_TYPE_MARK, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ANY + * + * Matches any protocol in place of the current layer, a single ANY may also + * stand for several protocol layers. + * + * This is usually specified as the first pattern item when looking for a + * protocol anywhere in a packet. + * + * A zeroed mask stands for any number of layers. + */ +struct rte_flow_item_any { + uint32_t num; /**< Number of layers covered. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +#ifndef __cplusplus +static const struct rte_flow_item_any rte_flow_item_any_mask = { + .num = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VF + * + * Matches traffic originating from (ingress) or going to (egress) a given + * virtual function of the current device. + * + * If supported, should work even if the virtual function is not managed by + * the application and thus not associated with a DPDK port ID. + * + * Note this pattern item does not match VF representors traffic which, as + * separate entities, should be addressed through their own DPDK port IDs. + * + * - Can be specified multiple times to match traffic addressed to several + * VF IDs. + * - Can be combined with a PF item to match both PF and VF traffic. + * + * A zeroed mask can be used to match any VF ID. + */ +struct rte_flow_item_vf { + uint32_t id; /**< VF ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VF. 
*/ +#ifndef __cplusplus +static const struct rte_flow_item_vf rte_flow_item_vf_mask = { + .id = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_PHY_PORT + * + * Matches traffic originating from (ingress) or going to (egress) a + * physical port of the underlying device. + * + * The first PHY_PORT item overrides the physical port normally associated + * with the specified DPDK input port (port_id). This item can be provided + * several times to match additional physical ports. + * + * Note that physical ports are not necessarily tied to DPDK input ports + * (port_id) when those are not under DPDK control. Possible values are + * specific to each device, they are not necessarily indexed from zero and + * may not be contiguous. + * + * As a device property, the list of allowed values as well as the value + * associated with a port_id should be retrieved by other means. + * + * A zeroed mask can be used to match any port index. + */ +struct rte_flow_item_phy_port { + uint32_t index; /**< Physical port index. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PHY_PORT. */ +#ifndef __cplusplus +static const struct rte_flow_item_phy_port rte_flow_item_phy_port_mask = { + .index = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_PORT_ID + * + * Matches traffic originating from (ingress) or going to (egress) a given + * DPDK port ID. + * + * Normally only supported if the port ID in question is known by the + * underlying PMD and related to the device the flow rule is created + * against. + * + * This must not be confused with @p PHY_PORT which refers to the physical + * port of a device, whereas @p PORT_ID refers to a struct rte_eth_dev + * object on the application side (also known as "port representor" + * depending on the kind of underlying device). + */ +struct rte_flow_item_port_id { + uint32_t id; /**< DPDK port ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PORT_ID. */ +#ifndef __cplusplus +static const struct rte_flow_item_port_id rte_flow_item_port_id_mask = { + .id = 0xffffffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_RAW + * + * Matches a byte string of a given length at a given offset. + * + * Offset is either absolute (using the start of the packet) or relative to + * the end of the previous matched item in the stack, in which case negative + * values are allowed. + * + * If search is enabled, offset is used as the starting point. The search + * area can be delimited by setting limit to a nonzero value, which is the + * maximum number of bytes after offset where the pattern may start. + * + * Matching a zero-length pattern is allowed, doing so resets the relative + * offset for subsequent items. + * + * This type does not support ranges (struct rte_flow_item.last). + */ +struct rte_flow_item_raw { + uint32_t relative:1; /**< Look for pattern after the previous item. */ + uint32_t search:1; /**< Search pattern from offset (see also limit). */ + uint32_t reserved:30; /**< Reserved, must be set to zero. */ + int32_t offset; /**< Absolute or relative offset for pattern. */ + uint16_t limit; /**< Search area limit for start of pattern. */ + uint16_t length; /**< Pattern length. */ + const uint8_t *pattern; /**< Byte string to look for. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. 
*/ +#ifndef __cplusplus +static const struct rte_flow_item_raw rte_flow_item_raw_mask = { + .relative = 1, + .search = 1, + .reserved = 0x3fffffff, + .offset = 0xffffffff, + .limit = 0xffff, + .length = 0xffff, + .pattern = NULL, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ETH + * + * Matches an Ethernet header. + * + * The @p type field either stands for "EtherType" or "TPID" when followed + * by so-called layer 2.5 pattern items such as RTE_FLOW_ITEM_TYPE_VLAN. In + * the latter case, @p type refers to that of the outer header, with the + * inner EtherType/TPID provided by the subsequent pattern item. This is the + * same order as on the wire. + */ +struct rte_flow_item_eth { + struct ether_addr dst; /**< Destination MAC. */ + struct ether_addr src; /**< Source MAC. */ + rte_be16_t type; /**< EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_eth rte_flow_item_eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .type = RTE_BE16(0x0000), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VLAN + * + * Matches an 802.1Q/ad VLAN tag. + * + * The corresponding standard outer EtherType (TPID) values are + * ETHER_TYPE_VLAN or ETHER_TYPE_QINQ. It can be overridden by the preceding + * pattern item. + */ +struct rte_flow_item_vlan { + rte_be16_t tci; /**< Tag control information. */ + rte_be16_t inner_type; /**< Inner EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { + .tci = RTE_BE16(0x0fff), + .inner_type = RTE_BE16(0x0000), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV4 + * + * Matches an IPv4 header. + * + * Note: IPv4 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv4 { + struct ipv4_hdr hdr; /**< IPv4 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { + .hdr = { + .src_addr = RTE_BE32(0xffffffff), + .dst_addr = RTE_BE32(0xffffffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV6. + * + * Matches an IPv6 header. + * + * Note: IPv6 options are handled by dedicated pattern items, see + * RTE_FLOW_ITEM_TYPE_IPV6_EXT. + */ +struct rte_flow_item_ipv6 { + struct ipv6_hdr hdr; /**< IPv6 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { + .hdr = { + .src_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .dst_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP. + * + * Matches an ICMP header. + */ +struct rte_flow_item_icmp { + struct icmp_hdr hdr; /**< ICMP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { + .hdr = { + .icmp_type = 0xff, + .icmp_code = 0xff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_UDP. + * + * Matches a UDP header. + */ +struct rte_flow_item_udp { + struct udp_hdr hdr; /**< UDP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. 
*/ +#ifndef __cplusplus +static const struct rte_flow_item_udp rte_flow_item_udp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_TCP. + * + * Matches a TCP header. + */ +struct rte_flow_item_tcp { + struct tcp_hdr hdr; /**< TCP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ +#ifndef __cplusplus +static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_SCTP. + * + * Matches a SCTP header. + */ +struct rte_flow_item_sctp { + struct sctp_hdr hdr; /**< SCTP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN. + * + * Matches a VXLAN header (RFC 7348). + */ +struct rte_flow_item_vxlan { + uint8_t flags; /**< Normally 0x08 (I flag). */ + uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_E_TAG. + * + * Matches a E-tag header. + * + * The corresponding standard outer EtherType (TPID) value is + * ETHER_TYPE_ETAG. It can be overridden by the preceding pattern item. + */ +struct rte_flow_item_e_tag { + /** + * E-Tag control information (E-TCI). + * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). + */ + rte_be16_t epcp_edei_in_ecid_b; + /** Reserved (2b), GRP (2b), E-CID base (12b). */ + rte_be16_t rsvd_grp_ecid_b; + uint8_t in_ecid_e; /**< Ingress E-CID ext. */ + uint8_t ecid_e; /**< E-CID ext. */ + rte_be16_t inner_type; /**< Inner EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */ +#ifndef __cplusplus +static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = { + .rsvd_grp_ecid_b = RTE_BE16(0x3fff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_NVGRE. + * + * Matches a NVGRE header. + */ +struct rte_flow_item_nvgre { + /** + * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), + * reserved 0 (9b), version (3b). + * + * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. + */ + rte_be16_t c_k_s_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_NVGRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_nvgre rte_flow_item_nvgre_mask = { + .tni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_MPLS. + * + * Matches a MPLS header. + */ +struct rte_flow_item_mpls { + /** + * Label (20b), TC (3b), Bottom of Stack (1b). + */ + uint8_t label_tc_s[3]; + uint8_t ttl; /** Time-to-Live. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_MPLS. */ +#ifndef __cplusplus +static const struct rte_flow_item_mpls rte_flow_item_mpls_mask = { + .label_tc_s = "\xff\xff\xf0", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GRE. + * + * Matches a GRE header. + */ +struct rte_flow_item_gre { + /** + * Checksum (1b), reserved 0 (12b), version (3b). + * Refer to RFC 2784. 
+ */ + rte_be16_t c_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_gre rte_flow_item_gre_mask = { + .protocol = RTE_BE16(0xffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_FUZZY + * + * Fuzzy pattern match, expect faster than default. + * + * This is for device that support fuzzy match option. + * Usually a fuzzy match is fast but the cost is accuracy. + * i.e. Signature Match only match pattern's hash value, but it is + * possible two different patterns have the same hash value. + * + * Matching accuracy level can be configure by threshold. + * Driver can divide the range of threshold and map to different + * accuracy levels that device support. + * + * Threshold 0 means perfect match (no fuzziness), while threshold + * 0xffffffff means fuzziest match. + */ +struct rte_flow_item_fuzzy { + uint32_t thresh; /**< Accuracy threshold. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */ +#ifndef __cplusplus +static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = { + .thresh = 0xffffffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GTP. + * + * Matches a GTPv1 header. + */ +struct rte_flow_item_gtp { + /** + * Version (3b), protocol type (1b), reserved (1b), + * Extension header flag (1b), + * Sequence number flag (1b), + * N-PDU number flag (1b). + */ + uint8_t v_pt_rsv_flags; + uint8_t msg_type; /**< Message type. */ + rte_be16_t msg_len; /**< Message length. */ + rte_be32_t teid; /**< Tunnel endpoint identifier. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = { + .teid = RTE_BE32(0xffffffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ESP + * + * Matches an ESP header. + */ +struct rte_flow_item_esp { + struct esp_hdr hdr; /**< ESP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */ +#ifndef __cplusplus +static const struct rte_flow_item_esp rte_flow_item_esp_mask = { + .hdr = { + .spi = 0xffffffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GENEVE. + * + * Matches a GENEVE header. + */ +struct rte_flow_item_geneve { + /** + * Version (2b), length of the options fields (6b), OAM packet (1b), + * critical options present (1b), reserved 0 (6b). + */ + rte_be16_t ver_opt_len_o_c_rsvd0; + rte_be16_t protocol; /**< Protocol type. */ + uint8_t vni[3]; /**< Virtual Network Identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GENEVE. */ +#ifndef __cplusplus +static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN_GPE (draft-ietf-nvo3-vxlan-gpe-05). + * + * Matches a VXLAN-GPE header. + */ +struct rte_flow_item_vxlan_gpe { + uint8_t flags; /**< Normally 0x0c (I and P flags). */ + uint8_t rsvd0[2]; /**< Reserved, normally 0x0000. */ + uint8_t protocol; /**< Protocol type. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN_GPE. */ +#ifndef __cplusplus +static const struct rte_flow_item_vxlan_gpe rte_flow_item_vxlan_gpe_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4 + * + * Matches an ARP header for Ethernet/IPv4. + */ +struct rte_flow_item_arp_eth_ipv4 { + rte_be16_t hrd; /**< Hardware type, normally 1. */ + rte_be16_t pro; /**< Protocol type, normally 0x0800. 
*/ + uint8_t hln; /**< Hardware address length, normally 6. */ + uint8_t pln; /**< Protocol address length, normally 4. */ + rte_be16_t op; /**< Opcode (1 for request, 2 for reply). */ + struct ether_addr sha; /**< Sender hardware address. */ + rte_be32_t spa; /**< Sender IPv4 address. */ + struct ether_addr tha; /**< Target hardware address. */ + rte_be32_t tpa; /**< Target IPv4 address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4. */ +#ifndef __cplusplus +static const struct rte_flow_item_arp_eth_ipv4 +rte_flow_item_arp_eth_ipv4_mask = { + .sha.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .spa = RTE_BE32(0xffffffff), + .tha.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .tpa = RTE_BE32(0xffffffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV6_EXT + * + * Matches the presence of any IPv6 extension header. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_IPV6 + * - RTE_FLOW_ITEM_TYPE_IPV6_EXT + */ +struct rte_flow_item_ipv6_ext { + uint8_t next_hdr; /**< Next header. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6_EXT. */ +#ifndef __cplusplus +static const +struct rte_flow_item_ipv6_ext rte_flow_item_ipv6_ext_mask = { + .next_hdr = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6 + * + * Matches any ICMPv6 header. + */ +struct rte_flow_item_icmp6 { + uint8_t type; /**< ICMPv6 type. */ + uint8_t code; /**< ICMPv6 code. */ + uint16_t checksum; /**< ICMPv6 checksum. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6 rte_flow_item_icmp6_mask = { + .type = 0xff, + .code = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * + * Matches an ICMPv6 neighbor discovery solicitation. + */ +struct rte_flow_item_icmp6_nd_ns { + uint8_t type; /**< ICMPv6 type, normally 135. */ + uint8_t code; /**< ICMPv6 code, normally 0. */ + rte_be16_t checksum; /**< ICMPv6 checksum. */ + rte_be32_t reserved; /**< Reserved, normally 0. */ + uint8_t target_addr[16]; /**< Target address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS. */ +#ifndef __cplusplus +static const +struct rte_flow_item_icmp6_nd_ns rte_flow_item_icmp6_nd_ns_mask = { + .target_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * + * Matches an ICMPv6 neighbor discovery advertisement. + */ +struct rte_flow_item_icmp6_nd_na { + uint8_t type; /**< ICMPv6 type, normally 136. */ + uint8_t code; /**< ICMPv6 code, normally 0. */ + rte_be16_t checksum; /**< ICMPv6 checksum. */ + /** + * Route flag (1b), solicited flag (1b), override flag (1b), + * reserved (29b). + */ + rte_be32_t rso_reserved; + uint8_t target_addr[16]; /**< Target address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA. */ +#ifndef __cplusplus +static const +struct rte_flow_item_icmp6_nd_na rte_flow_item_icmp6_nd_na_mask = { + .target_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + * + * Matches the presence of any ICMPv6 neighbor discovery option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt { + uint8_t type; /**< ND option type. */ + uint8_t length; /**< ND option length. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT. 
*/ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt +rte_flow_item_icmp6_nd_opt_mask = { + .type = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH + * + * Matches an ICMPv6 neighbor discovery source Ethernet link-layer address + * option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt_sla_eth { + uint8_t type; /**< ND option type, normally 1. */ + uint8_t length; /**< ND option length, normally 1. */ + struct ether_addr sla; /**< Source Ethernet LLA. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt_sla_eth +rte_flow_item_icmp6_nd_opt_sla_eth_mask = { + .sla.addr_bytes = "\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH + * + * Matches an ICMPv6 neighbor discovery target Ethernet link-layer address + * option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt_tla_eth { + uint8_t type; /**< ND option type, normally 2. */ + uint8_t length; /**< ND option length, normally 1. */ + struct ether_addr tla; /**< Target Ethernet LLA. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt_tla_eth +rte_flow_item_icmp6_nd_opt_tla_eth_mask = { + .tla.addr_bytes = "\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ITEM_TYPE_MARK + * + * Matches an arbitrary integer value which was set using the ``MARK`` action + * in a previously matched rule. + * + * This item can only be specified once as a match criteria as the ``MARK`` + * action can only be specified once in a flow action. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. + * + * Depending on the underlying implementation the MARK item may be supported on + * the physical device, with virtual groups in the PMD or not at all. + */ +struct rte_flow_item_mark { + uint32_t id; /**< Integer value to match against. */ +}; + +/** + * Matching pattern item definition. + * + * A pattern is formed by stacking items starting from the lowest protocol + * layer to match. This stacking restriction does not apply to meta items + * which can be placed anywhere in the stack without affecting the meaning + * of the resulting pattern. + * + * Patterns are terminated by END items. + * + * The spec field should be a valid pointer to a structure of the related + * item type. It may remain unspecified (NULL) in many cases to request + * broad (nonspecific) matching. In such cases, last and mask must also be + * set to NULL. + * + * Optionally, last can point to a structure of the same type to define an + * inclusive range. This is mostly supported by integer and address fields, + * may cause errors otherwise. Fields that do not support ranges must be set + * to 0 or to the same value as the corresponding fields in spec. + * + * Only the fields defined to nonzero values in the default masks (see + * rte_flow_item_{name}_mask constants) are considered relevant by + * default. This can be overridden by providing a mask structure of the + * same type with applicable bits set to one. 
It can also be used to + * partially filter out specific fields (e.g. as an alternate mean to match + * ranges of IP addresses). + * + * Mask is a simple bit-mask applied before interpreting the contents of + * spec and last, which may yield unexpected results if not used + * carefully. For example, if for an IPv4 address field, spec provides + * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the + * effective range becomes 10.1.0.0 to 10.3.255.255. + */ +struct rte_flow_item { + enum rte_flow_item_type type; /**< Item type. */ + const void *spec; /**< Pointer to item specification structure. */ + const void *last; /**< Defines an inclusive range (spec to last). */ + const void *mask; /**< Bit-mask applied to spec and last. */ +}; + +/** + * Action types. + * + * Each possible action is represented by a type. Some have associated + * configuration structures. Several actions combined in a list can be + * assigned to a flow rule and are performed in order. + * + * They fall in three categories: + * + * - Actions that modify the fate of matching traffic, for instance by + * dropping or assigning it a specific destination. + * + * - Actions that modify matching traffic contents or its properties. This + * includes adding/removing encapsulation, encryption, compression and + * marks. + * + * - Actions related to the flow rule itself, such as updating counters or + * making it non-terminating. + * + * Flow rules being terminating by default, not specifying any action of the + * fate kind results in undefined behavior. This applies to both ingress and + * egress. + * + * PASSTHRU, when supported, makes a flow rule non-terminating. + */ +enum rte_flow_action_type { + /** + * End marker for action lists. Prevents further processing of + * actions, thereby ending the list. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_END, + + /** + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_VOID, + + /** + * Leaves traffic up for additional processing by subsequent flow + * rules; makes a flow rule non-terminating. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PASSTHRU, + + /** + * RTE_FLOW_ACTION_TYPE_JUMP + * + * Redirects packets to a group on the current device. + * + * See struct rte_flow_action_jump. + */ + RTE_FLOW_ACTION_TYPE_JUMP, + + /** + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * See struct rte_flow_action_mark. + */ + RTE_FLOW_ACTION_TYPE_MARK, + + /** + * Flags packets. Similar to MARK without a specific value; only + * sets the PKT_RX_FDIR mbuf flag. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_FLAG, + + /** + * Assigns packets to a given queue index. + * + * See struct rte_flow_action_queue. + */ + RTE_FLOW_ACTION_TYPE_QUEUE, + + /** + * Drops packets. + * + * PASSTHRU overrides this action if both are specified. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DROP, + + /** + * Enables counters for this flow rule. + * + * These counters can be retrieved and reset through rte_flow_query(), + * see struct rte_flow_query_count. + * + * See struct rte_flow_action_count. + */ + RTE_FLOW_ACTION_TYPE_COUNT, + + /** + * Similar to QUEUE, except RSS is additionally performed on packets + * to spread them among several queues according to the provided + * parameters. 
+ * + * See struct rte_flow_action_rss. + */ + RTE_FLOW_ACTION_TYPE_RSS, + + /** + * Directs matching traffic to the physical function (PF) of the + * current device. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PF, + + /** + * Directs matching traffic to a given virtual function of the + * current device. + * + * See struct rte_flow_action_vf. + */ + RTE_FLOW_ACTION_TYPE_VF, + + /** + * Directs packets to a given physical port index of the underlying + * device. + * + * See struct rte_flow_action_phy_port. + */ + RTE_FLOW_ACTION_TYPE_PHY_PORT, + + /** + * Directs matching traffic to a given DPDK port ID. + * + * See struct rte_flow_action_port_id. + */ + RTE_FLOW_ACTION_TYPE_PORT_ID, + + /** + * Traffic metering and policing (MTR). + * + * See struct rte_flow_action_meter. + * See file rte_mtr.h for MTR object configuration. + */ + RTE_FLOW_ACTION_TYPE_METER, + + /** + * Redirects packets to security engine of current device for security + * processing as specified by security session. + * + * See struct rte_flow_action_security. + */ + RTE_FLOW_ACTION_TYPE_SECURITY, + + /** + * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the + * OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_mpls_ttl. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL, + + /** + * Implements OFPAT_DEC_MPLS_TTL ("decrement MPLS TTL") as defined + * by the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_DEC_MPLS_TTL, + + /** + * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow + * Switch Specification. + * + * See struct rte_flow_action_of_set_nw_ttl. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL, + + /** + * Implements OFPAT_DEC_NW_TTL ("decrement IP TTL") as defined by + * the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_DEC_NW_TTL, + + /** + * Implements OFPAT_COPY_TTL_OUT ("copy TTL "outwards" -- from + * next-to-outermost to outermost") as defined by the OpenFlow + * Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_OUT, + + /** + * Implements OFPAT_COPY_TTL_IN ("copy TTL "inwards" -- from + * outermost to next-to-outermost") as defined by the OpenFlow + * Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_IN, + + /** + * Implements OFPAT_POP_VLAN ("pop the outer VLAN tag") as defined + * by the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, + + /** + * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by + * the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_push_vlan. + */ + RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, + + /** + * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as + * defined by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_vlan_vid. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, + + /** + * Implements OFPAT_SET_LAN_PCP ("set the 802.1q priority") as + * defined by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_vlan_pcp. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, + + /** + * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined + * by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_pop_mpls. 
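A minimal sketch of how these OpenFlow-style VLAN actions can be combined into one action list, assuming the rte_flow_action_of_* configuration structures defined further down in this header and the RTE_BE16() macro; the VID, PCP and queue values are arbitrary:

/* Illustrative sketch only: push a VLAN tag, set its VID/PCP, then queue. */
static const struct rte_flow_action_of_push_vlan push_vlan = {
	.ethertype = RTE_BE16(0x8100),
};
static const struct rte_flow_action_of_set_vlan_vid set_vid = {
	.vlan_vid = RTE_BE16(42),
};
static const struct rte_flow_action_of_set_vlan_pcp set_pcp = {
	.vlan_pcp = 3,
};
static const struct rte_flow_action_queue to_queue = { .index = 0 };
static const struct rte_flow_action vlan_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, .conf = &push_vlan },
	{ .type = RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, .conf = &set_vid },
	{ .type = RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, .conf = &set_pcp },
	/* A fate action is still needed since rules are terminating by default. */
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &to_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};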
+ */ + RTE_FLOW_ACTION_TYPE_OF_POP_MPLS, + + /** + * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by + * the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_push_mpls. + */ + RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS, + + /** + * Encapsulate flow in VXLAN tunnel as defined in + * rte_flow_action_vxlan_encap action structure. + * + * See struct rte_flow_action_vxlan_encap. + */ + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, + + /** + * Decapsulate outer most VXLAN tunnel from matched flow. + * + * If flow pattern does not define a valid VXLAN tunnel (as specified by + * RFC7348) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION + * error. + */ + RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, + + /** + * Encapsulate flow in NVGRE tunnel defined in the + * rte_flow_action_nvgre_encap action structure. + * + * See struct rte_flow_action_nvgre_encap. + */ + RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP, + + /** + * Decapsulate outer most NVGRE tunnel from matched flow. + * + * If flow pattern does not define a valid NVGRE tunnel (as specified by + * RFC7637) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION + * error. + */ + RTE_FLOW_ACTION_TYPE_NVGRE_DECAP, +}; + +/** + * RTE_FLOW_ACTION_TYPE_MARK + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. It is returned in the + * hash.fdir.hi mbuf field. + */ +struct rte_flow_action_mark { + uint32_t id; /**< Integer value to return with packets. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_JUMP + * + * Redirects packets to a group on the current device. + * + * In a hierarchy of groups, which can be used to represent physical or logical + * flow tables on the device, this action allows the action to be a redirect to + * a group on that device. + */ +struct rte_flow_action_jump { + uint32_t group; +}; + +/** + * RTE_FLOW_ACTION_TYPE_QUEUE + * + * Assign packets to a given queue index. + */ +struct rte_flow_action_queue { + uint16_t index; /**< Queue index to use. */ +}; + + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_COUNT + * + * Adds a counter action to a matched flow. + * + * If more than one count action is specified in a single flow rule, then each + * action must specify a unique id. + * + * Counters can be retrieved and reset through ``rte_flow_query()``, see + * ``struct rte_flow_query_count``. + * + * The shared flag indicates whether the counter is unique to the flow rule the + * action is specified with, or whether it is a shared counter. + * + * For a count action with the shared flag set, then then a global device + * namespace is assumed for the counter id, so that any matched flow rules using + * a count action with the same counter id on the same port will contribute to + * that counter. + * + * For ports within the same switch domain then the counter id namespace extends + * to all ports within that switch domain. + */ +struct rte_flow_action_count { + uint32_t shared:1; /**< Share counter ID with other flow rules. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< Counter ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_COUNT (query) + * + * Query structure to retrieve and reset flow rule counters. 
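A minimal sketch of an action list using the MARK, COUNT and QUEUE structures defined here, with arbitrary identifiers; the shared flag makes counter 7 a port-wide counter as described above:

/* Illustrative sketch only: mark packets, count them on a shared counter, queue 3. */
static const struct rte_flow_action_mark mark = { .id = 0xbeef };
static const struct rte_flow_action_count count = {
	.shared = 1, /* counter 7 is shared by all rules using the same ID */
	.id = 7,
};
static const struct rte_flow_action_queue queue = { .index = 3 };
static const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
	{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};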
+ */ +struct rte_flow_query_count { + uint32_t reset:1; /**< Reset counters after query [in]. */ + uint32_t hits_set:1; /**< hits field is set [out]. */ + uint32_t bytes_set:1; /**< bytes field is set [out]. */ + uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ + uint64_t hits; /**< Number of hits for this rule [out]. */ + uint64_t bytes; /**< Number of bytes through this rule [out]. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_RSS + * + * Similar to QUEUE, except RSS is additionally performed on packets to + * spread them among several queues according to the provided parameters. + * + * Unlike global RSS settings used by other DPDK APIs, unsetting the + * @p types field does not disable RSS in a flow rule. Doing so instead + * requests safe unspecified "best-effort" settings from the underlying PMD, + * which depending on the flow rule, may result in anything ranging from + * empty (single queue) to all-inclusive RSS. + * + * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps + * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only, + * both can be requested simultaneously. + */ +struct rte_flow_action_rss { + enum rte_eth_hash_function func; /**< RSS hash function to apply. */ + /** + * Packet encapsulation level RSS hash @p types apply to. + * + * - @p 0 requests the default behavior. Depending on the packet + * type, it can mean outermost, innermost, anything in between or + * even no RSS. + * + * It basically stands for the innermost encapsulation level RSS + * can be performed on according to PMD and device capabilities. + * + * - @p 1 requests RSS to be performed on the outermost packet + * encapsulation level. + * + * - @p 2 and subsequent values request RSS to be performed on the + * specified inner packet encapsulation level, from outermost to + * innermost (lower to higher values). + * + * Values other than @p 0 are not necessarily supported. + * + * Requesting a specific RSS level on unrecognized traffic results + * in undefined behavior. For predictable results, it is recommended + * to make the flow rule pattern match packet headers up to the + * requested encapsulation level so that only matching traffic goes + * through. + */ + uint32_t level; + uint64_t types; /**< Specific RSS hash types (see ETH_RSS_*). */ + uint32_t key_len; /**< Hash key length in bytes. */ + uint32_t queue_num; /**< Number of entries in @p queue. */ + const uint8_t *key; /**< Hash key. */ + const uint16_t *queue; /**< Queue indices to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_VF + * + * Directs matching traffic to a given virtual function of the current + * device. + * + * Packets matched by a VF pattern item can be redirected to their original + * VF ID instead of the specified one. This parameter may not be available + * and is not guaranteed to work properly if the VF part is matched by a + * prior flow rule or if packets are not addressed to a VF in the first + * place. + */ +struct rte_flow_action_vf { + uint32_t original:1; /**< Use original VF ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< VF ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_PHY_PORT + * + * Directs packets to a given physical port index of the underlying + * device. + * + * @see RTE_FLOW_ITEM_TYPE_PHY_PORT + */ +struct rte_flow_action_phy_port { + uint32_t original:1; /**< Use original port index if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t index; /**< Physical port index. 
*/ +}; + +/** + * RTE_FLOW_ACTION_TYPE_PORT_ID + * + * Directs matching traffic to a given DPDK port ID. + * + * @see RTE_FLOW_ITEM_TYPE_PORT_ID + */ +struct rte_flow_action_port_id { + uint32_t original:1; /**< Use original DPDK port ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< DPDK port ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_METER + * + * Traffic metering and policing (MTR). + * + * Packets matched by items of this type can be either dropped or passed to the + * next item with their color set by the MTR object. + */ +struct rte_flow_action_meter { + uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_SECURITY + * + * Perform the security action on flows matched by the pattern items + * according to the configuration of the security session. + * + * This action modifies the payload of matched flows. For INLINE_CRYPTO, the + * security protocol headers and IV are fully provided by the application as + * specified in the flow pattern. The payload of matching packets is + * encrypted on egress, and decrypted and authenticated on ingress. + * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW, + * providing full encapsulation and decapsulation of packets in security + * protocols. The flow pattern specifies both the outer security header fields + * and the inner packet fields. The security session specified in the action + * must match the pattern parameters. + * + * The security session specified in the action must be created on the same + * port as the flow action that is being specified. + * + * The ingress/egress flow attribute should match that specified in the + * security session if the security session supports the definition of the + * direction. + * + * Multiple flows can be configured to use the same security session. + */ +struct rte_flow_action_security { + void *security_session; /**< Pointer to security session structure. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL + * + * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the OpenFlow + * Switch Specification. + */ +struct rte_flow_action_of_set_mpls_ttl { + uint8_t mpls_ttl; /**< MPLS TTL. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL + * + * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow Switch + * Specification. + */ +struct rte_flow_action_of_set_nw_ttl { + uint8_t nw_ttl; /**< IP TTL. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN + * + * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_push_vlan { + rte_be16_t ethertype; /**< EtherType. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID + * + * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as defined by + * the OpenFlow Switch Specification. + */ +struct rte_flow_action_of_set_vlan_vid { + rte_be16_t vlan_vid; /**< VLAN id. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP + * + * Implements OFPAT_SET_LAN_PCP ("set the 802.1q priority") as defined by + * the OpenFlow Switch Specification. + */ +struct rte_flow_action_of_set_vlan_pcp { + uint8_t vlan_pcp; /**< VLAN priority. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_POP_MPLS + * + * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_pop_mpls { + rte_be16_t ethertype; /**< EtherType. 
*/ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS + * + * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_push_mpls { + rte_be16_t ethertype; /**< EtherType. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP + * + * VXLAN tunnel end-point encapsulation data definition + * + * The tunnel definition is provided through the flow item pattern, the + * provided pattern must conform to RFC7348 for the tunnel specified. The flow + * definition must be provided in order from the RTE_FLOW_ITEM_TYPE_ETH + * definition up the end item which is specified by RTE_FLOW_ITEM_TYPE_END. + * + * The mask field allows user to specify which fields in the flow item + * definitions can be ignored and which have valid data and can be used + * verbatim. + * + * Note: the last field is not used in the definition of a tunnel and can be + * ignored. + * + * Valid flow definition for RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP include: + * + * - ETH / IPV4 / UDP / VXLAN / END + * - ETH / IPV6 / UDP / VXLAN / END + * - ETH / VLAN / IPV4 / UDP / VXLAN / END + * + */ +struct rte_flow_action_vxlan_encap { + /** + * Encapsulating vxlan tunnel definition + * (terminated by the END pattern item). + */ + struct rte_flow_item *definition; +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP + * + * NVGRE tunnel end-point encapsulation data definition + * + * The tunnel definition is provided through the flow item pattern the + * provided pattern must conform with RFC7637. The flow definition must be + * provided in order from the RTE_FLOW_ITEM_TYPE_ETH definition up the end item + * which is specified by RTE_FLOW_ITEM_TYPE_END. + * + * The mask field allows user to specify which fields in the flow item + * definitions can be ignored and which have valid data and can be used + * verbatim. + * + * Note: the last field is not used in the definition of a tunnel and can be + * ignored. + * + * Valid flow definition for RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP include: + * + * - ETH / IPV4 / NVGRE / END + * - ETH / VLAN / IPV6 / NVGRE / END + * + */ +struct rte_flow_action_nvgre_encap { + /** + * Encapsulating vxlan tunnel definition + * (terminated by the END pattern item). + */ + struct rte_flow_item *definition; +}; + +/* + * Definition of a single action. + * + * A list of actions is terminated by a END action. + * + * For simple actions without a configuration structure, conf remains NULL. + */ +struct rte_flow_action { + enum rte_flow_action_type type; /**< Action type. */ + const void *conf; /**< Pointer to action configuration structure. */ +}; + +/** + * Opaque type returned after successfully creating a flow. + * + * This handle can be used to manage and query the related flow (e.g. to + * destroy it or retrieve counters). + */ +struct rte_flow; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_flow_error.cause. + */ +enum rte_flow_error_type { + RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */ + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. 
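A skeletal sketch of a VXLAN_ENCAP tunnel definition in which only item types are filled in; a real definition must also supply spec/mask data for each item so the resulting encapsulation headers conform to RFC 7348:

/* Illustrative sketch only: ETH / IPV4 / UDP / VXLAN / END tunnel skeleton. */
static struct rte_flow_item vxlan_encap_definition[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
static const struct rte_flow_action_vxlan_encap vxlan_encap = {
	.definition = vxlan_encap_definition,
};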
*/ + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, /**< Transfer field. */ + RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ + RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, /**< Item specification. */ + RTE_FLOW_ERROR_TYPE_ITEM_LAST, /**< Item specification range. */ + RTE_FLOW_ERROR_TYPE_ITEM_MASK, /**< Item specification mask. */ + RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ + RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */ + RTE_FLOW_ERROR_TYPE_ACTION_CONF, /**< Action configuration. */ + RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_flow_error { + enum rte_flow_error_type type; /**< Cause field and error types. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Check whether a flow rule can be created on a given port. + * + * The flow rule is validated for correctness and whether it could be accepted + * by the device given sufficient resources. The rule is checked against the + * current device mode and queue configuration. The flow rule may also + * optionally be validated against existing flow rules and device resources. + * This function has no effect on the target device. + * + * The returned value is guaranteed to remain valid only as long as no + * successful calls to rte_flow_create() or rte_flow_destroy() are made in + * the meantime and no device parameter affecting flow rules in any way are + * modified, due to possible collisions or resource limitations (although in + * such cases EINVAL should not be returned). + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 if flow rule is valid and can be created. A negative errno value + * otherwise (rte_errno is also set), the following errors are defined: + * + * -ENOSYS: underlying device does not support this functionality. + * + * -EIO: underlying device is removed. + * + * -EINVAL: unknown or invalid rule specification. + * + * -ENOTSUP: valid but unsupported rule specification (e.g. partial + * bit-masks are unsupported). + * + * -EEXIST: collision with an existing rule. Only returned if device + * supports flow rule collision checking and there was a flow rule + * collision. Not receiving this return code is no guarantee that creating + * the rule will not fail due to a collision. + * + * -ENOMEM: not enough memory to execute the function, or if the device + * supports resource validation, resource limitation on the device. 
+ * + * -EBUSY: action cannot be performed due to busy device resources, may + * succeed if the affected queues or even the entire port are in a stopped + * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). + */ +int +rte_flow_validate(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Create a flow rule on a given port. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set + * to the positive version of one of the error codes defined for + * rte_flow_validate(). + */ +struct rte_flow * +rte_flow_create(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Destroy a flow rule on a given port. + * + * Failure to destroy a flow rule handle may occur when other flow rules + * depend on it, and destroying it would result in an inconsistent state. + * + * This function is only guaranteed to succeed if handles are destroyed in + * reverse order of their creation. + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to destroy. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_destroy(uint16_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error); + +/** + * Destroy all flow rules associated with a port. + * + * In the unlikely event of failure, handles are still considered destroyed + * and no longer valid but the port must be assumed to be in an inconsistent + * state. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_flush(uint16_t port_id, + struct rte_flow_error *error); + +/** + * Query an existing flow rule. + * + * This function allows retrieving flow-specific data such as counters. + * Data is gathered by special actions which must be present in the flow + * rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to query. + * @param action + * Action definition as defined in original flow rule. + * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
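A minimal usage sketch tying rte_flow_validate(), rte_flow_create() and rte_flow_query() together, assuming END-terminated pattern and action arrays such as the earlier sketches, a COUNT action present in the rule, and <rte_errno.h> for rte_errno:

/* Illustrative sketch only: validate, create, then read the rule's counter. */
static int
example_create_and_query(uint16_t port_id,
			 const struct rte_flow_attr *attr,
			 const struct rte_flow_item pattern[],
			 const struct rte_flow_action actions[],
			 uint64_t *hits)
{
	const struct rte_flow_action count_action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT, /* selects the action to query */
	};
	struct rte_flow_query_count stats = { .reset = 0 };
	struct rte_flow_error error = { 0 };
	struct rte_flow *flow;
	int ret;

	ret = rte_flow_validate(port_id, attr, pattern, actions, &error);
	if (ret != 0)
		return ret; /* rte_errno is also set; error.message may help */
	flow = rte_flow_create(port_id, attr, pattern, actions, &error);
	if (flow == NULL)
		return -rte_errno;
	ret = rte_flow_query(port_id, flow, &count_action, &stats, &error);
	if (ret == 0 && stats.hits_set)
		*hits = stats.hits;
	return ret;
}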
+ */ +int +rte_flow_query(uint16_t port_id, + struct rte_flow *flow, + const struct rte_flow_action *action, + void *data, + struct rte_flow_error *error); + +/** + * Restrict ingress traffic to the defined flow rules. + * + * Isolated mode guarantees that all ingress traffic comes from defined flow + * rules only (current and future). + * + * Besides making ingress more deterministic, it allows PMDs to safely reuse + * resources otherwise assigned to handle the remaining traffic, such as + * global RSS configuration settings, VLAN filters, MAC address entries, + * legacy filter API rules and so on in order to expand the set of possible + * flow rule types. + * + * Calling this function as soon as possible after device initialization, + * ideally before the first call to rte_eth_dev_configure(), is recommended + * to avoid possible failures due to conflicting settings. + * + * Once effective, leaving isolated mode may not be possible depending on + * PMD implementation. + * + * Additionally, the following functionality has no effect on the underlying + * port and may return errors such as ENOTSUP ("not supported"): + * + * - Toggling promiscuous mode. + * - Toggling allmulticast mode. + * - Configuring MAC addresses. + * - Configuring multicast addresses. + * - Configuring VLAN filters. + * - Configuring Rx filters through the legacy API (e.g. FDIR). + * - Configuring global RSS settings. + * + * @param port_id + * Port identifier of Ethernet device. + * @param set + * Nonzero to enter isolated mode, attempt to leave it otherwise. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error); + +/** + * Initialize flow error structure. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). + * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Negative error code (errno value) and rte_errno is set. + */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message); + +/** + * Generic flow representation. + * + * This form is sufficient to describe an rte_flow independently from any + * PMD implementation and allows for replayability and identification. + */ +struct rte_flow_desc { + size_t size; /**< Allocated space including data[]. */ + struct rte_flow_attr attr; /**< Attributes. */ + struct rte_flow_item *items; /**< Items. */ + struct rte_flow_action *actions; /**< Actions. */ + uint8_t data[]; /**< Storage for items/actions. */ +}; + +/** + * Copy an rte_flow rule description. + * + * @param[in] fd + * Flow rule description. + * @param[in] len + * Total size of allocated data for the flow description. + * @param[in] attr + * Flow rule attributes. + * @param[in] items + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * + * @return + * If len is greater or equal to the size of the flow, the total size of the + * flow description and its data. + * If len is lower than the size of the flow, the number of bytes that would + * have been written to desc had it been sufficient. 
Nothing is written. + */ +size_t +rte_flow_copy(struct rte_flow_desc *fd, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_H_ */ diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h new file mode 100644 index 00000000..688f7230 --- /dev/null +++ b/lib/librte_ethdev/rte_flow_driver.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox Technologies, Ltd + */ + +#ifndef RTE_FLOW_DRIVER_H_ +#define RTE_FLOW_DRIVER_H_ + +/** + * @file + * RTE generic flow API (driver side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include + +#include "rte_ethdev.h" +#include "rte_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generic flow operations structure implemented and returned by PMDs. + * + * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter + * type in their .filter_ctrl callback function (struct eth_dev_ops) as well + * as the RTE_ETH_FILTER_GET filter operation. + * + * If successful, this operation must result in a pointer to a PMD-specific + * struct rte_flow_ops written to the argument address as described below: + * + * \code + * + * // PMD filter_ctrl callback + * + * static const struct rte_flow_ops pmd_flow_ops = { ... }; + * + * switch (filter_type) { + * case RTE_ETH_FILTER_GENERIC: + * if (filter_op != RTE_ETH_FILTER_GET) + * return -EINVAL; + * *(const void **)arg = &pmd_flow_ops; + * return 0; + * } + * + * \endcode + * + * See also rte_flow_ops_get(). + * + * These callback functions are not supposed to be used by applications + * directly, which must rely on the API defined in rte_flow.h. + * + * Public-facing wrapper functions perform a few consistency checks so that + * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These + * callbacks otherwise only differ by their first argument (with port ID + * already resolved to a pointer to struct rte_eth_dev). + */ +struct rte_flow_ops { + /** See rte_flow_validate(). */ + int (*validate) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_create(). */ + struct rte_flow *(*create) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_destroy(). */ + int (*destroy) + (struct rte_eth_dev *, + struct rte_flow *, + struct rte_flow_error *); + /** See rte_flow_flush(). */ + int (*flush) + (struct rte_eth_dev *, + struct rte_flow_error *); + /** See rte_flow_query(). */ + int (*query) + (struct rte_eth_dev *, + struct rte_flow *, + const struct rte_flow_action *, + void *, + struct rte_flow_error *); + /** See rte_flow_isolate(). */ + int (*isolate) + (struct rte_eth_dev *, + int, + struct rte_flow_error *); +}; + +/** + * Get generic flow operations structure from a port. + * + * @param port_id + * Port identifier to query. + * @param[out] error + * Pointer to flow error structure. + * + * @return + * The flow operations structure associated with port_id, NULL in case of + * error, in which case rte_errno is set and the error structure contains + * additional details. 
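On the PMD side, a minimal sketch of a hypothetical driver's flow operations table, assuming stub pmd_* callbacks and <errno.h>; callbacks left NULL are rejected by the public wrappers with -ENOTSUP:

/* Illustrative sketch only: a hypothetical PMD's validate stub and ops table. */
static int
pmd_flow_validate(struct rte_eth_dev *dev __rte_unused,
		  const struct rte_flow_attr *attr __rte_unused,
		  const struct rte_flow_item pattern[] __rte_unused,
		  const struct rte_flow_action actions[] __rte_unused,
		  struct rte_flow_error *error)
{
	/* A real PMD would translate the rule to its hardware here. */
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "not implemented in this sketch");
}

static const struct rte_flow_ops pmd_flow_ops = {
	.validate = pmd_flow_validate,
	/* .create, .destroy, .flush, .query, .isolate left NULL here. */
};

This table is what the .filter_ctrl callback returns for RTE_ETH_FILTER_GENERIC, as shown in the code block above.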
+ */ +const struct rte_flow_ops * +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error); + +/** Helper macro to build input graph for rte_flow_expand_rss(). */ +#define RTE_FLOW_EXPAND_RSS_NEXT(...) \ + (const int []){ \ + __VA_ARGS__, 0, \ + } + +/** Node object of input graph for rte_flow_expand_rss(). */ +struct rte_flow_expand_node { + const int *const next; + /**< + * List of next node indexes. Index 0 is interpreted as a terminator. + */ + const enum rte_flow_item_type type; + /**< Pattern item type of current node. */ + uint64_t rss_types; + /**< + * RSS types bit-field associated with this node + * (see ETH_RSS_* definitions). + */ +}; + +/** Object returned by rte_flow_expand_rss(). */ +struct rte_flow_expand_rss { + uint32_t entries; + /**< Number of entries @p patterns and @p priorities. */ + struct { + struct rte_flow_item *pattern; /**< Expanded pattern array. */ + uint32_t priority; /**< Priority offset for each expansion. */ + } entry[]; +}; + +/** + * Expand RSS flows into several possible flows according to the RSS hash + * fields requested and the driver capabilities. + * + * @b EXPERIMENTAL: this API may change without prior notice + * + * @param[out] buf + * Buffer to store the result expansion. + * @param[in] size + * Buffer size in bytes. If 0, @p buf can be NULL. + * @param[in] pattern + * User flow pattern. + * @param[in] types + * RSS types to expand (see ETH_RSS_* definitions). + * @param[in] graph + * Input graph to expand @p pattern according to @p types. + * @param[in] graph_root_index + * Index of root node in @p graph, typically 0. + * + * @return + * A positive value representing the size of @p buf in bytes regardless of + * @p size on success, a negative errno value otherwise and rte_errno is + * set, the following errors are defined: + * + * -E2BIG: graph-depth @p graph is too deep. + */ +int __rte_experimental +rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size, + const struct rte_flow_item *pattern, uint64_t types, + const struct rte_flow_expand_node graph[], + int graph_root_index); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/lib/librte_ethdev/rte_mtr.c b/lib/librte_ethdev/rte_mtr.c new file mode 100644 index 00000000..1046cb5f --- /dev/null +++ b/lib/librte_ethdev/rte_mtr.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include + +#include +#include "rte_compat.h" +#include "rte_ethdev.h" +#include "rte_mtr_driver.h" +#include "rte_mtr.h" + +/* Get generic traffic metering & policing operations structure from a port. 
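A hypothetical sketch of an rte_flow_expand_rss() input graph, loosely modeled on how a PMD might describe ETH -> IPV4 expansion; node names, buffer size and RSS type assignments are illustrative only:

/* Illustrative sketch only: tiny expansion graph, root -> ETH -> IPV4. */
enum example_expansion {
	EX_ROOT = 0,
	EX_ETH,
	EX_IPV4,
};

static const struct rte_flow_expand_node example_graph[] = {
	[EX_ROOT] = {
		.type = RTE_FLOW_ITEM_TYPE_END,
		.next = RTE_FLOW_EXPAND_RSS_NEXT(EX_ETH),
	},
	[EX_ETH] = {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.next = RTE_FLOW_EXPAND_RSS_NEXT(EX_IPV4),
	},
	[EX_IPV4] = {
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4,
	},
};

static int
example_expand(const struct rte_flow_item *pattern)
{
	union {
		struct rte_flow_expand_rss exp;
		uint8_t buffer[4096];
	} u;

	/* Expand "pattern" against IPv4 RSS types into the local buffer. */
	return rte_flow_expand_rss(&u.exp, sizeof(u.buffer), pattern,
				   ETH_RSS_IPV4, example_graph, EX_ROOT);
}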
*/ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_mtr_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_mtr_error_set(error, + ENODEV, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->mtr_ops_get == NULL) || + (dev->dev_ops->mtr_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_mtr_error_set(error, + ENOSYS, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_MTR_FUNC(port_id, func) \ +({ \ + const struct rte_mtr_ops *ops = \ + rte_mtr_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_mtr_error_set(error, \ + ENOSYS, \ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* MTR capabilities get */ +int __rte_experimental +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* MTR meter profile add */ +int __rte_experimental +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_add)(dev, + meter_profile_id, profile, error); +} + +/** MTR meter profile delete */ +int __rte_experimental +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_delete)(dev, + meter_profile_id, error); +} + +/** MTR object create */ +int __rte_experimental +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, create)(dev, + mtr_id, params, shared, error); +} + +/** MTR object destroy */ +int __rte_experimental +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, destroy)(dev, + mtr_id, error); +} + +/** MTR object meter enable */ +int __rte_experimental +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_enable)(dev, + mtr_id, error); +} + +/** MTR object meter disable */ +int __rte_experimental +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_disable)(dev, + mtr_id, error); +} + +/** MTR object meter profile update */ +int __rte_experimental +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_update)(dev, + mtr_id, meter_profile_id, error); +} + +/** MTR object meter DSCP table update */ +int __rte_experimental +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum 
rte_mtr_color *dscp_table, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_dscp_table_update)(dev, + mtr_id, dscp_table, error); +} + +/** MTR object policer action update */ +int __rte_experimental +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, policer_actions_update)(dev, + mtr_id, action_mask, actions, error); +} + +/** MTR object enabled stats update */ +int __rte_experimental +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_update)(dev, + mtr_id, stats_mask, error); +} + +/** MTR object stats read */ +int __rte_experimental +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_read)(dev, + mtr_id, stats, stats_mask, clear, error); +} diff --git a/lib/librte_ethdev/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h new file mode 100644 index 00000000..c4819b27 --- /dev/null +++ b/lib/librte_ethdev/rte_mtr.h @@ -0,0 +1,730 @@ +/*- + * BSD LICENSE + * + * Copyright 2017 Intel Corporation + * Copyright 2017 NXP + * Copyright 2017 Cavium + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_MTR_H__ +#define __INCLUDE_RTE_MTR_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API + * + * This interface provides the ability to configure the traffic metering and + * policing (MTR) in a generic way. 
+ * + * The processing done for each input packet hitting a MTR object is: + * A) Traffic metering: The packet is assigned a color (the meter output + * color), based on the previous history of the flow reflected in the + * current state of the MTR object, according to the specific traffic + * metering algorithm. The traffic metering algorithm can typically work + * in color aware mode, in which case the input packet already has an + * initial color (the input color), or in color blind mode, which is + * equivalent to considering all input packets initially colored as green. + * B) Policing: There is a separate policer action configured for each meter + * output color, which can: + * a) Drop the packet. + * b) Keep the same packet color: the policer output color matches the + * meter output color (essentially a no-op action). + * c) Recolor the packet: the policer output color is different than + * the meter output color. + * The policer output color is the output color of the packet, which is + * set in the packet meta-data (i.e. struct rte_mbuf::sched::color). + * C) Statistics: The set of counters maintained for each MTR object is + * configurable and subject to the implementation support. This set + * includes the number of packets and bytes dropped or passed for each + * output color. + * + * Once successfully created, an MTR object is linked to one or several flows + * through the meter action of the flow API. + * A) Whether an MTR object is private to a flow or potentially shared by + * several flows has to be specified at creation time. + * B) Several meter actions can be potentially registered for the same flow. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Color + */ +enum rte_mtr_color { + RTE_MTR_GREEN = 0, /**< Green */ + RTE_MTR_YELLOW, /**< Yellow */ + RTE_MTR_RED, /**< Red */ + RTE_MTR_COLORS /**< Number of colors. */ +}; + +/** + * Statistics counter type + */ +enum rte_mtr_stats_type { + /** Number of packets passed as green by the policer. */ + RTE_MTR_STATS_N_PKTS_GREEN = 1 << 0, + + /** Number of packets passed as yellow by the policer. */ + RTE_MTR_STATS_N_PKTS_YELLOW = 1 << 1, + + /** Number of packets passed as red by the policer. */ + RTE_MTR_STATS_N_PKTS_RED = 1 << 2, + + /** Number of packets dropped by the policer. */ + RTE_MTR_STATS_N_PKTS_DROPPED = 1 << 3, + + /** Number of bytes passed as green by the policer. */ + RTE_MTR_STATS_N_BYTES_GREEN = 1 << 4, + + /** Number of bytes passed as yellow by the policer. */ + RTE_MTR_STATS_N_BYTES_YELLOW = 1 << 5, + + /** Number of bytes passed as red by the policer. */ + RTE_MTR_STATS_N_BYTES_RED = 1 << 6, + + /** Number of bytes dropped by the policer. */ + RTE_MTR_STATS_N_BYTES_DROPPED = 1 << 7, +}; + +/** + * Statistics counters + */ +struct rte_mtr_stats { + /** Number of packets passed by the policer (per color). */ + uint64_t n_pkts[RTE_MTR_COLORS]; + + /** Number of bytes passed by the policer (per color). */ + uint64_t n_bytes[RTE_MTR_COLORS]; + + /** Number of packets dropped by the policer. */ + uint64_t n_pkts_dropped; + + /** Number of bytes passed by the policer. */ + uint64_t n_bytes_dropped; +}; + +/** + * Traffic metering algorithms + */ +enum rte_mtr_algorithm { + /** No traffic metering performed, the output color is the same as the + * input color for every input packet. The meter of the MTR object is + * working in pass-through mode, having same effect as meter disable. 
+ * @see rte_mtr_meter_disable() + */ + RTE_MTR_NONE = 0, + + /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */ + RTE_MTR_SRTCM_RFC2697, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */ + RTE_MTR_TRTCM_RFC2698, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */ + RTE_MTR_TRTCM_RFC4115, +}; + +/** + * Meter profile + */ +struct rte_mtr_meter_profile { + /** Traffic metering algorithm. */ + enum rte_mtr_algorithm alg; + + RTE_STD_C11 + union { + /** Items only valid when *alg* is set to srTCM - RFC 2697. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Committed Burst Size (CBS) (bytes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } srtcm_rfc2697; + + /** Items only valid when *alg* is set to trTCM - RFC 2698. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Peak Information Rate (PIR) (bytes/second). */ + uint64_t pir; + + /** Committed Burst Size (CBS) (byes). */ + uint64_t cbs; + + /** Peak Burst Size (PBS) (bytes). */ + uint64_t pbs; + } trtcm_rfc2698; + + /** Items only valid when *alg* is set to trTCM - RFC 4115. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Excess Information Rate (EIR) (bytes/second). */ + uint64_t eir; + + /** Committed Burst Size (CBS) (byes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } trtcm_rfc4115; + }; +}; + +/** + * Policer actions + */ +enum rte_mtr_policer_action { + /** Recolor the packet as green. */ + MTR_POLICER_ACTION_COLOR_GREEN = 0, + + /** Recolor the packet as yellow. */ + MTR_POLICER_ACTION_COLOR_YELLOW, + + /** Recolor the packet as red. */ + MTR_POLICER_ACTION_COLOR_RED, + + /** Drop the packet. */ + MTR_POLICER_ACTION_DROP, +}; + +/** + * Parameters for each traffic metering & policing object + * + * @see enum rte_mtr_stats_type + */ +struct rte_mtr_params { + /** Meter profile ID. */ + uint32_t meter_profile_id; + + /** Meter input color in case of MTR object chaining. When non-zero: if + * a previous MTR object is enabled in the same flow, then the color + * determined by the latest MTR object in the same flow is used as the + * input color by the current MTR object, otherwise the current MTR + * object uses the *dscp_table* to determine the input color. When zero: + * the color determined by any previous MTR object in same flow is + * ignored by the current MTR object, which uses the *dscp_table* to + * determine the input color. + */ + int use_prev_mtr_color; + + /** Meter input color. When non-NULL: it points to a pre-allocated and + * pre-populated table with exactly 64 elements providing the input + * color for each value of the IPv4/IPv6 Differentiated Services Code + * Point (DSCP) input packet field. When NULL: it is equivalent to + * setting this parameter to an all-green populated table (i.e. table + * with all the 64 elements set to green color). The color blind mode + * is configured by setting *use_prev_mtr_color* to 0 and *dscp_table* + * to either NULL or to an all-green populated table. When + * *use_prev_mtr_color* is non-zero value or when *dscp_table* contains + * at least one yellow or red color element, then the color aware mode + * is configured. + */ + enum rte_mtr_color *dscp_table; + + /** Non-zero to enable the meter, zero to disable the meter at the time + * of MTR object creation. 
Ignored when the meter profile indicated by + * *meter_profile_id* is set to NONE. + * @see rte_mtr_meter_disable() + */ + int meter_enable; + + /** Policer actions (per meter output color). */ + enum rte_mtr_policer_action action[RTE_MTR_COLORS]; + + /** Set of stats counters to be enabled. + * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * MTR capabilities + */ +struct rte_mtr_capabilities { + /** Maximum number of MTR objects. */ + uint32_t n_max; + + /** Maximum number of MTR objects that can be shared by multiple flows. + * The value of zero indicates that shared MTR objects are not + * supported. The maximum value is *n_max*. + */ + uint32_t n_shared_max; + + /** When non-zero, this flag indicates that all the MTR objects that + * cannot be shared by multiple flows have identical capability set. + */ + int identical; + + /** When non-zero, this flag indicates that all the MTR objects that + * can be shared by multiple flows have identical capability set. + */ + int shared_identical; + + /** Maximum number of flows that can share the same MTR object. The + * value of zero is invalid. The value of 1 means that shared MTR + * objects not supported. + */ + uint32_t shared_n_flows_per_mtr_max; + + /** Maximum number of MTR objects that can be part of the same flow. The + * value of zero is invalid. The value of 1 indicates that MTR object + * chaining is not supported. The maximum value is *n_max*. + */ + uint32_t chaining_n_mtrs_per_flow_max; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object can be used as the packet input color by any subsequent + * MTR object from the same flow. When zero, it indicates that the color + * determined by one MTR object is always ignored by any subsequent MTR + * object from the same flow. Only valid when MTR chaining is supported, + * i.e. *chaining_n_mtrs_per_flow_max* is greater than 1. When non-zero, + * it also means that the color aware mode is supported by at least one + * metering algorithm. + */ + int chaining_use_prev_mtr_color_supported; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object is always used as the packet input color by any subsequent + * MTR object that is part of the same flow. When zero, it indicates + * that whether the color determined by one MTR object is either ignored + * or used as the packet input color by any subsequent MTR object from + * the same flow is individually configurable for each MTR object. Only + * valid when *chaining_use_prev_mtr_color_supported* is non-zero. + */ + int chaining_use_prev_mtr_color_enforced; + + /** Maximum number of MTR objects that can have their meter configured + * to run the srTCM RFC 2697 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_srtcm_rfc2697_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 2698 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc2698_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 4115 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc4115_n_max; + + /** Maximum traffic rate that can be metered by a single MTR object. For + * srTCM RFC 2697, this is the maximum CIR rate. 
For trTCM RFC 2698, + * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum + * value for the sum of PIR and EIR rates. + */ + uint64_t meter_rate_max; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the srTCM RFC 2697 metering algorithm. + */ + int color_aware_srtcm_rfc2697_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 2698 metering algorithm. + */ + int color_aware_trtcm_rfc2698_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 4115 metering algorithm. + */ + int color_aware_trtcm_rfc4115_supported; + + /** When non-zero, it indicates that the policer packet recolor actions + * are supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_recolor_supported; + + /** When non-zero, it indicates that the policer packet drop action is + * supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_drop_supported; + + /** Set of supported statistics counter types. + * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_mtr_error::cause. + */ +enum rte_mtr_error_type { + RTE_MTR_ERROR_TYPE_NONE, /**< No error. */ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_MTR_ERROR_TYPE_METER_PROFILE_ID, + RTE_MTR_ERROR_TYPE_METER_PROFILE, + RTE_MTR_ERROR_TYPE_MTR_ID, + RTE_MTR_ERROR_TYPE_MTR_PARAMS, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_GREEN, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_YELLOW, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_RED, + RTE_MTR_ERROR_TYPE_STATS_MASK, + RTE_MTR_ERROR_TYPE_STATS, + RTE_MTR_ERROR_TYPE_SHARED, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_mtr_error { + enum rte_mtr_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * MTR capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * MTR capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); + +/** + * Meter profile add + * + * Create a new meter profile with ID set to *meter_profile_id*. The new profile + * is used to create one or several MTR objects. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * ID for the new meter profile. Needs to be unused by any of the existing + * meter profiles added for the current port. + * @param[in] profile + * Meter profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. 
+ * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); + +/** + * Meter profile delete + * + * Delete an existing meter profile. This operation fails when there is + * currently at least one user (i.e. MTR object) of this profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * Meter profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object create + * + * Create a new MTR object for the current port. This object is run as part of + * associated flow action for traffic metering and policing. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be unused by any of the existing MTR objects. + * created for the current port. + * @param[in] params + * MTR object params. Needs to be pre-allocated and valid. + * @param[in] shared + * Non-zero when this MTR object can be shared by multiple flows, zero when + * this MTR object can be used by a single flow. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_flow_action_type::RTE_FLOW_ACTION_TYPE_METER + */ +int __rte_experimental +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); + +/** + * MTR object destroy + * + * Delete an existing MTR object. This operation fails when there is currently + * at least one user (i.e. flow) of this MTR object. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * created for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter disable + * + * Disable the meter of an existing MTR object. In disabled state, the meter of + * the current MTR object works in pass-through mode, meaning that for each + * input packet the meter output color is always the same as the input color. In + * particular, when the meter of the current MTR object is configured in color + * blind mode, the input color is always green, so the meter output color is + * also always green. Note that the policer and the statistics of the current + * MTR object are working as usual while the meter is disabled. No action is + * taken and this function returns successfully when the meter of the current + * MTR object is already disabled. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
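A minimal sketch chaining the calls above: add an srTCM RFC 2697 meter profile, then create a shared MTR object that uses it; the profile ID, MTR object ID, rates and burst sizes are hypothetical:

/* Illustrative sketch only: 1 Mbyte/s srTCM profile and a shared MTR object. */
static int
example_mtr_setup(uint16_t port_id, struct rte_mtr_error *error)
{
	struct rte_mtr_meter_profile profile = {
		.alg = RTE_MTR_SRTCM_RFC2697,
		.srtcm_rfc2697 = {
			.cir = 1000000, /* bytes/second */
			.cbs = 2048,    /* bytes */
			.ebs = 2048,    /* bytes */
		},
	};
	struct rte_mtr_params params = {
		.meter_profile_id = 0,
		.use_prev_mtr_color = 0, /* color blind mode */
		.dscp_table = NULL,      /* all-green input color table */
		.meter_enable = 1,
		.action = {
			[RTE_MTR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN,
			[RTE_MTR_YELLOW] = MTR_POLICER_ACTION_COLOR_YELLOW,
			[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP,
		},
		.stats_mask = RTE_MTR_STATS_N_PKTS_GREEN |
			      RTE_MTR_STATS_N_PKTS_DROPPED,
	};
	int ret;

	ret = rte_mtr_meter_profile_add(port_id, 0, &profile, error);
	if (ret != 0)
		return ret;
	/* MTR object 0, shareable by any flow rule that references it. */
	return rte_mtr_create(port_id, 0, &params, 1, error);
}

The resulting MTR object would then be referenced from a flow rule through RTE_FLOW_ACTION_TYPE_METER with struct rte_flow_action_meter::mtr_id set to the same ID.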
+ */ +int __rte_experimental +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter enable + * + * Enable the meter of an existing MTR object. If the MTR object has its meter + * already enabled, then no action is taken and this function returns + * successfully. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter profile update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] meter_profile_id + * Meter profile ID for the current MTR object. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object DSCP table update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] dscp_table + * When non-NULL: it points to a pre-allocated and pre-populated table with + * exactly 64 elements providing the input color for each value of the + * IPv4/IPv6 Differentiated Services Code Point (DSCP) input packet field. + * When NULL: it is equivalent to setting this parameter to an “all-green” + * populated table (i.e. table with all the 64 elements set to green color). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); + +/** + * MTR object policer actions update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] action_mask + * Bit mask indicating which policer actions need to be updated. One or more + * policer actions can be updated in a single function invocation. To update + * the policer action associated with color C, bit (1 << C) needs to be set in + * *action_mask* and element at position C in the *actions* array needs to be + * valid. + * @param[in] actions + * Pre-allocated and pre-populated array of policer actions. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); + +/** + * MTR object enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current MTR object. + * Any statistics counter type not included in this set is to be disabled for + * the current MTR object. 
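To make the action_mask convention concrete, the fragment below updates only the green and red policer actions and leaves yellow untouched. The enum values (RTE_MTR_GREEN, RTE_MTR_RED, RTE_MTR_COLORS, MTR_POLICER_ACTION_COLOR_GREEN, MTR_POLICER_ACTION_DROP) are assumed to be the ones declared earlier in this header.

#include <rte_mtr.h>

/* Sketch: keep green packets green and drop red packets. Bit (1 << color)
 * in action_mask selects which entries of the actions array are applied.
 */
static int
drop_red_packets(uint16_t port_id, uint32_t mtr_id)
{
	enum rte_mtr_policer_action actions[RTE_MTR_COLORS] = {
		[RTE_MTR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN,
		[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP,
	};
	uint32_t action_mask = (1 << RTE_MTR_GREEN) | (1 << RTE_MTR_RED);
	struct rte_mtr_error err;

	return rte_mtr_policer_actions_update(port_id, mtr_id,
		action_mask, actions, &err);
}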
+ * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int __rte_experimental +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); + +/** + * MTR object statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current MTR object. + * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this MTR object, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int __rte_experimental +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_H__ */ diff --git a/lib/librte_ethdev/rte_mtr_driver.h b/lib/librte_ethdev/rte_mtr_driver.h new file mode 100644 index 00000000..c9a6d7c3 --- /dev/null +++ b/lib/librte_ethdev/rte_mtr_driver.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_MTR_DRIVER_H__ +#define __INCLUDE_RTE_MTR_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. 
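A short usage sketch for the statistics path documented above: read whatever counters are enabled for the object, report the mask that tells which ones are valid, and clear them in the same call. The helper name is hypothetical.

#include <inttypes.h>
#include <stdio.h>
#include <rte_mtr.h>

/* Sketch: read-and-clear the enabled counters of one MTR object. Only the
 * counters flagged in *mask* after the call are meaningful.
 */
static int
dump_mtr_stats(uint16_t port_id, uint32_t mtr_id)
{
	struct rte_mtr_stats stats;
	struct rte_mtr_error err;
	uint64_t mask = 0;
	int ret;

	ret = rte_mtr_stats_read(port_id, mtr_id, &stats, &mask,
		1 /* clear after read */, &err);
	if (ret != 0)
		return ret;

	printf("mtr %u: enabled stats mask 0x%" PRIx64 "\n", mtr_id, mask);
	return 0;
}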
+ */ + +#include + +#include +#include "rte_ethdev.h" +#include "rte_mtr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*rte_mtr_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); +/**< @internal MTR capabilities get */ + +typedef int (*rte_mtr_meter_profile_add_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); +/**< @internal MTR meter profile add */ + +typedef int (*rte_mtr_meter_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR meter profile delete */ + +typedef int (*rte_mtr_create_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); +/**< @internal MTR object create */ + +typedef int (*rte_mtr_destroy_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object destroy */ + +typedef int (*rte_mtr_meter_enable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter enable */ + +typedef int (*rte_mtr_meter_disable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter disable */ + +typedef int (*rte_mtr_meter_profile_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter profile update */ + +typedef int (*rte_mtr_meter_dscp_table_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); +/**< @internal MTR object meter DSCP table update */ + +typedef int (*rte_mtr_policer_actions_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); +/**< @internal MTR object policer action update*/ + +typedef int (*rte_mtr_stats_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); +/**< @internal MTR object enabled stats update */ + +typedef int (*rte_mtr_stats_read_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); +/**< @internal MTR object stats read */ + +struct rte_mtr_ops { + /** MTR capabilities get */ + rte_mtr_capabilities_get_t capabilities_get; + + /** MTR meter profile add */ + rte_mtr_meter_profile_add_t meter_profile_add; + + /** MTR meter profile delete */ + rte_mtr_meter_profile_delete_t meter_profile_delete; + + /** MTR object create */ + rte_mtr_create_t create; + + /** MTR object destroy */ + rte_mtr_destroy_t destroy; + + /** MTR object meter enable */ + rte_mtr_meter_enable_t meter_enable; + + /** MTR object meter disable */ + rte_mtr_meter_disable_t meter_disable; + + /** MTR object meter profile update */ + rte_mtr_meter_profile_update_t meter_profile_update; + + /** MTR object meter DSCP table update */ + rte_mtr_meter_dscp_table_update_t meter_dscp_table_update; + + /** MTR object policer action update */ + rte_mtr_policer_actions_update_t policer_actions_update; + + /** MTR object enabled stats update */ + rte_mtr_stats_update_t stats_update; + + /** MTR object stats read */ + rte_mtr_stats_read_t stats_read; +}; + +/** + * Initialize generic error structure. + * + * This function also sets rte_errno to a given value. 
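On the driver side, a PMD would typically keep a static ops table with the members listed above and hand it back to the MTR API layer through its ethdev hook (not shown). The fragment below is a sketch only: the mypmd_* names are hypothetical, and the n_max capability field is assumed from the declarations earlier in rte_mtr.h.

#include <errno.h>
#include <string.h>
#include "rte_mtr_driver.h"

/* Hypothetical PMD fragment: implement one callback and publish it through
 * the ops table; members left NULL are reported as unsupported by the
 * generic layer.
 */
static int
mypmd_mtr_capabilities_get(struct rte_eth_dev *dev,
	struct rte_mtr_capabilities *cap, struct rte_mtr_error *error)
{
	(void)dev;
	if (cap == NULL)
		return -rte_mtr_error_set(error, EINVAL,
			RTE_MTR_ERROR_TYPE_UNSPECIFIED, NULL,
			"NULL capabilities structure");
	memset(cap, 0, sizeof(*cap));
	cap->n_max = 1;	/* assumed field: maximum number of MTR objects */
	return 0;
}

static const struct rte_mtr_ops mypmd_mtr_ops = {
	.capabilities_get = mypmd_mtr_capabilities_get,
};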
+ * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_mtr_error_set(struct rte_mtr_error *error, + int code, + enum rte_mtr_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_mtr_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic metering and policing operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic metering and policing operations structure associated with + * port_id on success, NULL otherwise. + */ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_DRIVER_H__ */ diff --git a/lib/librte_ethdev/rte_tm.c b/lib/librte_ethdev/rte_tm.c new file mode 100644 index 00000000..9709454f --- /dev/null +++ b/lib/librte_ethdev/rte_tm.c @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include + +#include +#include "rte_ethdev.h" +#include "rte_tm_driver.h" +#include "rte_tm.h" + +/* Get generic traffic manager operations structure from a port. */ +const struct rte_tm_ops * +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_tm_error_set(error, + ENODEV, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->tm_ops_get == NULL) || + (dev->dev_ops->tm_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_tm_error_set(error, + ENOSYS, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_TM_FUNC(port_id, func) \ +({ \ + const struct rte_tm_ops *ops = \ + rte_tm_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_tm_error_set(error, \ + ENOSYS, \ + RTE_TM_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* Get number of leaf nodes */ +int +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops = + rte_tm_ops_get(port_id, error); + + if (ops == NULL) + return -rte_errno; + + if (n_leaf_nodes == NULL) { + rte_tm_error_set(error, + EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(EINVAL)); + return -rte_errno; + } + + *n_leaf_nodes = dev->data->nb_tx_queues; + return 0; +} + +/* Check node type (leaf or non-leaf) */ +int +rte_tm_node_type_get(uint16_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_type_get)(dev, + node_id, is_leaf, error); +} + +/* Get capabilities */ +int rte_tm_capabilities_get(uint16_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return 
RTE_TM_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* Get level capabilities */ +int rte_tm_level_capabilities_get(uint16_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, level_capabilities_get)(dev, + level_id, cap, error); +} + +/* Get node capabilities */ +int rte_tm_node_capabilities_get(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_capabilities_get)(dev, + node_id, cap, error); +} + +/* Add WRED profile */ +int rte_tm_wred_profile_add(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_add)(dev, + wred_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_wred_profile_delete(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_delete)(dev, + wred_profile_id, error); +} + +/* Add/update shared WRED context */ +int rte_tm_shared_wred_context_add_update(uint16_t port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_add_update)(dev, + shared_wred_context_id, wred_profile_id, error); +} + +/* Delete shared WRED context */ +int rte_tm_shared_wred_context_delete(uint16_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_delete)(dev, + shared_wred_context_id, error); +} + +/* Add shaper profile */ +int rte_tm_shaper_profile_add(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_add)(dev, + shaper_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_shaper_profile_delete(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_delete)(dev, + shaper_profile_id, error); +} + +/* Add shared shaper */ +int rte_tm_shared_shaper_add_update(uint16_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_add_update)(dev, + shared_shaper_id, shaper_profile_id, error); +} + +/* Delete shared shaper */ +int rte_tm_shared_shaper_delete(uint16_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_delete)(dev, + shared_shaper_id, error); +} + +/* Add node to port traffic manager hierarchy */ +int rte_tm_node_add(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = 
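For readers unfamiliar with GCC statement expressions, each wrapper above resolves RTE_TM_FUNC() into a lookup of the per-port ops table followed by the PMD call. A hand-expanded sketch of rte_tm_node_type_get() under that assumption (and assuming rte_tm_driver.h declares rte_tm_error_set() and a node_type_get ops member, mirroring rte_mtr_driver.h) looks like this:

#include <errno.h>
#include <rte_errno.h>
#include "rte_tm_driver.h"

/* Hand-expanded equivalent of the RTE_TM_FUNC()-based wrapper: fetch the
 * per-port ops table, fail with -ENOSYS when the callback is missing,
 * otherwise forward the call to the PMD.
 */
static int
tm_node_type_get_expanded(uint16_t port_id, uint32_t node_id, int *is_leaf,
	struct rte_tm_error *error)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	const struct rte_tm_ops *ops = rte_tm_ops_get(port_id, error);

	if (ops == NULL)
		return -rte_errno;
	if (ops->node_type_get == NULL)
		return -rte_tm_error_set(error, ENOSYS,
			RTE_TM_ERROR_TYPE_UNSPECIFIED, NULL,
			rte_strerror(ENOSYS));
	return ops->node_type_get(dev, node_id, is_leaf, error);
}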
&rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_add)(dev, + node_id, parent_node_id, priority, weight, level_id, + params, error); +} + +/* Delete node from traffic manager hierarchy */ +int rte_tm_node_delete(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_delete)(dev, + node_id, error); +} + +/* Suspend node */ +int rte_tm_node_suspend(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_suspend)(dev, + node_id, error); +} + +/* Resume node */ +int rte_tm_node_resume(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_resume)(dev, + node_id, error); +} + +/* Commit the initial port traffic manager hierarchy */ +int rte_tm_hierarchy_commit(uint16_t port_id, + int clear_on_fail, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, hierarchy_commit)(dev, + clear_on_fail, error); +} + +/* Update node parent */ +int rte_tm_node_parent_update(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_parent_update)(dev, + node_id, parent_node_id, priority, weight, error); +} + +/* Update node private shaper */ +int rte_tm_node_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shaper_update)(dev, + node_id, shaper_profile_id, error); +} + +/* Update node shared shapers */ +int rte_tm_node_shared_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_shaper_update)(dev, + node_id, shared_shaper_id, add, error); +} + +/* Update node stats */ +int rte_tm_node_stats_update(uint16_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_update)(dev, + node_id, stats_mask, error); +} + +/* Update WFQ weight mode */ +int rte_tm_node_wfq_weight_mode_update(uint16_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wfq_weight_mode_update)(dev, + node_id, wfq_weight_mode, n_sp_priorities, error); +} + +/* Update node congestion management mode */ +int rte_tm_node_cman_update(uint16_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_cman_update)(dev, + node_id, cman, error); +} + +/* Update node private WRED context */ +int rte_tm_node_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wred_context_update)(dev, + node_id, wred_profile_id, error); +} + +/* Update 
node shared WRED context */ +int rte_tm_node_shared_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_wred_context_update)(dev, + node_id, shared_wred_context_id, add, error); +} + +/* Read and/or clear stats counters for specific node */ +int rte_tm_node_stats_read(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_read)(dev, + node_id, stats, stats_mask, clear, error); +} + +/* Packet marking - VLAN DEI */ +int rte_tm_mark_vlan_dei(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_vlan_dei)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 ECN */ +int rte_tm_mark_ip_ecn(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_ecn)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 DSCP */ +int rte_tm_mark_ip_dscp(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_dscp)(dev, + mark_green, mark_yellow, mark_red, error); +} diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h new file mode 100644 index 00000000..955f02ff --- /dev/null +++ b/lib/librte_ethdev/rte_tm.h @@ -0,0 +1,1965 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 Cavium. + * Copyright(c) 2017 NXP. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TM_H__ +#define __INCLUDE_RTE_TM_H__ + +/** + * @file + * RTE Generic Traffic Manager API + * + * This interface provides the ability to configure the traffic manager in a + * generic way. It includes features such as: hierarchical scheduling, + * traffic shaping, congestion management, packet marking, etc. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Ethernet framing overhead. + * + * Overhead fields per Ethernet frame: + * 1. Preamble: 7 bytes; + * 2. Start of Frame Delimiter (SFD): 1 byte; + * 3. Inter-Frame Gap (IFG): 12 bytes. + * + * One of the typical values for the *pkt_length_adjust* field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD 20 + +/** + * Ethernet framing overhead including the Frame Check Sequence (FCS) field. + * Useful when FCS is generated and added at the end of the Ethernet frame on + * TX side without any SW intervention. + * + * One of the typical values for the pkt_length_adjust field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD_FCS 24 + +/** + * Invalid WRED profile ID. + * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_wred_context_update() + */ +#define RTE_TM_WRED_PROFILE_ID_NONE UINT32_MAX + +/** + *Invalid shaper profile ID. + * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_shaper_update() + */ +#define RTE_TM_SHAPER_PROFILE_ID_NONE UINT32_MAX + +/** + * Node ID for the parent of the root node. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_ID_NULL UINT32_MAX + +/** + * Node level ID used to disable level ID checking. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_LEVEL_ID_ANY UINT32_MAX + +/** + * Color + */ +enum rte_tm_color { + RTE_TM_GREEN = 0, /**< Green */ + RTE_TM_YELLOW, /**< Yellow */ + RTE_TM_RED, /**< Red */ + RTE_TM_COLORS /**< Number of colors */ +}; + +/** + * Node statistics counter type + */ +enum rte_tm_stats_type { + /** Number of packets scheduled from current node. */ + RTE_TM_STATS_N_PKTS = 1 << 0, + + /** Number of bytes scheduled from current node. */ + RTE_TM_STATS_N_BYTES = 1 << 1, + + /** Number of green packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_GREEN_DROPPED = 1 << 2, + + /** Number of yellow packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_YELLOW_DROPPED = 1 << 3, + + /** Number of red packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_RED_DROPPED = 1 << 4, + + /** Number of green bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_GREEN_DROPPED = 1 << 5, + + /** Number of yellow bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_YELLOW_DROPPED = 1 << 6, + + /** Number of red bytes dropped by current leaf node. 
*/ + RTE_TM_STATS_N_BYTES_RED_DROPPED = 1 << 7, + + /** Number of packets currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_PKTS_QUEUED = 1 << 8, + + /** Number of bytes currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_BYTES_QUEUED = 1 << 9, +}; + +/** + * Node statistics counters + */ +struct rte_tm_node_stats { + /** Number of packets scheduled from current node. */ + uint64_t n_pkts; + + /** Number of bytes scheduled from current node. */ + uint64_t n_bytes; + + /** Statistics counters for leaf nodes only. */ + struct { + /** Number of packets dropped by current leaf node per each + * color. + */ + uint64_t n_pkts_dropped[RTE_TM_COLORS]; + + /** Number of bytes dropped by current leaf node per each + * color. + */ + uint64_t n_bytes_dropped[RTE_TM_COLORS]; + + /** Number of packets currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_pkts_queued; + + /** Number of bytes currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_bytes_queued; + } leaf; +}; + +/** + * Traffic manager dynamic updates + */ +enum rte_tm_dynamic_update_type { + /** Dynamic parent node update. The new parent node is located on same + * hierarchy level as the former parent node. Consequently, the node + * whose parent is changed preserves its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL = 1 << 0, + + /** Dynamic parent node update. The new parent node is located on + * different hierarchy level than the former parent node. Consequently, + * the node whose parent is changed also changes its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL = 1 << 1, + + /** Dynamic node add/delete. */ + RTE_TM_UPDATE_NODE_ADD_DELETE = 1 << 2, + + /** Suspend/resume nodes. */ + RTE_TM_UPDATE_NODE_SUSPEND_RESUME = 1 << 3, + + /** Dynamic switch between byte-based and packet-based WFQ weights. */ + RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE = 1 << 4, + + /** Dynamic update on number of SP priorities. */ + RTE_TM_UPDATE_NODE_N_SP_PRIORITIES = 1 << 5, + + /** Dynamic update of congestion management mode for leaf nodes. */ + RTE_TM_UPDATE_NODE_CMAN = 1 << 6, + + /** Dynamic update of the set of enabled stats counter types. */ + RTE_TM_UPDATE_NODE_STATS = 1 << 7, +}; + +/** + * Traffic manager capabilities + */ +struct rte_tm_capabilities { + /** Maximum number of nodes. */ + uint32_t n_nodes_max; + + /** Maximum number of levels (i.e. number of nodes connecting the root + * node with any leaf node, including the root and the leaf). + */ + uint32_t n_levels_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes + * (with the exception of the root node) have identical capability set. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes have + * identical capability set. + */ + int leaf_nodes_identical; + + /** Maximum number of shapers, either private or shared. In case the + * implementation does not share any resources between private and + * shared shapers, it is typically equal to the sum of + * *shaper_private_n_max* and *shaper_shared_n_max*. The + * value of zero indicates that traffic shaping is not supported. + */ + uint32_t shaper_n_max; + + /** Maximum number of private shapers. Indicates the maximum number of + * nodes that can concurrently have their private shaper enabled. The + * value of zero indicates that private shapers are not supported. 
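The dynamic update flags above are meant to be checked before attempting runtime changes to a committed hierarchy. A minimal sketch, with an arbitrary priority/weight choice, combining the capability check with the suspend/update/resume calls declared in this patch:

#include <errno.h>
#include <rte_tm.h>

/* Sketch: re-parent a node at runtime, but only when the PMD advertises
 * same-level parent changes and node suspend/resume support.
 */
static int
reparent_node(uint16_t port_id, uint32_t node_id, uint32_t new_parent_id)
{
	struct rte_tm_capabilities cap;
	struct rte_tm_error err;
	uint64_t need = RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL |
		RTE_TM_UPDATE_NODE_SUSPEND_RESUME;
	int ret;

	ret = rte_tm_capabilities_get(port_id, &cap, &err);
	if (ret != 0)
		return ret;
	if ((cap.dynamic_update_mask & need) != need)
		return -ENOTSUP;

	ret = rte_tm_node_suspend(port_id, node_id, &err);
	if (ret != 0)
		return ret;
	ret = rte_tm_node_parent_update(port_id, node_id, new_parent_id,
		0 /* priority */, 1 /* weight */, &err);
	if (ret != 0)
		return ret;
	return rte_tm_node_resume(port_id, node_id, &err);
}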
+ */ + uint32_t shaper_private_n_max; + + /** Maximum number of private shapers that support dual rate shaping. + * Indicates the maximum number of nodes that can concurrently have + * their private shaper enabled with dual rate support. Only valid when + * private shapers are supported. The value of zero indicates that dual + * rate shaping is not available for private shapers. The maximum value + * is *shaper_private_n_max*. + */ + int shaper_private_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers. The value of zero indicates that + * shared shapers are not supported. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of nodes that can share the same shared shaper. + * Only valid when shared shapers are supported. + */ + uint32_t shaper_shared_n_nodes_per_shaper_max; + + /** Maximum number of shared shapers a node can be part of. This + * parameter indicates that there is at least one node that can be + * configured with this many shared shapers, which might not be true for + * all the nodes. Only valid when shared shapers are supported, in which + * case it ranges from 1 to *shaper_shared_n_max*. + */ + uint32_t shaper_shared_n_shapers_per_node_max; + + /** Maximum number of shared shapers that can be configured with dual + * rate shaping. The value of zero indicates that dual rate shaping + * support is not available for shared shapers. + */ + uint32_t shaper_shared_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any shared + * shaper. Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any shared + * shaper. Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_max; + + /** Minimum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_min; + + /** Maximum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_max; + + /** Maximum number of children nodes. This parameter indicates that + * there is at least one non-leaf node that can be configured with this + * many children nodes, which might not be true for all the non-leaf + * nodes. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This parameter + * indicates that there is at least one non-leaf node that can be + * configured with this many priority levels for managing its children + * nodes, which might not be true for all the non-leaf nodes. The value + * of zero is invalid. The value of 1 indicates that only priority 0 is + * supported, which essentially means that Strict Priority (SP) + * algorithm is not supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the same priority at + * any given time, i.e. maximum size of the WFQ sibling node group. This + * parameter indicates there is at least one non-leaf node that meets + * this condition, which might not be true for all the non-leaf nodes. + * The value of zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. 
The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have more than one child + * node at any given time, i.e. maximum number of WFQ sibling node + * groups that have two or more members. This parameter indicates there + * is at least one non-leaf node that meets this condition, which might + * not be true for all the non-leaf nodes. The value of zero states that + * WFQ algorithm is not supported. The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels have at most one + * child node, so there can be only one priority level with two or + * more sibling nodes making up a WFQ group. The maximum value is: + * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes + * with same priority have the same WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** WRED packet mode support. When non-zero, this parameter indicates + * that there is atleast one leaf node that supports the WRED packet + * mode, which might not be true for all the leaf nodes. In packet + * mode, the WRED thresholds specify the queue length in packets, as + * opposed to bytes. + */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support. When non-zero, this parameter indicates that + * there is atleast one leaf node that supports the WRED byte mode, + * which might not be true for all the leaf nodes. In byte mode, the + * WRED thresholds specify the queue length in bytes, as opposed to + * packets. + */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support. When non-zero, this parameter + * indicates that there is at least one leaf node that supports the head + * drop algorithm, which might not be true for all the leaf nodes. + */ + int cman_head_drop_supported; + + /** Maximum number of WRED contexts, either private or shared. In case + * the implementation does not share any resources between private and + * shared WRED contexts, it is typically equal to the sum of + * *cman_wred_context_private_n_max* and + * *cman_wred_context_shared_n_max*. The value of zero indicates that + * WRED is not supported. + */ + uint32_t cman_wred_context_n_max; + + /** Maximum number of private WRED contexts. Indicates the maximum + * number of leaf nodes that can concurrently have their private WRED + * context enabled. The value of zero indicates that private WRED + * contexts are not supported. + */ + uint32_t cman_wred_context_private_n_max; + + /** Maximum number of shared WRED contexts. The value of zero + * indicates that shared WRED contexts are not supported. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Maximum number of leaf nodes that can share the same WRED context. + * Only valid when shared WRED contexts are supported. + */ + uint32_t cman_wred_context_shared_n_nodes_per_context_max; + + /** Maximum number of shared WRED contexts a leaf node can be part of. + * This parameter indicates that there is at least one leaf node that + * can be configured with this many shared WRED contexts, which might + * not be true for all the leaf nodes. Only valid when shared WRED + * contexts are supported, in which case it ranges from 1 to + * *cman_wred_context_shared_n_max*. + */ + uint32_t cman_wred_context_shared_n_contexts_per_node_max; + + /** Support for VLAN DEI packet marking (per color). 
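As an illustration of how the WRED-related capability fields above might drive configuration, the small helper below (hypothetical name) decides between packet mode and byte mode; its result would later feed the packet_mode field of the WRED profile parameters defined further down in this header.

#include <rte_tm.h>

/* Sketch: prefer WRED packet mode when available, fall back to byte mode,
 * and return a negative value when WRED is not offered at all.
 */
static int
choose_wred_mode(const struct rte_tm_capabilities *cap)
{
	if (cap->cman_wred_context_n_max == 0)
		return -1;	/* WRED not supported */
	if (cap->cman_wred_packet_mode_supported)
		return 1;	/* thresholds expressed in packets */
	if (cap->cman_wred_byte_mode_supported)
		return 0;	/* thresholds expressed in bytes */
	return -1;
}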
*/ + int mark_vlan_dei_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of TCP packets (per color). */ + int mark_ip_ecn_tcp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of SCTP packets (per color). */ + int mark_ip_ecn_sctp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 DSCP packet marking (per color). */ + int mark_ip_dscp_supported[RTE_TM_COLORS]; + + /** Set of supported dynamic update operations. + * @see enum rte_tm_dynamic_update_type + */ + uint64_t dynamic_update_mask; + + /** Set of supported statistics counter types. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Traffic manager level capabilities + */ +struct rte_tm_level_capabilities { + /** Maximum number of nodes for the current hierarchy level. */ + uint32_t n_nodes_max; + + /** Maximum number of non-leaf nodes for the current hierarchy level. + * The value of 0 indicates that current level only supports leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_nonleaf_max; + + /** Maximum number of leaf nodes for the current hierarchy level. The + * value of 0 indicates that current level only supports non-leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_leaf_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes on + * this level have identical capability set. Valid only when + * *n_nodes_nonleaf_max* is non-zero. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes on this + * level have identical capability set. Valid only when + * *n_nodes_leaf_max* is non-zero. + */ + int leaf_nodes_identical; + + RTE_STD_C11 + union { + /** Items valid only for the non-leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one non-leaf node on this level + * with private shaper support, which may not be the + * case for all the non-leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the non-leaf + * nodes on the current level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level with dual rate private shaper support, which + * may not be the case for all the non-leaf nodes on + * this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes of this level. + * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes on this level. + * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any non-leaf + * node on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the non-leaf nodes on this level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level that meets this condition, which may not be the + * case for all the non-leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of children nodes. 
This parameter + * indicates that there is at least one non-leaf node on + * this level that can be configured with this many + * children nodes, which might not be true for all the + * non-leaf nodes on this level. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This + * parameter indicates that there is at least one + * non-leaf node on this level that can be configured + * with this many priority levels for managing its + * children nodes, which might not be true for all the + * non-leaf nodes on this level. The value of zero is + * invalid. The value of 1 indicates that only priority + * 0 is supported, which essentially means that Strict + * Priority (SP) algorithm is not supported on this + * level. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size of + * the WFQ sibling node group. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which may not be true for + * all the non-leaf nodes on this level. The value of + * zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported on this level. The maximum + * value is *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that + * have two or more members. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which might not be true + * for all the non-leaf nodes. The value of zero states + * that WFQ algorithm is not supported on this level. + * The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels on + * this level have at most one child node, so there can + * be only one priority level with two or more sibling + * nodes making up a WFQ group on this level. The + * maximum value is: + * min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes on this level with same priority + * have the same WFQ weight, so on this level WFQ is + * reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** Mask of statistics counter types supported by the + * non-leaf nodes on this level. Every supported + * statistics counter type is supported by at least one + * non-leaf node on this level, which may not be true + * for all the non-leaf nodes on this level. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } nonleaf; + + /** Items valid only for the leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one leaf node on this level with + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the leaf nodes + * on this level. When non-zero, it indicates there is + * at least one leaf node on this level with dual rate + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes of this level. 
+ * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes on this level. + * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any leaf node + * on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the leaf nodes on this level. When non-zero, it + * indicates there is at least one leaf node on this + * level that meets this condition, which may not be the + * case for all the leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** WRED packet mode support. When non-zero, this + * parameter indicates that there is atleast one leaf + * node on this level that supports the WRED packet + * mode, which might not be true for all the leaf + * nodes. In packet mode, the WRED thresholds specify + * the queue length in packets, as opposed to bytes. + */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support. When non-zero, this + * parameter indicates that there is atleast one leaf + * node on this level that supports the WRED byte mode, + * which might not be true for all the leaf nodes. In + * byte mode, the WRED thresholds specify the queue + * length in bytes, as opposed to packets. + */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support. When non-zero, this + * parameter indicates that there is at least one leaf + * node on this level that supports the head drop + * algorithm, which might not be true for all the leaf + * nodes on this level. + */ + int cman_head_drop_supported; + + /** Private WRED context support. When non-zero, it + * indicates there is at least one node on this level + * with private WRED context support, which may not be + * true for all the leaf nodes on this level. + */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts that any + * leaf node on this level can be part of. The value of + * zero indicates that shared WRED contexts are not + * supported by the leaf nodes on this level. When + * non-zero, it indicates there is at least one leaf + * node on this level that meets this condition, which + * may not be the case for all the leaf nodes on this + * level. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Mask of statistics counter types supported by the + * leaf nodes on this level. Every supported statistics + * counter type is supported by at least one leaf node + * on this level, which may not be true for all the leaf + * nodes on this level. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } leaf; + }; +}; + +/** + * Traffic manager node capabilities + */ +struct rte_tm_node_capabilities { + /** Private shaper support for the current node. */ + int shaper_private_supported; + + /** Dual rate shaping support for private shaper of current node. + * Valid only when private shaper is supported by the current node. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. 
+ */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers the current node can be part of. + * The value of zero indicates that shared shapers are not supported by + * the current node. + */ + uint32_t shaper_shared_n_max; + + RTE_STD_C11 + union { + /** Items valid only for non-leaf nodes. */ + struct { + /** Maximum number of children nodes. */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. The + * value of zero is invalid. The value of 1 indicates + * that only priority 0 is supported, which essentially + * means that Strict Priority (SP) algorithm is not + * supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size + * of the WFQ sibling node group. The value of zero + * is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that have + * two or more members. The value of zero states that + * WFQ algorithm is not supported. The value of 1 + * indicates that (*sched_sp_n_priorities_max* - 1) + * priority levels have at most one child node, so there + * can be only one priority level with two or more + * sibling nodes making up a WFQ group. The maximum + * value is: min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes with same priority have the same + * WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + } nonleaf; + + /** Items valid only for leaf nodes. */ + struct { + /** WRED packet mode support for current node. */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support for current node. */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support for current node. */ + int cman_head_drop_supported; + + /** Private WRED context support for current node. */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts the current + * node can be part of. The value of zero indicates that + * shared WRED contexts are not supported by the current + * node. + */ + uint32_t cman_wred_context_shared_n_max; + } leaf; + }; + + /** Mask of statistics counter types supported by the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Congestion management (CMAN) mode + * + * This is used for controlling the admission of packets into a packet queue or + * group of packet queues on congestion. On request of writing a new packet + * into the current queue while the queue is full, the *tail drop* algorithm + * drops the new packet while leaving the queue unmodified, as opposed to *head + * drop* algorithm, which drops the packet at the head of the queue (the oldest + * packet waiting in the queue) and admits the new packet at the tail of the + * queue. + * + * The *Random Early Detection (RED)* algorithm works by proactively dropping + * more and more input packets as the queue occupancy builds up. When the queue + * is full or almost full, RED effectively works as *tail drop*. The *Weighted + * RED* algorithm uses a separate set of RED thresholds for each packet color. 
+ */ +enum rte_tm_cman_mode { + RTE_TM_CMAN_TAIL_DROP = 0, /**< Tail drop */ + RTE_TM_CMAN_HEAD_DROP, /**< Head drop */ + RTE_TM_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */ +}; + +/** + * Random Early Detection (RED) profile + */ +struct rte_tm_red_params { + /** Minimum queue threshold */ + uint32_t min_th; + + /** Maximum queue threshold */ + uint32_t max_th; + + /** Inverse of packet marking probability maximum value (maxp), i.e. + * maxp_inv = 1 / maxp + */ + uint16_t maxp_inv; + + /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */ + uint16_t wq_log2; +}; + +/** + * Weighted RED (WRED) profile + * + * Multiple WRED contexts can share the same WRED profile. Each leaf node with + * WRED enabled as its congestion management mode has zero or one private WRED + * context (only one leaf node using it) and/or zero, one or several shared + * WRED contexts (multiple leaf nodes use the same WRED context). A private + * WRED context is used to perform congestion management for a single leaf + * node, while a shared WRED context is used to perform congestion management + * for a group of leaf nodes. + * + * @see struct rte_tm_capabilities::cman_wred_packet_mode_supported + * @see struct rte_tm_capabilities::cman_wred_byte_mode_supported + */ +struct rte_tm_wred_params { + /** One set of RED parameters per packet color */ + struct rte_tm_red_params red_params[RTE_TM_COLORS]; + + /** When non-zero, the *min_th* and *max_th* thresholds are specified + * in packets (WRED packet mode). When zero, the *min_th* and *max_th* + * thresholds are specified in bytes (WRED byte mode) + */ + int packet_mode; +}; + +/** + * Token bucket + */ +struct rte_tm_token_bucket { + /** Token bucket rate (bytes per second) */ + uint64_t rate; + + /** Token bucket size (bytes), a.k.a. max burst size */ + uint64_t size; +}; + +/** + * Shaper (rate limiter) profile + * + * Multiple shaper instances can share the same shaper profile. Each node has + * zero or one private shaper (only one node using it) and/or zero, one or + * several shared shapers (multiple nodes use the same shaper instance). + * A private shaper is used to perform traffic shaping for a single node, while + * a shared shaper is used to perform traffic shaping for a group of nodes. + * + * Single rate shapers use a single token bucket. A single rate shaper can be + * configured by setting the rate of the committed bucket to zero, which + * effectively disables this bucket. The peak bucket is used to limit the rate + * and the burst size for the current shaper. + * + * Dual rate shapers use both the committed and the peak token buckets. The + * rate of the peak bucket has to be bigger than zero, as well as greater than + * or equal to the rate of the committed bucket. + */ +struct rte_tm_shaper_params { + /** Committed token bucket */ + struct rte_tm_token_bucket committed; + + /** Peak token bucket */ + struct rte_tm_token_bucket peak; + + /** Signed value to be added to the length of each packet for the + * purpose of shaping. Can be used to correct the packet length with + * the framing overhead bytes that are also consumed on the wire (e.g. + * RTE_TM_ETH_FRAMING_OVERHEAD_FCS). + */ + int32_t pkt_length_adjust; +}; + +/** + * Node parameters + * + * Each non-leaf node has multiple inputs (its children nodes) and single output + * (which is input to its parent node). 
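A minimal sketch of a dual rate shaper profile built from the token bucket and shaper parameter structures above (rates in bytes per second, profile ID arbitrary), registered through rte_tm_shaper_profile_add():

#include <string.h>
#include <rte_tm.h>

/* Sketch: 100 Mbit/s committed, 1 Gbit/s peak, length adjusted for Ethernet
 * framing overhead including FCS.
 */
static int
add_dual_rate_shaper(uint16_t port_id, uint32_t profile_id)
{
	struct rte_tm_shaper_params sp;
	struct rte_tm_error err;

	memset(&sp, 0, sizeof(sp));
	sp.committed.rate = 12500000;	/* 100 Mbit/s in bytes per second */
	sp.committed.size = 4096;
	sp.peak.rate = 125000000;	/* 1 Gbit/s in bytes per second */
	sp.peak.size = 4096;
	sp.pkt_length_adjust = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;

	return rte_tm_shaper_profile_add(port_id, profile_id, &sp, &err);
}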
It arbitrates its inputs using Strict + * Priority (SP) and Weighted Fair Queuing (WFQ) algorithms to schedule input + * packets to its output while observing its shaping (rate limiting) + * constraints. + * + * Algorithms such as Weighted Round Robin (WRR), Byte-level WRR, Deficit WRR + * (DWRR), etc. are considered approximations of the WFQ ideal and are + * assimilated to WFQ, although an associated implementation-dependent trade-off + * on accuracy, performance and resource usage might exist. + * + * Children nodes with different priorities are scheduled using the SP algorithm + * based on their priority, with zero (0) as the highest priority. Children with + * the same priority are scheduled using the WFQ algorithm according to their + * weights. The WFQ weight of a given child node is relative to the sum of the + * weights of all its sibling nodes that have the same priority, with one (1) as + * the lowest weight. For each SP priority, the WFQ weight mode can be set as + * either byte-based or packet-based. + * + * Each leaf node sits on top of a TX queue of the current Ethernet port. Hence, + * the leaf nodes are predefined, with their node IDs set to 0 .. (N-1), where N + * is the number of TX queues configured for the current Ethernet port. The + * non-leaf nodes have their IDs generated by the application. + */ +struct rte_tm_node_params { + /** Shaper profile for the private shaper. The absence of the private + * shaper for the current node is indicated by setting this parameter + * to RTE_TM_SHAPER_PROFILE_ID_NONE. + */ + uint32_t shaper_profile_id; + + /** User allocated array of valid shared shaper IDs. */ + uint32_t *shared_shaper_id; + + /** Number of shared shaper IDs in the *shared_shaper_id* array. */ + uint32_t n_shared_shapers; + + RTE_STD_C11 + union { + /** Parameters only valid for non-leaf nodes. */ + struct { + /** WFQ weight mode for each SP priority. When NULL, it + * indicates that WFQ is to be used for all priorities. + * When non-NULL, it points to a pre-allocated array of + * *n_sp_priorities* values, with non-zero value for + * byte-mode and zero for packet-mode. + */ + int *wfq_weight_mode; + + /** Number of SP priorities. */ + uint32_t n_sp_priorities; + } nonleaf; + + /** Parameters only valid for leaf nodes. */ + struct { + /** Congestion management mode */ + enum rte_tm_cman_mode cman; + + /** WRED parameters (only valid when *cman* is set to + * WRED). + */ + struct { + /** WRED profile for private WRED context. The + * absence of a private WRED context for the + * current leaf node is indicated by value + * RTE_TM_WRED_PROFILE_ID_NONE. + */ + uint32_t wred_profile_id; + + /** User allocated array of shared WRED context + * IDs. When set to NULL, it indicates that the + * current leaf node should not currently be + * part of any shared WRED contexts. + */ + uint32_t *shared_wred_context_id; + + /** Number of elements in the + * *shared_wred_context_id* array. Only valid + * when *shared_wred_context_id* is non-NULL, + * in which case it should be non-zero. + */ + uint32_t n_shared_wred_contexts; + } wred; + } leaf; + }; + + /** Mask of statistics counter types to be enabled for this node. This + * needs to be a subset of the statistics counter types available for + * the current node. Any statistics counter type not included in this + * set is to be disabled for the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. 
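The node parameter structure above is what rte_tm_node_add() consumes. A minimal two-node sketch, assuming node ID 100 is an arbitrary non-leaf ID outside the 0..(N-1) leaf range and that the PMD accepts priority 0 / weight 1 for these nodes:

#include <string.h>
#include <rte_tm.h>

/* Sketch: one non-leaf node under the root position and TX queue 0 as its
 * leaf child, followed by hierarchy commit.
 */
static int
build_minimal_hierarchy(uint16_t port_id, uint32_t shaper_profile_id)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	int ret;

	/* Non-leaf node with no parent (root of the hierarchy). */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = shaper_profile_id;
	np.nonleaf.n_sp_priorities = 1;
	ret = rte_tm_node_add(port_id, 100, RTE_TM_NODE_ID_NULL,
		0, 1, RTE_TM_NODE_LEVEL_ID_ANY, &np, &err);
	if (ret != 0)
		return ret;

	/* Leaf node 0 (TX queue 0): tail drop, no private shaper. */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.leaf.cman = RTE_TM_CMAN_TAIL_DROP;
	ret = rte_tm_node_add(port_id, 0, 100,
		0, 1, RTE_TM_NODE_LEVEL_ID_ANY, &np, &err);
	if (ret != 0)
		return ret;

	return rte_tm_hierarchy_commit(port_id, 1 /* clear on fail */, &err);
}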
+ * + * Most of them provide the type of the object referenced by struct + * rte_tm_error::cause. + */ +enum rte_tm_error_type { + RTE_TM_ERROR_TYPE_NONE, /**< No error. */ + RTE_TM_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_TM_ERROR_TYPE_CAPABILITIES, + RTE_TM_ERROR_TYPE_LEVEL_ID, + RTE_TM_ERROR_TYPE_WRED_PROFILE, + RTE_TM_ERROR_TYPE_WRED_PROFILE_GREEN, + RTE_TM_ERROR_TYPE_WRED_PROFILE_YELLOW, + RTE_TM_ERROR_TYPE_WRED_PROFILE_RED, + RTE_TM_ERROR_TYPE_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID, + RTE_TM_ERROR_TYPE_NODE_PRIORITY, + RTE_TM_ERROR_TYPE_NODE_WEIGHT, + RTE_TM_ERROR_TYPE_NODE_PARAMS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES, + RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS, + RTE_TM_ERROR_TYPE_NODE_ID, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_tm_error { + enum rte_tm_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Traffic manager get number of leaf nodes + * + * Each leaf node sits on on top of a TX queue of the current Ethernet port. + * Therefore, the set of leaf nodes is predefined, their number is always equal + * to N (where N is the number of TX queues configured for the current port) + * and their IDs are 0 .. (N-1). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] n_leaf_nodes + * Number of leaf nodes for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error); + +/** + * Traffic manager node ID validate and type (i.e. leaf or non-leaf) get + * + * The leaf nodes have predefined IDs in the range of 0 .. (N-1), where N is + * the number of TX queues of the current Ethernet port. The non-leaf nodes + * have their IDs generated by the application outside of the above range, + * which is reserved for leaf nodes. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID value. Needs to be valid. 
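struct rte_tm_error is filled in by the driver only on failure, so callers usually zero-initialize it and inspect it when a call returns non-zero. A minimal application-side sketch using rte_tm_get_number_of_leaf_nodes() as declared above (assumes <stdio.h> for the diagnostic print):

static int
example_query_leaf_nodes(uint16_t port_id, uint32_t *n_leaf_nodes)
{
	struct rte_tm_error error = { 0 };
	int ret;

	ret = rte_tm_get_number_of_leaf_nodes(port_id, n_leaf_nodes, &error);
	if (ret != 0) {
		/* error.message points to a constant string owned by the PMD;
		 * it must not be freed and stays valid only while the port
		 * remains configured.
		 */
		printf("TM error %d (type %d): %s\n", ret, error.type,
		       error.message ? error.message : "(no message)");
		return ret;
	}
	return 0;
}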
+ * @param[out] is_leaf + * Set to non-zero value when node is leaf and to zero otherwise (non-leaf). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_node_type_get(uint16_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** + * Traffic manager capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * Traffic manager capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_capabilities_get(uint16_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager level capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] level_id + * The hierarchy level identifier. The value of 0 identifies the level of the + * root node. + * @param[out] cap + * Traffic manager level capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_level_capabilities_get(uint16_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager node capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] cap + * Traffic manager node capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_node_capabilities_get(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile add + * + * Create a new WRED profile with ID set to *wred_profile_id*. The new profile + * is used to create one or several WRED contexts. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID for the new profile. Needs to be unused. + * @param[in] profile + * WRED profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_add(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile delete + * + * Delete an existing WRED profile. This operation fails when there is + * currently at least one user (i.e. WRED context) of this WRED profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
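Capability queries usually come first, since the port- and per-level limits bound any hierarchy the application may build. The sketch below is illustrative; shaper_n_max is referenced by this header, while n_levels_max and n_nodes_max are assumed field names from struct rte_tm_capabilities defined earlier in the file (assumes <stdio.h>).

static int
example_print_tm_capabilities(uint16_t port_id)
{
	struct rte_tm_capabilities cap;
	struct rte_tm_error error = { 0 };
	uint32_t level;
	int ret;

	ret = rte_tm_capabilities_get(port_id, &cap, &error);
	if (ret != 0)
		return ret;

	/* n_levels_max and n_nodes_max are assumed field names. */
	printf("port %u: %u levels, up to %u nodes, %u shapers\n",
	       port_id, cap.n_levels_max, cap.n_nodes_max, cap.shaper_n_max);

	for (level = 0; level < cap.n_levels_max; level++) {
		struct rte_tm_level_capabilities lcap;

		ret = rte_tm_level_capabilities_get(port_id, level, &lcap,
						    &error);
		if (ret != 0)
			return ret;
		/* Inspect per-level limits here (fields omitted). */
	}
	return 0;
}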
+ * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_delete(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context add or update + * + * When *shared_wred_context_id* is invalid, a new WRED context with this ID is + * created by using the WRED profile identified by *wred_profile_id*. + * + * When *shared_wred_context_id* is valid, this WRED context is no longer using + * the profile previously assigned to it and is updated to use the profile + * identified by *wred_profile_id*. + * + * A valid shared WRED context can be assigned to several hierarchy leaf nodes + * configured to use WRED as the congestion management mode. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_add_update(uint16_t port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context delete + * + * Delete an existing shared WRED context. This operation fails when there is + * currently at least one user (i.e. hierarchy leaf node) of this shared WRED + * context. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_delete(uint16_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile add + * + * Create a new shaper profile with ID set to *shaper_profile_id*. The new + * shaper profile is used to create one or several shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID for the new profile. Needs to be unused. + * @param[in] profile + * Shaper profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_add(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile delete + * + * Delete an existing shaper profile. This operation fails when there is + * currently at least one user (i.e. shaper) of this shaper profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
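The WRED calls compose in an add-then-share pattern: register a profile once, then expose it to several leaf nodes through a shared context. A hedged sketch with arbitrary thresholds, using WRED byte mode:

static int
example_setup_wred(uint16_t port_id, uint32_t wred_profile_id,
		   uint32_t shared_wred_context_id)
{
	struct rte_tm_wred_params wp = { .packet_mode = 0 /* byte mode */ };
	struct rte_tm_error error = { 0 };
	int i, ret;

	/* Same RED curve for all packet colors; values are only examples. */
	for (i = 0; i < RTE_TM_COLORS; i++) {
		wp.red_params[i].min_th = 16 * 1024;
		wp.red_params[i].max_th = 64 * 1024;
		wp.red_params[i].maxp_inv = 10; /* max drop probability 1/10 */
		wp.red_params[i].wq_log2 = 9;   /* queue weight 1/512 */
	}

	ret = rte_tm_wred_profile_add(port_id, wred_profile_id, &wp, &error);
	if (ret != 0)
		return ret;

	/* One shared context, usable by several WRED-enabled leaf nodes. */
	return rte_tm_shared_wred_context_add_update(port_id,
			shared_wred_context_id, wred_profile_id, &error);
}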
+ * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_delete(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper add or update + * + * When *shared_shaper_id* is not a valid shared shaper ID, a new shared shaper + * with this ID is created using the shaper profile identified by + * *shaper_profile_id*. + * + * When *shared_shaper_id* is a valid shared shaper ID, this shared shaper is + * no longer using the shaper profile previously assigned to it and is updated + * to use the shaper profile identified by *shaper_profile_id*. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_add_update(uint16_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper delete + * + * Delete an existing shared shaper. This operation fails when there is + * currently at least one user (i.e. hierarchy node) of this shared shaper. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_delete(uint16_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** + * Traffic manager node add + * + * Create new node and connect it as child of an existing node. The new node is + * further identified by *node_id*, which needs to be unused by any of the + * existing nodes. The parent node is identified by *parent_node_id*, which + * needs to be the valid ID of an existing non-leaf node. The parent node is + * going to use the provided SP *priority* and WFQ *weight* to schedule its new + * child node. + * + * This function has to be called for both leaf and non-leaf nodes. In the case + * of leaf nodes (i.e. *node_id* is within the range of 0 .. (N-1), with N as + * the number of configured TX queues of the current port), the leaf node is + * configured rather than created (as the set of leaf nodes is predefined) and + * it is also connected as child of an existing node. + * + * The first node that is added becomes the root node and all the nodes that + * are subsequently added have to be added as descendants of the root node. The + * parent of the root node has to be specified as RTE_TM_NODE_ID_NULL and there + * can only be one node with this parent ID (i.e. the root node). Further + * restrictions for root node: needs to be non-leaf, its private shaper profile + * needs to be valid and single rate, cannot use any shared shapers. + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. 
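Shapers follow the same pattern: register a shaper profile, then optionally wrap it in a shared shaper that a group of nodes can reference. A brief sketch with an arbitrary single rate profile:

static int
example_setup_shapers(uint16_t port_id, uint32_t shaper_profile_id,
		      uint32_t shared_shaper_id)
{
	struct rte_tm_error error = { 0 };
	struct rte_tm_shaper_params sp = {
		/* Single rate: committed bucket disabled, the peak bucket
		 * limits the node(s) to roughly 1 Gbps with a 4 KB burst.
		 */
		.committed = { .rate = 0, .size = 0 },
		.peak = { .rate = 125000000, .size = 4096 },
		.pkt_length_adjust = RTE_TM_ETH_FRAMING_OVERHEAD_FCS,
	};
	int ret;

	ret = rte_tm_shaper_profile_add(port_id, shaper_profile_id, &sp,
					&error);
	if (ret != 0)
		return ret;

	/* Optional: a shared shaper aggregating several nodes under one
	 * rate limit.
	 */
	return rte_tm_shared_shaper_add_update(port_id, shared_shaper_id,
					       shaper_profile_id, &error);
}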
+ * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be unused by any of the existing nodes. + * @param[in] parent_node_id + * Parent node ID. Needs to be the valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. + * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is one. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[in] level_id + * Level ID that should be met by this node. The hierarchy level of the + * current node is already fully specified through its parent node (i.e. the + * level of this node is equal to the level of its parent node plus one), + * therefore the reason for providing this parameter is to enable the + * application to perform step-by-step checking of the node level during + * successive invocations of this function. When not desired, this check can + * be disabled by assigning value RTE_TM_NODE_LEVEL_ID_ANY to this parameter. + * @param[in] params + * Node parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_hierarchy_commit() + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + * @see RTE_TM_NODE_LEVEL_ID_ANY + * @see struct rte_tm_capabilities + */ +int +rte_tm_node_add(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** + * Traffic manager node delete + * + * Delete an existing node. This operation fails when this node currently has + * at least one user (i.e. child node). + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + */ +int +rte_tm_node_delete(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node suspend + * + * Suspend an existing node. While the node is in suspended state, no packet is + * scheduled from this node and its descendants. The node exits the suspended + * state through the node resume operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. 
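The node add contract is easiest to see end to end: the root is added first with parent RTE_TM_NODE_ID_NULL and a valid single rate private shaper, then the predefined leaf nodes 0..(N-1) are attached. The sketch below assumes a flat two-level hierarchy; the node ID and shaper profile constants are hypothetical, and the shaper profile is assumed to have been added beforehand.

/* Hypothetical IDs; the root ID is chosen outside the 0..(N-1) leaf range. */
#define EX_ROOT_NODE_ID   1000
#define EX_SHAPER_PROFILE 0

static int
example_build_hierarchy(uint16_t port_id, uint32_t n_leaf_nodes)
{
	struct rte_tm_error error = { 0 };
	struct rte_tm_node_params np;
	uint32_t q;
	int ret;

	/* Root: non-leaf, private single rate shaper, no shared shapers. */
	np = (struct rte_tm_node_params){
		.shaper_profile_id = EX_SHAPER_PROFILE,
		.nonleaf = { .wfq_weight_mode = NULL, .n_sp_priorities = 1 },
	};
	ret = rte_tm_node_add(port_id, EX_ROOT_NODE_ID, RTE_TM_NODE_ID_NULL,
			      0 /* priority */, 1 /* weight */,
			      RTE_TM_NODE_LEVEL_ID_ANY, &np, &error);
	if (ret != 0)
		return ret;

	/* Leaf nodes: IDs 0..(N-1) are predefined, one per TX queue. */
	for (q = 0; q < n_leaf_nodes; q++) {
		np = (struct rte_tm_node_params){
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
			.leaf = { .cman = RTE_TM_CMAN_TAIL_DROP },
		};
		ret = rte_tm_node_add(port_id, q, EX_ROOT_NODE_ID, 0, 1,
				      RTE_TM_NODE_LEVEL_ID_ANY, &np, &error);
		if (ret != 0)
			return ret;
	}
	return 0;
}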
+ * + * @see rte_tm_node_resume() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_suspend(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node resume + * + * Resume an existing node that is currently in suspended state. The node + * entered the suspended state as result of a previous node suspend operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_suspend() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_resume(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager hierarchy commit + * + * This function is called during the port initialization phase (before the + * Ethernet port is started) to freeze the start-up hierarchy. + * + * This function typically performs the following steps: + * a) It validates the start-up hierarchy that was previously defined for the + * current port through successive rte_tm_node_add() invocations; + * b) Assuming successful validation, it performs all the necessary port + * specific configuration operations to install the specified hierarchy on + * the current port, with immediate effect once the port is started. + * + * This function fails when the currently configured hierarchy is not supported + * by the Ethernet port, in which case the user can abort or try out another + * hierarchy configuration (e.g. a hierarchy with less leaf nodes), which can be + * build from scratch (when *clear_on_fail* is enabled) or by modifying the + * existing hierarchy configuration (when *clear_on_fail* is disabled). + * + * Note that this function can still fail due to other causes (e.g. not enough + * memory available in the system, etc), even though the specified hierarchy is + * supported in principle by the current port. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] clear_on_fail + * On function call failure, hierarchy is cleared when this parameter is + * non-zero and preserved when this parameter is equal to zero. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_add() + * @see rte_tm_node_delete() + */ +int +rte_tm_hierarchy_commit(uint16_t port_id, + int clear_on_fail, + struct rte_tm_error *error); + +/** + * Traffic manager node parent update + * + * This function may be used to move a node and its children to a different + * parent. Additionally, if the new parent is the same as the current parent, + * this function will update the priority/weight of an existing node. + * + * Restriction for root node: its parent cannot be changed. + * + * This function can only be called after the rte_tm_hierarchy_commit() + * invocation. Its success depends on the port support for this operation, as + * advertised through the port capability set. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] parent_node_id + * Node ID for the new parent. Needs to be valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. 
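Committing the hierarchy is then a single call; the clear_on_fail flag decides whether a rejected hierarchy is discarded or kept for incremental fixing. A short application-side sketch (assumes <stdio.h>):

static int
example_commit(uint16_t port_id)
{
	struct rte_tm_error error = { 0 };
	int ret;

	/* clear_on_fail = 1: if the PMD rejects the hierarchy, drop it and
	 * rebuild from scratch rather than patching it up.
	 */
	ret = rte_tm_hierarchy_commit(port_id, 1, &error);
	if (ret != 0)
		printf("hierarchy rejected: %s\n",
		       error.message ? error.message : "(no details)");
	return ret;
}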
+ * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is zero. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL + * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL + */ +int +rte_tm_node_parent_update(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** + * Traffic manager node private shaper update + * + * Restriction for the root node: its private shaper profile needs to be valid + * and single rate. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shaper_profile_id + * Shaper profile ID for the private shaper of the current node. Needs to be + * either valid shaper profile ID or RTE_TM_SHAPER_PROFILE_ID_NONE, with + * the latter disabling the private shaper of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_private_n_max + */ +int +rte_tm_node_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared shapers update + * + * Restriction for root node: cannot use any shared rate shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared shaper to current node or to zero + * to delete this shared shaper from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_node_shared_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current node. This + * needs to be a subset of the statistics counter types available for the + * current node. Any statistics counter type not included in this set is to + * be disabled for the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + * @see RTE_TM_UPDATE_NODE_STATS + */ +int +rte_tm_node_stats_update(uint16_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** + * Traffic manager node WFQ weight mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid non-leaf node ID. + * @param[in] wfq_weight_mode + * WFQ weight mode for each SP priority. 
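After the commit, the various *_update() calls allow run-time adjustments where the port advertises support for them. The sketch below enables basic counters on a node and swaps its private shaper profile; the RTE_TM_STATS_* flags are assumed names from enum rte_tm_stats_type defined earlier in this header.

static int
example_runtime_update(uint16_t port_id, uint32_t node_id,
		       uint32_t new_shaper_profile_id)
{
	struct rte_tm_error error = { 0 };
	int ret;

	/* Enable basic counters; the mask must be a subset of what the node
	 * advertises in its capabilities.
	 */
	ret = rte_tm_node_stats_update(port_id, node_id,
			RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES, &error);
	if (ret != 0)
		return ret;

	/* Point the node's private shaper at a different profile, or pass
	 * RTE_TM_SHAPER_PROFILE_ID_NONE to disable it.
	 */
	return rte_tm_node_shaper_update(port_id, node_id,
					 new_shaper_profile_id, &error);
}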
When NULL, it indicates that WFQ is + * to be used for all priorities. When non-NULL, it points to a pre-allocated + * array of *n_sp_priorities* values, with non-zero value for byte-mode and + * zero for packet-mode. + * @param[in] n_sp_priorities + * Number of SP priorities. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE + * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES + */ +int +rte_tm_node_wfq_weight_mode_update(uint16_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** + * Traffic manager node congestion management mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] cman + * Congestion management mode. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_CMAN + */ +int +rte_tm_node_cman_update(uint16_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** + * Traffic manager node private WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] wred_profile_id + * WRED profile ID for the private WRED context of the current node. Needs to + * be either valid WRED profile ID or RTE_TM_WRED_PROFILE_ID_NONE, with the + * latter disabling the private WRED context of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_private_n_max +*/ +int +rte_tm_node_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared WRED context to current node or + * to zero to delete this shared WRED context from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_node_shared_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current node. + * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this node, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. 
set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + */ +int +rte_tm_node_stats_read(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - VLAN DEI (IEEE 802.1Q) + * + * IEEE 802.1p maps the traffic class to the VLAN Priority Code Point (PCP) + * field (3 bits), while IEEE 802.1q maps the drop priority to the VLAN Drop + * Eligible Indicator (DEI) field (1 bit), which was previously named Canonical + * Format Indicator (CFI). + * + * All VLAN frames of a given color get their DEI bit set if marking is enabled + * for this color; otherwise, their DEI bit is left as is (either set or not). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_vlan_dei_supported + */ +int +rte_tm_mark_vlan_dei(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 ECN (IETF RFC 3168) + * + * IETF RFCs 2474 and 3168 reorganize the IPv4 Type of Service (TOS) field + * (8 bits) and the IPv6 Traffic Class (TC) field (8 bits) into Differentiated + * Services Codepoint (DSCP) field (6 bits) and Explicit Congestion + * Notification (ECN) field (2 bits). The DSCP field is typically used to + * encode the traffic class and/or drop priority (RFC 2597), while the ECN + * field is used by RFC 3168 to implement a congestion notification mechanism + * to be leveraged by transport layer protocols such as TCP and SCTP that have + * congestion control mechanisms. + * + * When congestion is experienced, as alternative to dropping the packet, + * routers can change the ECN field of input packets from 2'b01 or 2'b10 + * (values indicating that source endpoint is ECN-capable) to 2'b11 (meaning + * that congestion is experienced). The destination endpoint can use the + * ECN-Echo (ECE) TCP flag to relay the congestion indication back to the + * source endpoint, which acknowledges it back to the destination endpoint with + * the Congestion Window Reduced (CWR) TCP flag. + * + * All IPv4/IPv6 packets of a given color with ECN set to 2’b01 or 2’b10 + * carrying TCP or SCTP have their ECN set to 2’b11 if the marking feature is + * enabled for the current color, otherwise the ECN field is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. 
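Counter readout and packet marking round out the run-time calls. The sketch below polls and clears a node's counters, then enables marking for yellow and red packets only across the three marking schemes; the stats flag and the n_pkts field are assumed names from definitions earlier in this header (assumes <stdio.h> and <inttypes.h>).

static int
example_poll_and_mark(uint16_t port_id, uint32_t node_id)
{
	struct rte_tm_node_stats stats;
	struct rte_tm_error error = { 0 };
	uint64_t mask = 0;
	int ret;

	/* Read the enabled counters and clear them in the same call. */
	ret = rte_tm_node_stats_read(port_id, node_id, &stats, &mask, 1,
				     &error);
	if (ret != 0)
		return ret;
	if (mask & RTE_TM_STATS_N_PKTS)
		printf("node %u: %" PRIu64 " pkts\n", node_id, stats.n_pkts);

	/* Mark only yellow and red packets: VLAN DEI, IP ECN and IP DSCP. */
	ret = rte_tm_mark_vlan_dei(port_id, 0, 1, 1, &error);
	if (ret != 0)
		return ret;
	ret = rte_tm_mark_ip_ecn(port_id, 0, 1, 1, &error);
	if (ret != 0)
		return ret;
	return rte_tm_mark_ip_dscp(port_id, 0, 1, 1, &error);
}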
+ * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_ecn_tcp_supported + * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported + */ +int +rte_tm_mark_ip_ecn(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 DSCP (IETF RFC 2597) + * + * IETF RFC 2597 maps the traffic class and the drop priority to the IPv4/IPv6 + * Differentiated Services Codepoint (DSCP) field (6 bits). Here are the DSCP + * values proposed by this RFC: + * + *
 *                       Class 1    Class 2    Class 3    Class 4
 *                     +----------+----------+----------+----------+
 *    Low Drop Prec    |  001010  |  010010  |  011010  |  100010  |
 *    Medium Drop Prec |  001100  |  010100  |  011100  |  100100  |
 *    High Drop Prec   |  001110  |  010110  |  011110  |  100110  |
 *                     +----------+----------+----------+----------+
+ * + * There are 4 traffic classes (classes 1 .. 4) encoded by DSCP bits 1 and 2, + * as well as 3 drop priorities (low/medium/high) encoded by DSCP bits 3 and 4. + * + * All IPv4/IPv6 packets have their color marked into DSCP bits 3 and 4 as + * follows: green mapped to Low Drop Precedence (2’b01), yellow to Medium + * (2’b10) and red to High (2’b11). Marking needs to be explicitly enabled + * for each color; when not enabled for a given color, the DSCP field of all + * packets with that color is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_dscp_supported + */ +int +rte_tm_mark_ip_dscp(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_H__ */ diff --git a/lib/librte_ethdev/rte_tm_driver.h b/lib/librte_ethdev/rte_tm_driver.h new file mode 100644 index 00000000..90114ff5 --- /dev/null +++ b/lib/librte_ethdev/rte_tm_driver.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_TM_DRIVER_H__ +#define __INCLUDE_RTE_TM_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Manager API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. 
+ */ + +#include + +#include +#include "rte_ethdev.h" +#include "rte_tm.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @internal Traffic manager node ID validate and type get */ +typedef int (*rte_tm_node_type_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** @internal Traffic manager capabilities get */ +typedef int (*rte_tm_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager level capabilities get */ +typedef int (*rte_tm_level_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager node capabilities get */ +typedef int (*rte_tm_node_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile add */ +typedef int (*rte_tm_wred_profile_add_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile delete */ +typedef int (*rte_tm_wred_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context add */ +typedef int (*rte_tm_shared_wred_context_add_update_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context delete */ +typedef int (*rte_tm_shared_wred_context_delete_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile add */ +typedef int (*rte_tm_shaper_profile_add_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile delete */ +typedef int (*rte_tm_shaper_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper add/update */ +typedef int (*rte_tm_shared_shaper_add_update_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper delete */ +typedef int (*rte_tm_shared_shaper_delete_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node add */ +typedef int (*rte_tm_node_add_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** @internal Traffic manager node delete */ +typedef int (*rte_tm_node_delete_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node suspend */ +typedef int (*rte_tm_node_suspend_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node resume */ +typedef int (*rte_tm_node_resume_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager hierarchy commit */ +typedef int (*rte_tm_hierarchy_commit_t)(struct rte_eth_dev *dev, + int clear_on_fail, + struct rte_tm_error 
*error); + +/** @internal Traffic manager node parent update */ +typedef int (*rte_tm_node_parent_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shared_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_shaper_id, + int32_t add, + struct rte_tm_error *error); + +/** @internal Traffic manager node stats update */ +typedef int (*rte_tm_node_stats_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** @internal Traffic manager node WFQ weight mode update */ +typedef int (*rte_tm_node_wfq_weight_mode_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** @internal Traffic manager node congestion management mode update */ +typedef int (*rte_tm_node_cman_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_shared_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** @internal Traffic manager read stats counters for specific node */ +typedef int (*rte_tm_node_stats_read_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - VLAN DEI */ +typedef int (*rte_tm_mark_vlan_dei_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 ECN */ +typedef int (*rte_tm_mark_ip_ecn_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 DSCP */ +typedef int (*rte_tm_mark_ip_dscp_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +struct rte_tm_ops { + /** Traffic manager node type get */ + rte_tm_node_type_get_t node_type_get; + + /** Traffic manager capabilities_get */ + rte_tm_capabilities_get_t capabilities_get; + /** Traffic manager level capabilities_get */ + rte_tm_level_capabilities_get_t level_capabilities_get; + /** Traffic manager node capabilities get */ + rte_tm_node_capabilities_get_t node_capabilities_get; + + /** Traffic manager WRED profile add */ + rte_tm_wred_profile_add_t wred_profile_add; + /** Traffic manager WRED profile delete */ + rte_tm_wred_profile_delete_t wred_profile_delete; + /** Traffic manager shared WRED context add/update */ + rte_tm_shared_wred_context_add_update_t + shared_wred_context_add_update; + /** Traffic manager shared WRED context delete */ + rte_tm_shared_wred_context_delete_t + shared_wred_context_delete; + + 
/** Traffic manager shaper profile add */ + rte_tm_shaper_profile_add_t shaper_profile_add; + /** Traffic manager shaper profile delete */ + rte_tm_shaper_profile_delete_t shaper_profile_delete; + /** Traffic manager shared shaper add/update */ + rte_tm_shared_shaper_add_update_t shared_shaper_add_update; + /** Traffic manager shared shaper delete */ + rte_tm_shared_shaper_delete_t shared_shaper_delete; + + /** Traffic manager node add */ + rte_tm_node_add_t node_add; + /** Traffic manager node delete */ + rte_tm_node_delete_t node_delete; + /** Traffic manager node suspend */ + rte_tm_node_suspend_t node_suspend; + /** Traffic manager node resume */ + rte_tm_node_resume_t node_resume; + /** Traffic manager hierarchy commit */ + rte_tm_hierarchy_commit_t hierarchy_commit; + + /** Traffic manager node parent update */ + rte_tm_node_parent_update_t node_parent_update; + /** Traffic manager node shaper update */ + rte_tm_node_shaper_update_t node_shaper_update; + /** Traffic manager node shared shaper update */ + rte_tm_node_shared_shaper_update_t node_shared_shaper_update; + /** Traffic manager node stats update */ + rte_tm_node_stats_update_t node_stats_update; + /** Traffic manager node WFQ weight mode update */ + rte_tm_node_wfq_weight_mode_update_t node_wfq_weight_mode_update; + /** Traffic manager node congestion management mode update */ + rte_tm_node_cman_update_t node_cman_update; + /** Traffic manager node WRED context update */ + rte_tm_node_wred_context_update_t node_wred_context_update; + /** Traffic manager node shared WRED context update */ + rte_tm_node_shared_wred_context_update_t + node_shared_wred_context_update; + /** Traffic manager read statistics counters for current node */ + rte_tm_node_stats_read_t node_stats_read; + + /** Traffic manager packet marking - VLAN DEI */ + rte_tm_mark_vlan_dei_t mark_vlan_dei; + /** Traffic manager packet marking - IPv4/IPv6 ECN */ + rte_tm_mark_ip_ecn_t mark_ip_ecn; + /** Traffic manager packet marking - IPv4/IPv6 DSCP */ + rte_tm_mark_ip_dscp_t mark_ip_dscp; +}; + +/** + * Initialize generic error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_tm_error_set(struct rte_tm_error *error, + int code, + enum rte_tm_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_tm_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic manager operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic manager operations structure associated with port_id on + * success, NULL otherwise. 
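On the driver side the pattern mirrors the application API: a PMD fills a static struct rte_tm_ops and hands it out through the ethdev tm_ops_get hook so that rte_tm_ops_get() below can find it, using rte_tm_error_set() to report failures. The stub below is purely illustrative; the dev-ops wiring and the error code choice are assumptions, not taken from any particular driver.

/* Example PMD callback: reject every node type query with a clear error. */
static int
example_pmd_node_type_get(struct rte_eth_dev *dev __rte_unused,
			  uint32_t node_id __rte_unused,
			  int *is_leaf __rte_unused,
			  struct rte_tm_error *error)
{
	return -rte_tm_error_set(error, ENOTSUP,
				 RTE_TM_ERROR_TYPE_UNSPECIFIED,
				 NULL, "traffic management not supported");
}

static const struct rte_tm_ops example_pmd_tm_ops = {
	.node_type_get = example_pmd_node_type_get,
	/* remaining callbacks left NULL in this stub */
};

/* Hooked into the ethdev tm_ops_get dev-op (signature assumed) so that
 * rte_tm_ops_get(), declared below, can retrieve the table for this port.
 */
static int
example_pmd_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *ops)
{
	if (ops == NULL)
		return -EINVAL;
	*(const void **)ops = &example_pmd_tm_ops;
	return 0;
}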
+ */ +const struct rte_tm_ops * +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_DRIVER_H__ */ diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile deleted file mode 100644 index 3ca5782b..00000000 --- a/lib/librte_ether/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2017 Intel Corporation - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# library name -# -LIB = librte_ethdev.a - -CFLAGS += -DALLOW_EXPERIMENTAL_API -CFLAGS += -O3 -CFLAGS += $(WERROR_FLAGS) -LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring -LDLIBS += -lrte_mbuf - -EXPORT_MAP := rte_ethdev_version.map - -LIBABIVER := 8 - -SRCS-y += rte_ethdev.c -SRCS-y += rte_flow.c -SRCS-y += rte_tm.c -SRCS-y += rte_mtr.c -SRCS-y += ethdev_profile.c - -# -# Export include files -# -SYMLINK-y-include += rte_ethdev.h -SYMLINK-y-include += rte_ethdev_driver.h -SYMLINK-y-include += rte_ethdev_core.h -SYMLINK-y-include += rte_ethdev_pci.h -SYMLINK-y-include += rte_ethdev_vdev.h -SYMLINK-y-include += rte_eth_ctrl.h -SYMLINK-y-include += rte_dev_info.h -SYMLINK-y-include += rte_flow.h -SYMLINK-y-include += rte_flow_driver.h -SYMLINK-y-include += rte_tm.h -SYMLINK-y-include += rte_tm_driver.h -SYMLINK-y-include += rte_mtr.h -SYMLINK-y-include += rte_mtr_driver.h - -include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/ethdev_profile.c b/lib/librte_ether/ethdev_profile.c deleted file mode 100644 index 0d1dcda3..00000000 --- a/lib/librte_ether/ethdev_profile.c +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation - */ - -#include "ethdev_profile.h" - -/** - * This conditional block enables RX queues profiling by tracking wasted - * iterations, i.e. iterations which yielded no RX packets. Profiling is - * performed using the Instrumentation and Tracing Technology (ITT) API, - * employed by the Intel (R) VTune (TM) Amplifier. - */ -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - -#include - -#define ITT_MAX_NAME_LEN (100) - -/** - * Auxiliary ITT structure belonging to Ethernet device and using to: - * - track RX queue state to determine whether it is wasting loop iterations - * - begin or end ITT task using task domain and task name (handle) - */ -struct itt_profile_rx_data { - /** - * ITT domains for each queue. - */ - __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; - /** - * ITT task names for each queue. - */ - __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; - /** - * Flags indicating the queues state. Possible values: - * 1 - queue is wasting iterations, - * 0 - otherwise. - */ - uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; -}; - -/** - * The pool of *itt_profile_rx_data* structures. - */ -struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; - - -/** - * This callback function manages ITT tasks collection on given port and queue. - * It must be registered with rte_eth_add_rx_callback() to be called from - * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function - * type declaration. 
- */ -static uint16_t -collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, - __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, - __rte_unused uint16_t max_pkts, __rte_unused void *user_param) -{ - if (unlikely(nb_pkts == 0)) { - if (!itt_rx_data[port_id].queue_state[queue_id]) { - __itt_task_begin( - itt_rx_data[port_id].domains[queue_id], - __itt_null, __itt_null, - itt_rx_data[port_id].handles[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 1; - } - } else { - if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { - __itt_task_end( - itt_rx_data[port_id].domains[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 0; - } - } - return nb_pkts; -} - -/** - * Initialization of itt_profile_rx_data for a given Ethernet device. - * This function must be invoked when ethernet device is being configured. - * Result will be stored in the global array *itt_rx_data*. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param port_name - * The name of the Ethernet device. - * @param rx_queue_num - * The number of RX queues on specified port. - * - * @return - * - On success, zero. - * - On failure, a negative value. - */ -static inline int -itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) -{ - uint16_t q_id; - - for (q_id = 0; q_id < rx_queue_num; ++q_id) { - char domain_name[ITT_MAX_NAME_LEN]; - - snprintf(domain_name, sizeof(domain_name), - "RXBurst.WastedIterations.Port_%s.Queue_%d", - port_name, q_id); - itt_rx_data[port_id].domains[q_id] - = __itt_domain_create(domain_name); - - char task_name[ITT_MAX_NAME_LEN]; - - snprintf(task_name, sizeof(task_name), - "port id: %d; queue id: %d", - port_id, q_id); - itt_rx_data[port_id].handles[q_id] - = __itt_string_handle_create(task_name); - - itt_rx_data[port_id].queue_state[q_id] = 0; - - if (!rte_eth_add_rx_callback( - port_id, q_id, collect_itt_rx_burst_cb, NULL)) { - return -rte_errno; - } - } - - return 0; -} -#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ - -int -__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, - __rte_unused struct rte_eth_dev *dev) -{ -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - return itt_profile_rx_init( - port_id, dev->data->name, dev->data->nb_rx_queues); -#endif - return 0; -} diff --git a/lib/librte_ether/ethdev_profile.h b/lib/librte_ether/ethdev_profile.h deleted file mode 100644 index e5ea3682..00000000 --- a/lib/librte_ether/ethdev_profile.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation - */ - -#ifndef _RTE_ETHDEV_PROFILE_H_ -#define _RTE_ETHDEV_PROFILE_H_ - -#include "rte_ethdev.h" - -/** - * Initialization of profiling RX queues for the Ethernet device. - * Implementation of this function depends on chosen profiling method, - * defined in configs. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param dev - * Pointer to struct rte_eth_dev corresponding to given port_id. - * - * @return - * - On success, zero. - * - On failure, a negative value. 
- */ -int -__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); - -#endif diff --git a/lib/librte_ether/meson.build b/lib/librte_ether/meson.build deleted file mode 100644 index 7fed8605..00000000 --- a/lib/librte_ether/meson.build +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation - -name = 'ethdev' -version = 8 -allow_experimental_apis = true -sources = files('ethdev_profile.c', - 'rte_ethdev.c', - 'rte_flow.c', - 'rte_mtr.c', - 'rte_tm.c') - -headers = files('rte_ethdev.h', - 'rte_ethdev_driver.h', - 'rte_ethdev_core.h', - 'rte_ethdev_pci.h', - 'rte_ethdev_vdev.h', - 'rte_eth_ctrl.h', - 'rte_dev_info.h', - 'rte_flow.h', - 'rte_flow_driver.h', - 'rte_mtr.h', - 'rte_mtr_driver.h', - 'rte_tm.h', - 'rte_tm_driver.h') - -deps += ['net'] diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h deleted file mode 100644 index 6b68584d..00000000 --- a/lib/librte_ether/rte_dev_info.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2015 Intel Corporation - */ - -#ifndef _RTE_DEV_INFO_H_ -#define _RTE_DEV_INFO_H_ - -#include - -/* - * Placeholder for accessing device registers - */ -struct rte_dev_reg_info { - void *data; /**< Buffer for return registers */ - uint32_t offset; /**< Start register table location for access */ - uint32_t length; /**< Number of registers to fetch */ - uint32_t width; /**< Size of device register */ - uint32_t version; /**< Device version */ -}; - -/* - * Placeholder for accessing device eeprom - */ -struct rte_dev_eeprom_info { - void *data; /**< Buffer for return eeprom */ - uint32_t offset; /**< Start eeprom address for access*/ - uint32_t length; /**< Length of eeprom region to access */ - uint32_t magic; /**< Device-specific key, such as device-id */ -}; - -#endif /* _RTE_DEV_INFO_H_ */ diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h deleted file mode 100644 index 668f59ac..00000000 --- a/lib/librte_ether/rte_eth_ctrl.h +++ /dev/null @@ -1,827 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2015 Intel Corporation - */ - -#ifndef _RTE_ETH_CTRL_H_ -#define _RTE_ETH_CTRL_H_ - -#include -#include -#include "rte_ether.h" - -/** - * @file - * - * Ethernet device features and related data structures used - * by control APIs should be defined in this file. - * - */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * A packet can be identified by hardware as different flow types. Different - * NIC hardwares may support different flow types. - * Basically, the NIC hardware identifies the flow type as deep protocol as - * possible, and exclusively. For example, if a packet is identified as - * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types, - * though it is an actual IPV4 packet. - * Note that the flow types are used to define RSS offload types in - * rte_ethdev.h. 
- */ -#define RTE_ETH_FLOW_UNKNOWN 0 -#define RTE_ETH_FLOW_RAW 1 -#define RTE_ETH_FLOW_IPV4 2 -#define RTE_ETH_FLOW_FRAG_IPV4 3 -#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP 4 -#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP 5 -#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP 6 -#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER 7 -#define RTE_ETH_FLOW_IPV6 8 -#define RTE_ETH_FLOW_FRAG_IPV6 9 -#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP 10 -#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP 11 -#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP 12 -#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13 -#define RTE_ETH_FLOW_L2_PAYLOAD 14 -#define RTE_ETH_FLOW_IPV6_EX 15 -#define RTE_ETH_FLOW_IPV6_TCP_EX 16 -#define RTE_ETH_FLOW_IPV6_UDP_EX 17 -#define RTE_ETH_FLOW_PORT 18 - /**< Consider device port number as a flow differentiator */ -#define RTE_ETH_FLOW_VXLAN 19 /**< VXLAN protocol based flow */ -#define RTE_ETH_FLOW_GENEVE 20 /**< GENEVE protocol based flow */ -#define RTE_ETH_FLOW_NVGRE 21 /**< NVGRE protocol based flow */ -#define RTE_ETH_FLOW_MAX 22 - -/** - * Feature filter types - */ -enum rte_filter_type { - RTE_ETH_FILTER_NONE = 0, - RTE_ETH_FILTER_MACVLAN, - RTE_ETH_FILTER_ETHERTYPE, - RTE_ETH_FILTER_FLEXIBLE, - RTE_ETH_FILTER_SYN, - RTE_ETH_FILTER_NTUPLE, - RTE_ETH_FILTER_TUNNEL, - RTE_ETH_FILTER_FDIR, - RTE_ETH_FILTER_HASH, - RTE_ETH_FILTER_L2_TUNNEL, - RTE_ETH_FILTER_GENERIC, - RTE_ETH_FILTER_MAX -}; - -/** - * Generic operations on filters - */ -enum rte_filter_op { - /** used to check whether the type filter is supported */ - RTE_ETH_FILTER_NOP = 0, - RTE_ETH_FILTER_ADD, /**< add filter entry */ - RTE_ETH_FILTER_UPDATE, /**< update filter entry */ - RTE_ETH_FILTER_DELETE, /**< delete filter entry */ - RTE_ETH_FILTER_FLUSH, /**< flush all entries */ - RTE_ETH_FILTER_GET, /**< get filter entry */ - RTE_ETH_FILTER_SET, /**< configurations */ - RTE_ETH_FILTER_INFO, /**< retrieve information */ - RTE_ETH_FILTER_STATS, /**< retrieve statistics */ - RTE_ETH_FILTER_OP_MAX -}; - -/** - * MAC filter type - */ -enum rte_mac_filter_type { - RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */ - RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */ - RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */ - /** hash match of MAC addr and exact match of VLAN ID. */ - RTE_MACVLAN_HASH_MATCH, -}; - -/** - * MAC filter info - */ -struct rte_eth_mac_filter { - uint8_t is_vf; /**< 1 for VF, 0 for port dev */ - uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ - enum rte_mac_filter_type filter_type; /**< MAC filter type */ - struct ether_addr mac_addr; -}; - -/** - * Define all structures for Ethertype Filter type. - */ - -#define RTE_ETHTYPE_FLAGS_MAC 0x0001 /**< If set, compare mac */ -#define RTE_ETHTYPE_FLAGS_DROP 0x0002 /**< If set, drop packet when match */ - -/** - * A structure used to define the ethertype filter entry - * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD, - * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. - */ -struct rte_eth_ethertype_filter { - struct ether_addr mac_addr; /**< Mac address to match. */ - uint16_t ether_type; /**< Ether type to match */ - uint16_t flags; /**< Flags from RTE_ETHTYPE_FLAGS_* */ - uint16_t queue; /**< Queue assigned to when match*/ -}; - -#define RTE_FLEX_FILTER_MAXLEN 128 /**< bytes to use in flex filter. */ -#define RTE_FLEX_FILTER_MASK_SIZE \ - (RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT) - /**< mask bytes in flex filter. 
*/ - -/** - * A structure used to define the flex filter entry - * to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD, - * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. - */ -struct rte_eth_flex_filter { - uint16_t len; - uint8_t bytes[RTE_FLEX_FILTER_MAXLEN]; /**< flex bytes in big endian.*/ - uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE]; /**< if mask bit is 1b, do - not compare corresponding byte. */ - uint8_t priority; - uint16_t queue; /**< Queue assigned to when match. */ -}; - -/** - * A structure used to define the TCP syn filter entry - * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD, - * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. - */ -struct rte_eth_syn_filter { - uint8_t hig_pri; /**< 1 - higher priority than other filters, - 0 - lower priority. */ - uint16_t queue; /**< Queue assigned to when match */ -}; - -/** - * Define all structures for ntuple Filter type. - */ - -#define RTE_NTUPLE_FLAGS_DST_IP 0x0001 /**< If set, dst_ip is part of ntuple */ -#define RTE_NTUPLE_FLAGS_SRC_IP 0x0002 /**< If set, src_ip is part of ntuple */ -#define RTE_NTUPLE_FLAGS_DST_PORT 0x0004 /**< If set, dst_port is part of ntuple */ -#define RTE_NTUPLE_FLAGS_SRC_PORT 0x0008 /**< If set, src_port is part of ntuple */ -#define RTE_NTUPLE_FLAGS_PROTO 0x0010 /**< If set, protocol is part of ntuple */ -#define RTE_NTUPLE_FLAGS_TCP_FLAG 0x0020 /**< If set, tcp flag is involved */ - -#define RTE_5TUPLE_FLAGS ( \ - RTE_NTUPLE_FLAGS_DST_IP | \ - RTE_NTUPLE_FLAGS_SRC_IP | \ - RTE_NTUPLE_FLAGS_DST_PORT | \ - RTE_NTUPLE_FLAGS_SRC_PORT | \ - RTE_NTUPLE_FLAGS_PROTO) - -#define RTE_2TUPLE_FLAGS ( \ - RTE_NTUPLE_FLAGS_DST_PORT | \ - RTE_NTUPLE_FLAGS_PROTO) - -#define TCP_URG_FLAG 0x20 -#define TCP_ACK_FLAG 0x10 -#define TCP_PSH_FLAG 0x08 -#define TCP_RST_FLAG 0x04 -#define TCP_SYN_FLAG 0x02 -#define TCP_FIN_FLAG 0x01 -#define TCP_FLAG_ALL 0x3F - -/** - * A structure used to define the ntuple filter entry - * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD, - * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. - */ -struct rte_eth_ntuple_filter { - uint16_t flags; /**< Flags from RTE_NTUPLE_FLAGS_* */ - uint32_t dst_ip; /**< Destination IP address in big endian. */ - uint32_t dst_ip_mask; /**< Mask of destination IP address. */ - uint32_t src_ip; /**< Source IP address in big endian. */ - uint32_t src_ip_mask; /**< Mask of destination IP address. */ - uint16_t dst_port; /**< Destination port in big endian. */ - uint16_t dst_port_mask; /**< Mask of destination port. */ - uint16_t src_port; /**< Source Port in big endian. */ - uint16_t src_port_mask; /**< Mask of source port. */ - uint8_t proto; /**< L4 protocol. */ - uint8_t proto_mask; /**< Mask of L4 protocol. */ - /** tcp_flags only meaningful when the proto is TCP. - The packet matched above ntuple fields and contain - any set bit in tcp_flags will hit this filter. */ - uint8_t tcp_flags; - uint16_t priority; /**< seven levels (001b-111b), 111b is highest, - used when more than one filter matches. */ - uint16_t queue; /**< Queue assigned to when match*/ -}; - -/** - * Tunneled type. 
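[Editor's note] The ntuple filter above is driven the same way; the sketch below adds a 5-tuple rule that matches TCP traffic to 10.0.0.1:80 and directs it to RX queue 2. Addresses, ports and the queue number are placeholders, and the all-ones masks follow the convention used by existing PMDs where a set mask bit means the corresponding bit is compared:

#include <string.h>
#include <stdint.h>
#include <netinet/in.h>
#include <rte_ethdev.h>
#include <rte_eth_ctrl.h>
#include <rte_byteorder.h>
#include <rte_ip.h>

static int
add_5tuple_filter(uint16_t port_id)
{
	struct rte_eth_ntuple_filter ntuple;

	memset(&ntuple, 0, sizeof(ntuple));
	ntuple.flags = RTE_5TUPLE_FLAGS;
	ntuple.dst_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 1));
	ntuple.dst_ip_mask = UINT32_MAX;	/* compare all address bits */
	ntuple.src_ip_mask = 0;			/* wildcard source address */
	ntuple.dst_port = rte_cpu_to_be_16(80);
	ntuple.dst_port_mask = UINT16_MAX;
	ntuple.src_port_mask = 0;
	ntuple.proto = IPPROTO_TCP;
	ntuple.proto_mask = UINT8_MAX;
	ntuple.priority = 1;
	ntuple.queue = 2;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_NTUPLE,
				       RTE_ETH_FILTER_ADD, &ntuple);
}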
- */ -enum rte_eth_tunnel_type { - RTE_TUNNEL_TYPE_NONE = 0, - RTE_TUNNEL_TYPE_VXLAN, - RTE_TUNNEL_TYPE_GENEVE, - RTE_TUNNEL_TYPE_TEREDO, - RTE_TUNNEL_TYPE_NVGRE, - RTE_TUNNEL_TYPE_IP_IN_GRE, - RTE_L2_TUNNEL_TYPE_E_TAG, - RTE_TUNNEL_TYPE_MAX, -}; - -/** - * filter type of tunneling packet - */ -#define ETH_TUNNEL_FILTER_OMAC 0x01 /**< filter by outer MAC addr */ -#define ETH_TUNNEL_FILTER_OIP 0x02 /**< filter by outer IP Addr */ -#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */ -#define ETH_TUNNEL_FILTER_IMAC 0x08 /**< filter by inner MAC addr */ -#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */ -#define ETH_TUNNEL_FILTER_IIP 0x20 /**< filter by inner IP addr */ - -#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \ - ETH_TUNNEL_FILTER_IVLAN) -#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \ - ETH_TUNNEL_FILTER_IVLAN | \ - ETH_TUNNEL_FILTER_TENID) -#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \ - ETH_TUNNEL_FILTER_TENID) -#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \ - ETH_TUNNEL_FILTER_TENID | \ - ETH_TUNNEL_FILTER_IMAC) - -/** - * Select IPv4 or IPv6 for tunnel filters. - */ -enum rte_tunnel_iptype { - RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */ - RTE_TUNNEL_IPTYPE_IPV6, /**< IPv6. */ -}; - -/** - * Tunneling Packet filter configuration. - */ -struct rte_eth_tunnel_filter_conf { - struct ether_addr outer_mac; /**< Outer MAC address to match. */ - struct ether_addr inner_mac; /**< Inner MAC address to match. */ - uint16_t inner_vlan; /**< Inner VLAN to match. */ - enum rte_tunnel_iptype ip_type; /**< IP address type. */ - /** Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP - is set in filter_type, or inner destination IP address to match - if ETH_TUNNEL_FILTER_IIP is set in filter_type . */ - union { - uint32_t ipv4_addr; /**< IPv4 address in big endian. */ - uint32_t ipv6_addr[4]; /**< IPv6 address in big endian. */ - } ip_addr; - /** Flags from ETH_TUNNEL_FILTER_XX - see above. */ - uint16_t filter_type; - enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */ - uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */ - uint16_t queue_id; /**< Queue assigned to if match. */ -}; - -/** - * Global eth device configuration type. - */ -enum rte_eth_global_cfg_type { - RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0, - RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN, - RTE_ETH_GLOBAL_CFG_TYPE_MAX, -}; - -/** - * Global eth device configuration. - */ -struct rte_eth_global_cfg { - enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */ - union { - uint8_t gre_key_len; /**< Valid GRE key length in byte. */ - uint64_t reserved; /**< Reserve space for future use. */ - } cfg; -}; - -#define RTE_ETH_FDIR_MAX_FLEXLEN 16 /**< Max length of flexbytes. */ -#define RTE_ETH_INSET_SIZE_MAX 128 /**< Max length of input set. 
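[Editor's note] As a sketch of how the tunnel filter configuration above can be used, the following adds a VXLAN filter that matches a given inner MAC address plus VNI 1000 and queues the traffic to RX queue 3. All values are placeholders and support is PMD specific:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_eth_ctrl.h>
#include <rte_ether.h>

static int
add_vxlan_filter(uint16_t port_id, const struct ether_addr *inner_mac)
{
	struct rte_eth_tunnel_filter_conf conf;

	memset(&conf, 0, sizeof(conf));
	ether_addr_copy(inner_mac, &conf.inner_mac);
	conf.filter_type = RTE_TUNNEL_FILTER_IMAC_TENID;
	conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
	conf.tenant_id = 1000;		/* VNI to match */
	conf.queue_id = 3;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_TUNNEL,
				       RTE_ETH_FILTER_ADD, &conf);
}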
*/ - -/** - * Input set fields for Flow Director and Hash filters - */ -enum rte_eth_input_set_field { - RTE_ETH_INPUT_SET_UNKNOWN = 0, - - /* L2 */ - RTE_ETH_INPUT_SET_L2_SRC_MAC = 1, - RTE_ETH_INPUT_SET_L2_DST_MAC, - RTE_ETH_INPUT_SET_L2_OUTER_VLAN, - RTE_ETH_INPUT_SET_L2_INNER_VLAN, - RTE_ETH_INPUT_SET_L2_ETHERTYPE, - - /* L3 */ - RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129, - RTE_ETH_INPUT_SET_L3_DST_IP4, - RTE_ETH_INPUT_SET_L3_SRC_IP6, - RTE_ETH_INPUT_SET_L3_DST_IP6, - RTE_ETH_INPUT_SET_L3_IP4_TOS, - RTE_ETH_INPUT_SET_L3_IP4_PROTO, - RTE_ETH_INPUT_SET_L3_IP6_TC, - RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER, - RTE_ETH_INPUT_SET_L3_IP4_TTL, - RTE_ETH_INPUT_SET_L3_IP6_HOP_LIMITS, - - /* L4 */ - RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257, - RTE_ETH_INPUT_SET_L4_UDP_DST_PORT, - RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT, - RTE_ETH_INPUT_SET_L4_TCP_DST_PORT, - RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT, - RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT, - RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG, - - /* Tunnel */ - RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385, - RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC, - RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN, - RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY, - RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY, - - /* Flexible Payload */ - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD, - RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD, - - RTE_ETH_INPUT_SET_DEFAULT = 65533, - RTE_ETH_INPUT_SET_NONE = 65534, - RTE_ETH_INPUT_SET_MAX = 65535, -}; - -/** - * Filters input set operations - */ -enum rte_filter_input_set_op { - RTE_ETH_INPUT_SET_OP_UNKNOWN, - RTE_ETH_INPUT_SET_SELECT, /**< select input set */ - RTE_ETH_INPUT_SET_ADD, /**< add input set entry */ - RTE_ETH_INPUT_SET_OP_MAX -}; - - -/** - * A structure used to define the input set configuration for - * flow director and hash filters - */ -struct rte_eth_input_set_conf { - uint16_t flow_type; - uint16_t inset_size; - enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX]; - enum rte_filter_input_set_op op; -}; - -/** - * A structure used to define the input for L2 flow - */ -struct rte_eth_l2_flow { - uint16_t ether_type; /**< Ether type in big endian */ -}; - -/** - * A structure used to define the input for IPV4 flow - */ -struct rte_eth_ipv4_flow { - uint32_t src_ip; /**< IPv4 source address in big endian. */ - uint32_t dst_ip; /**< IPv4 destination address in big endian. */ - uint8_t tos; /**< Type of service to match. */ - uint8_t ttl; /**< Time to live to match. */ - uint8_t proto; /**< Protocol, next header in big endian. */ -}; - -/** - * A structure used to define the input for IPV4 UDP flow - */ -struct rte_eth_udpv4_flow { - struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ - uint16_t src_port; /**< UDP source port in big endian. */ - uint16_t dst_port; /**< UDP destination port in big endian. */ -}; - -/** - * A structure used to define the input for IPV4 TCP flow - */ -struct rte_eth_tcpv4_flow { - struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ - uint16_t src_port; /**< TCP source port in big endian. */ - uint16_t dst_port; /**< TCP destination port in big endian. */ -}; - -/** - * A structure used to define the input for IPV4 SCTP flow - */ -struct rte_eth_sctpv4_flow { - struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. 
*/ - uint16_t src_port; /**< SCTP source port in big endian. */ - uint16_t dst_port; /**< SCTP destination port in big endian. */ - uint32_t verify_tag; /**< Verify tag in big endian */ -}; - -/** - * A structure used to define the input for IPV6 flow - */ -struct rte_eth_ipv6_flow { - uint32_t src_ip[4]; /**< IPv6 source address in big endian. */ - uint32_t dst_ip[4]; /**< IPv6 destination address in big endian. */ - uint8_t tc; /**< Traffic class to match. */ - uint8_t proto; /**< Protocol, next header to match. */ - uint8_t hop_limits; /**< Hop limits to match. */ -}; - -/** - * A structure used to define the input for IPV6 UDP flow - */ -struct rte_eth_udpv6_flow { - struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ - uint16_t src_port; /**< UDP source port in big endian. */ - uint16_t dst_port; /**< UDP destination port in big endian. */ -}; - -/** - * A structure used to define the input for IPV6 TCP flow - */ -struct rte_eth_tcpv6_flow { - struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ - uint16_t src_port; /**< TCP source port to in big endian. */ - uint16_t dst_port; /**< TCP destination port in big endian. */ -}; - -/** - * A structure used to define the input for IPV6 SCTP flow - */ -struct rte_eth_sctpv6_flow { - struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ - uint16_t src_port; /**< SCTP source port in big endian. */ - uint16_t dst_port; /**< SCTP destination port in big endian. */ - uint32_t verify_tag; /**< Verify tag in big endian. */ -}; - -/** - * A structure used to define the input for MAC VLAN flow - */ -struct rte_eth_mac_vlan_flow { - struct ether_addr mac_addr; /**< Mac address to match. */ -}; - -/** - * Tunnel type for flow director. - */ -enum rte_eth_fdir_tunnel_type { - RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0, - RTE_FDIR_TUNNEL_TYPE_NVGRE, - RTE_FDIR_TUNNEL_TYPE_VXLAN, -}; - -/** - * A structure used to define the input for tunnel flow, now it's VxLAN or - * NVGRE - */ -struct rte_eth_tunnel_flow { - enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */ - /** Tunnel ID to match. TNI, VNI... in big endian. */ - uint32_t tunnel_id; - struct ether_addr mac_addr; /**< Mac address to match. */ -}; - -/** - * An union contains the inputs for all types of flow - * Items in flows need to be in big endian - */ -union rte_eth_fdir_flow { - struct rte_eth_l2_flow l2_flow; - struct rte_eth_udpv4_flow udp4_flow; - struct rte_eth_tcpv4_flow tcp4_flow; - struct rte_eth_sctpv4_flow sctp4_flow; - struct rte_eth_ipv4_flow ip4_flow; - struct rte_eth_udpv6_flow udp6_flow; - struct rte_eth_tcpv6_flow tcp6_flow; - struct rte_eth_sctpv6_flow sctp6_flow; - struct rte_eth_ipv6_flow ipv6_flow; - struct rte_eth_mac_vlan_flow mac_vlan_flow; - struct rte_eth_tunnel_flow tunnel_flow; -}; - -/** - * A structure used to contain extend input of flow - */ -struct rte_eth_fdir_flow_ext { - uint16_t vlan_tci; - uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN]; - /**< It is filled by the flexible payload to match. 
*/ - uint8_t is_vf; /**< 1 for VF, 0 for port dev */ - uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ -}; - -/** - * A structure used to define the input for a flow director filter entry - */ -struct rte_eth_fdir_input { - uint16_t flow_type; - union rte_eth_fdir_flow flow; - /**< Flow fields to match, dependent on flow_type */ - struct rte_eth_fdir_flow_ext flow_ext; - /**< Additional fields to match */ -}; - -/** - * Behavior will be taken if FDIR match - */ -enum rte_eth_fdir_behavior { - RTE_ETH_FDIR_ACCEPT = 0, - RTE_ETH_FDIR_REJECT, - RTE_ETH_FDIR_PASSTHRU, -}; - -/** - * Flow director report status - * It defines what will be reported if FDIR entry is matched. - */ -enum rte_eth_fdir_status { - RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. */ - RTE_ETH_FDIR_REPORT_ID, /**< Only report FD ID. */ - RTE_ETH_FDIR_REPORT_ID_FLEX_4, /**< Report FD ID and 4 flex bytes. */ - RTE_ETH_FDIR_REPORT_FLEX_8, /**< Report 8 flex bytes. */ -}; - -/** - * A structure used to define an action when match FDIR packet filter. - */ -struct rte_eth_fdir_action { - uint16_t rx_queue; /**< Queue assigned to if FDIR match. */ - enum rte_eth_fdir_behavior behavior; /**< Behavior will be taken */ - enum rte_eth_fdir_status report_status; /**< Status report option */ - uint8_t flex_off; - /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or - RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported - flex bytes start from in flexible payload. */ -}; - -/** - * A structure used to define the flow director filter entry by filter_ctrl API - * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and - * RTE_ETH_FILTER_DELETE operations. - */ -struct rte_eth_fdir_filter { - uint32_t soft_id; - /**< ID, an unique value is required when deal with FDIR entry */ - struct rte_eth_fdir_input input; /**< Input set */ - struct rte_eth_fdir_action action; /**< Action taken when match */ -}; - -/** - * A structure used to configure FDIR masks that are used by the device - * to match the various fields of RX packet headers. - */ -struct rte_eth_fdir_masks { - uint16_t vlan_tci_mask; /**< Bit mask for vlan_tci in big endian */ - /** Bit mask for ipv4 flow in big endian. */ - struct rte_eth_ipv4_flow ipv4_mask; - /** Bit maks for ipv6 flow in big endian. */ - struct rte_eth_ipv6_flow ipv6_mask; - /** Bit mask for L4 source port in big endian. */ - uint16_t src_port_mask; - /** Bit mask for L4 destination port in big endian. */ - uint16_t dst_port_mask; - /** 6 bit mask for proper 6 bytes of Mac address, bit 0 matches the - first byte on the wire */ - uint8_t mac_addr_byte_mask; - /** Bit mask for tunnel ID in big endian. */ - uint32_t tunnel_id_mask; - uint8_t tunnel_type_mask; /**< 1 - Match tunnel type, - 0 - Ignore tunnel type. */ -}; - -/** - * Payload type - */ -enum rte_eth_payload_type { - RTE_ETH_PAYLOAD_UNKNOWN = 0, - RTE_ETH_RAW_PAYLOAD, - RTE_ETH_L2_PAYLOAD, - RTE_ETH_L3_PAYLOAD, - RTE_ETH_L4_PAYLOAD, - RTE_ETH_PAYLOAD_MAX = 8, -}; - -/** - * A structure used to select bytes extracted from the protocol layers to - * flexible payload for filter - */ -struct rte_eth_flex_payload_cfg { - enum rte_eth_payload_type type; /**< Payload type */ - uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN]; - /**< Offset in bytes from the beginning of packet's payload - src_offset[i] indicates the flexbyte i's offset in original - packet payload. 
This value should be less than - flex_payload_limit in struct rte_eth_fdir_info.*/ -}; - -/** - * A structure used to define FDIR masks for flexible payload - * for each flow type - */ -struct rte_eth_fdir_flex_mask { - uint16_t flow_type; - uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN]; - /**< Mask for the whole flexible payload */ -}; - -/** - * A structure used to define all flexible payload related setting - * include flex payload and flex mask - */ -struct rte_eth_fdir_flex_conf { - uint16_t nb_payloads; /**< The number of following payload cfg */ - uint16_t nb_flexmasks; /**< The number of following mask */ - struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX]; - /**< Flex payload configuration for each payload type */ - struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX]; - /**< Flex mask configuration for each flow type */ -}; - -/** - * Flow Director setting modes: none, signature or perfect. - */ -enum rte_fdir_mode { - RTE_FDIR_MODE_NONE = 0, /**< Disable FDIR support. */ - RTE_FDIR_MODE_SIGNATURE, /**< Enable FDIR signature filter mode. */ - RTE_FDIR_MODE_PERFECT, /**< Enable FDIR perfect filter mode. */ - RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */ - RTE_FDIR_MODE_PERFECT_TUNNEL, /**< Enable FDIR filter mode - tunnel. */ -}; - -#define UINT64_BIT (CHAR_BIT * sizeof(uint64_t)) -#define RTE_FLOW_MASK_ARRAY_SIZE \ - (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) - -/** - * A structure used to get the information of flow director filter. - * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation. - * It includes the mode, flexible payload configuration information, - * capabilities and supported flow types, flexible payload characters. - * It can be gotten to help taking specific configurations per device. - */ -struct rte_eth_fdir_info { - enum rte_fdir_mode mode; /**< Flow director mode */ - struct rte_eth_fdir_masks mask; - /** Flex payload configuration information */ - struct rte_eth_fdir_flex_conf flex_conf; - uint32_t guarant_spc; /**< Guaranteed spaces.*/ - uint32_t best_spc; /**< Best effort spaces.*/ - /** Bit mask for every supported flow type. */ - uint64_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE]; - uint32_t max_flexpayload; /**< Total flex payload in bytes. */ - /** Flexible payload unit in bytes. Size and alignments of all flex - payload segments should be multiplies of this value. */ - uint32_t flex_payload_unit; - /** Max number of flexible payload continuous segments. - Each segment should be a multiple of flex_payload_unit.*/ - uint32_t max_flex_payload_segment_num; - /** Maximum src_offset in bytes allowed. It indicates that - src_offset[i] in struct rte_eth_flex_payload_cfg should be less - than this value. */ - uint16_t flex_payload_limit; - /** Flex bitmask unit in bytes. Size of flex bitmasks should be a - multiply of this value. */ - uint32_t flex_bitmask_unit; - /** Max supported size of flex bitmasks in flex_bitmask_unit */ - uint32_t max_flex_bitmask_num; -}; - -/** - * A structure used to define the statistics of flow director. - * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation. - */ -struct rte_eth_fdir_stats { - uint32_t collision; /**< Number of filters with collision. */ - uint32_t free; /**< Number of free filters. */ - uint32_t maxhash; - /**< The lookup hash value of the added filter that updated the value - of the MAXLEN field */ - uint32_t maxlen; /**< Longest linked list of filters. */ - uint64_t add; /**< Number of added filters. 
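[Editor's note] Putting the flow director structures above together, a minimal sketch of adding a perfect-mode rule: it assumes the port was configured with fdir_conf.mode = RTE_FDIR_MODE_PERFECT, and the soft_id, addresses, port and queue values are placeholders:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_eth_ctrl.h>
#include <rte_byteorder.h>
#include <rte_ip.h>

/* Send UDP packets for 192.168.0.1:4000 to RX queue 4. */
static int
add_fdir_udp_rule(uint16_t port_id)
{
	struct rte_eth_fdir_filter fdir;

	memset(&fdir, 0, sizeof(fdir));
	fdir.soft_id = 1;			/* unique rule identifier */
	fdir.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
	fdir.input.flow.udp4_flow.ip.dst_ip =
			rte_cpu_to_be_32(IPv4(192, 168, 0, 1));
	fdir.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4000);
	fdir.action.rx_queue = 4;
	fdir.action.behavior = RTE_ETH_FDIR_ACCEPT;
	fdir.action.report_status = RTE_ETH_FDIR_REPORT_ID;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, &fdir);
}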
*/ - uint64_t remove; /**< Number of removed filters. */ - uint64_t f_add; /**< Number of failed added filters. */ - uint64_t f_remove; /**< Number of failed removed filters. */ - uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ - uint32_t best_cnt; /**< Number of filters in best effort spaces. */ -}; - -/** - * Flow Director filter information types. - */ -enum rte_eth_fdir_filter_info_type { - RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0, - /** Flow Director filter input set configuration */ - RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT, - RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX, -}; - -/** - * A structure used to set FDIR filter information, to support filter type - * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation. - */ -struct rte_eth_fdir_filter_info { - enum rte_eth_fdir_filter_info_type info_type; /**< Information type */ - /** Details of fdir filter information */ - union { - /** Flow Director input set configuration per port */ - struct rte_eth_input_set_conf input_set_conf; - } info; -}; - -/** - * Hash filter information types. - * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the - * information/configuration of 'symmetric hash enable' per port. - * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global - * configurations of hash filters. Those global configurations are valid - * for all ports of the same NIC. - * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global - * hash input set fields - */ -enum rte_eth_hash_filter_info_type { - RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0, - /** Symmetric hash enable per port */ - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT, - /** Configure globally for hash filter */ - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG, - /** Global Hash filter input set configuration */ - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT, - RTE_ETH_HASH_FILTER_INFO_TYPE_MAX, -}; - -/** - * Hash function types. - */ -enum rte_eth_hash_function { - RTE_ETH_HASH_FUNCTION_DEFAULT = 0, - RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */ - RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */ - RTE_ETH_HASH_FUNCTION_MAX, -}; - -#define RTE_SYM_HASH_MASK_ARRAY_SIZE \ - (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) -/** - * A structure used to set or get global hash function configurations which - * include symmetric hash enable per flow type and hash function type. - * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the - * corresponding flow type is enabled or not. - * Each bit in valid_bit_mask[] indicates if the corresponding bit in - * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it - * also means if the flow type is supported by hardware or not. - */ -struct rte_eth_hash_global_conf { - enum rte_eth_hash_function hash_func; /**< Hash function type */ - /** Bit mask for symmetric hash enable per flow type */ - uint64_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; - /** Bit mask indicates if the corresponding bit is valid */ - uint64_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; -}; - -/** - * A structure used to set or get hash filter information, to support filter - * type of 'RTE_ETH_FILTER_HASH' and its operations. 
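[Editor's note] A sketch of how the hash filter path is typically exercised through the rte_eth_hash_filter_info wrapper defined just below: select the Toeplitz function and enable symmetric hashing for non-fragmented IPv4/TCP flows. Whether the request is honoured is PMD specific, and the flow type is only an example:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_eth_ctrl.h>

static int
enable_symmetric_toeplitz(uint16_t port_id)
{
	struct rte_eth_hash_filter_info info;
	uint32_t ftype = RTE_ETH_FLOW_NONFRAG_IPV4_TCP;
	int ret;

	ret = rte_eth_dev_filter_supported(port_id, RTE_ETH_FILTER_HASH);
	if (ret < 0)
		return ret;

	memset(&info, 0, sizeof(info));
	info.info_type = RTE_ETH_HASH_FILTER_GLOBAL_CONFIG;
	info.info.global_conf.hash_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ;
	info.info.global_conf.sym_hash_enable_mask[0] = 1ULL << ftype;
	info.info.global_conf.valid_bit_mask[0] = 1ULL << ftype;

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_HASH,
				       RTE_ETH_FILTER_SET, &info);
}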
- */ -struct rte_eth_hash_filter_info { - enum rte_eth_hash_filter_info_type info_type; /**< Information type */ - /** Details of hash filter information */ - union { - /** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */ - uint8_t enable; - /** Global configurations of hash filter */ - struct rte_eth_hash_global_conf global_conf; - /** Global configurations of hash filter input set */ - struct rte_eth_input_set_conf input_set_conf; - } info; -}; - -/** - * l2 tunnel configuration. - */ -struct rte_eth_l2_tunnel_conf { - enum rte_eth_tunnel_type l2_tunnel_type; - uint16_t ether_type; /* ether type in l2 header */ - uint32_t tunnel_id; /* port tag id for e-tag */ - uint16_t vf_id; /* VF id for tag insertion */ - uint32_t pool; /* destination pool for tag based forwarding */ -}; - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ETH_CTRL_H_ */ diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c deleted file mode 100644 index 0590f0c1..00000000 --- a/lib/librte_ether/rte_ethdev.c +++ /dev/null @@ -1,4170 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rte_ether.h" -#include "rte_ethdev.h" -#include "rte_ethdev_driver.h" -#include "ethdev_profile.h" - -static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; -struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; -static uint8_t eth_dev_last_created_port; - -/* spinlock for eth device callbacks */ -static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; - -/* spinlock for add/remove rx callbacks */ -static rte_spinlock_t rte_eth_rx_cb_lock = RTE_SPINLOCK_INITIALIZER; - -/* spinlock for add/remove tx callbacks */ -static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER; - -/* spinlock for shared data allocation */ -static rte_spinlock_t rte_eth_shared_data_lock = RTE_SPINLOCK_INITIALIZER; - -/* store statistics names and its offset in stats structure */ -struct rte_eth_xstats_name_off { - char name[RTE_ETH_XSTATS_NAME_SIZE]; - unsigned offset; -}; - -/* Shared memory between primary and secondary processes. 
*/ -static struct { - uint64_t next_owner_id; - rte_spinlock_t ownership_lock; - struct rte_eth_dev_data data[RTE_MAX_ETHPORTS]; -} *rte_eth_dev_shared_data; - -static const struct rte_eth_xstats_name_off rte_stats_strings[] = { - {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)}, - {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, - {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, - {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, - {"rx_missed_errors", offsetof(struct rte_eth_stats, imissed)}, - {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, - {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, - {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, - rx_nombuf)}, -}; - -#define RTE_NB_STATS (sizeof(rte_stats_strings) / sizeof(rte_stats_strings[0])) - -static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = { - {"packets", offsetof(struct rte_eth_stats, q_ipackets)}, - {"bytes", offsetof(struct rte_eth_stats, q_ibytes)}, - {"errors", offsetof(struct rte_eth_stats, q_errors)}, -}; - -#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) / \ - sizeof(rte_rxq_stats_strings[0])) - -static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = { - {"packets", offsetof(struct rte_eth_stats, q_opackets)}, - {"bytes", offsetof(struct rte_eth_stats, q_obytes)}, -}; -#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) / \ - sizeof(rte_txq_stats_strings[0])) - -#define RTE_RX_OFFLOAD_BIT2STR(_name) \ - { DEV_RX_OFFLOAD_##_name, #_name } - -static const struct { - uint64_t offload; - const char *name; -} rte_rx_offload_names[] = { - RTE_RX_OFFLOAD_BIT2STR(VLAN_STRIP), - RTE_RX_OFFLOAD_BIT2STR(IPV4_CKSUM), - RTE_RX_OFFLOAD_BIT2STR(UDP_CKSUM), - RTE_RX_OFFLOAD_BIT2STR(TCP_CKSUM), - RTE_RX_OFFLOAD_BIT2STR(TCP_LRO), - RTE_RX_OFFLOAD_BIT2STR(QINQ_STRIP), - RTE_RX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), - RTE_RX_OFFLOAD_BIT2STR(MACSEC_STRIP), - RTE_RX_OFFLOAD_BIT2STR(HEADER_SPLIT), - RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER), - RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND), - RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME), - RTE_RX_OFFLOAD_BIT2STR(CRC_STRIP), - RTE_RX_OFFLOAD_BIT2STR(SCATTER), - RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP), - RTE_RX_OFFLOAD_BIT2STR(SECURITY), -}; - -#undef RTE_RX_OFFLOAD_BIT2STR - -#define RTE_TX_OFFLOAD_BIT2STR(_name) \ - { DEV_TX_OFFLOAD_##_name, #_name } - -static const struct { - uint64_t offload; - const char *name; -} rte_tx_offload_names[] = { - RTE_TX_OFFLOAD_BIT2STR(VLAN_INSERT), - RTE_TX_OFFLOAD_BIT2STR(IPV4_CKSUM), - RTE_TX_OFFLOAD_BIT2STR(UDP_CKSUM), - RTE_TX_OFFLOAD_BIT2STR(TCP_CKSUM), - RTE_TX_OFFLOAD_BIT2STR(SCTP_CKSUM), - RTE_TX_OFFLOAD_BIT2STR(TCP_TSO), - RTE_TX_OFFLOAD_BIT2STR(UDP_TSO), - RTE_TX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), - RTE_TX_OFFLOAD_BIT2STR(QINQ_INSERT), - RTE_TX_OFFLOAD_BIT2STR(VXLAN_TNL_TSO), - RTE_TX_OFFLOAD_BIT2STR(GRE_TNL_TSO), - RTE_TX_OFFLOAD_BIT2STR(IPIP_TNL_TSO), - RTE_TX_OFFLOAD_BIT2STR(GENEVE_TNL_TSO), - RTE_TX_OFFLOAD_BIT2STR(MACSEC_INSERT), - RTE_TX_OFFLOAD_BIT2STR(MT_LOCKFREE), - RTE_TX_OFFLOAD_BIT2STR(MULTI_SEGS), - RTE_TX_OFFLOAD_BIT2STR(MBUF_FAST_FREE), - RTE_TX_OFFLOAD_BIT2STR(SECURITY), -}; - -#undef RTE_TX_OFFLOAD_BIT2STR - -/** - * The user application callback description. - * - * It contains callback address to be registered by user application, - * the pointer to the parameters for callback, and the event type. 
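[Editor's note] From the application side, the callback list described here is driven through rte_eth_dev_callback_register(). A minimal sketch for link status change notifications; it assumes the port is configured with intr_conf.lsc = 1 so the PMD raises RTE_ETH_EVENT_INTR_LSC:

#include <stdio.h>
#include <rte_common.h>
#include <rte_ethdev.h>

static int
on_link_change(uint16_t port_id, enum rte_eth_event_type event,
	       void *cb_arg, void *ret_param)
{
	struct rte_eth_link link;

	RTE_SET_USED(event);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);

	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link %s\n", port_id,
	       link.link_status ? "up" : "down");
	return 0;
}

static int
watch_link(uint16_t port_id)
{
	return rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
					     on_link_change, NULL);
}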
- */ -struct rte_eth_dev_callback { - TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ - rte_eth_dev_cb_fn cb_fn; /**< Callback address */ - void *cb_arg; /**< Parameter for callback */ - void *ret_param; /**< Return parameter */ - enum rte_eth_event_type event; /**< Interrupt event type */ - uint32_t active; /**< Callback is executing */ -}; - -enum { - STAT_QMAP_TX = 0, - STAT_QMAP_RX -}; - -uint16_t -rte_eth_find_next(uint16_t port_id) -{ - while (port_id < RTE_MAX_ETHPORTS && - rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && - rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) - port_id++; - - if (port_id >= RTE_MAX_ETHPORTS) - return RTE_MAX_ETHPORTS; - - return port_id; -} - -static void -rte_eth_dev_shared_data_prepare(void) -{ - const unsigned flags = 0; - const struct rte_memzone *mz; - - rte_spinlock_lock(&rte_eth_shared_data_lock); - - if (rte_eth_dev_shared_data == NULL) { - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* Allocate port data and ownership shared memory. */ - mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA, - sizeof(*rte_eth_dev_shared_data), - rte_socket_id(), flags); - } else - mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA); - if (mz == NULL) - rte_panic("Cannot allocate ethdev shared data\n"); - - rte_eth_dev_shared_data = mz->addr; - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_eth_dev_shared_data->next_owner_id = - RTE_ETH_DEV_NO_OWNER + 1; - rte_spinlock_init(&rte_eth_dev_shared_data->ownership_lock); - memset(rte_eth_dev_shared_data->data, 0, - sizeof(rte_eth_dev_shared_data->data)); - } - } - - rte_spinlock_unlock(&rte_eth_shared_data_lock); -} - -struct rte_eth_dev * -rte_eth_dev_allocated(const char *name) -{ - unsigned i; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) && - strcmp(rte_eth_devices[i].data->name, name) == 0) - return &rte_eth_devices[i]; - } - return NULL; -} - -static uint16_t -rte_eth_dev_find_free_port(void) -{ - unsigned i; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - /* Using shared name field to find a free port. */ - if (rte_eth_dev_shared_data->data[i].name[0] == '\0') { - RTE_ASSERT(rte_eth_devices[i].state == - RTE_ETH_DEV_UNUSED); - return i; - } - } - return RTE_MAX_ETHPORTS; -} - -static struct rte_eth_dev * -eth_dev_get(uint16_t port_id) -{ - struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; - - eth_dev->data = &rte_eth_dev_shared_data->data[port_id]; - eth_dev->state = RTE_ETH_DEV_ATTACHED; - - eth_dev_last_created_port = port_id; - - return eth_dev; -} - -struct rte_eth_dev * -rte_eth_dev_allocate(const char *name) -{ - uint16_t port_id; - struct rte_eth_dev *eth_dev = NULL; - - rte_eth_dev_shared_data_prepare(); - - /* Synchronize port creation between primary and secondary threads. 
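[Editor's note] For context, applications normally reach these iterators through the RTE_ETH_FOREACH_DEV() macro from rte_ethdev.h, which builds on rte_eth_find_next() and skips unused slots and ports owned by someone else. A small sketch (the printed fields are only examples):

#include <stdio.h>
#include <rte_ethdev.h>

static void
print_ports(void)
{
	char name[RTE_ETH_NAME_MAX_LEN];
	uint16_t port_id;

	RTE_ETH_FOREACH_DEV(port_id) {
		if (rte_eth_dev_get_name_by_port(port_id, name) != 0)
			continue;
		printf("port %u: %s (socket %d)\n", port_id, name,
		       rte_eth_dev_socket_id(port_id));
	}
}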
*/ - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - port_id = rte_eth_dev_find_free_port(); - if (port_id == RTE_MAX_ETHPORTS) { - RTE_LOG(ERR, EAL, "Reached maximum number of Ethernet ports\n"); - goto unlock; - } - - if (rte_eth_dev_allocated(name) != NULL) { - RTE_LOG(ERR, EAL, "Ethernet Device with name %s already allocated!\n", - name); - goto unlock; - } - - eth_dev = eth_dev_get(port_id); - snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); - eth_dev->data->port_id = port_id; - eth_dev->data->mtu = ETHER_MTU; - -unlock: - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - - if (eth_dev != NULL) - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_NEW, NULL); - - return eth_dev; -} - -/* - * Attach to a port already registered by the primary process, which - * makes sure that the same device would have the same port id both - * in the primary and secondary process. - */ -struct rte_eth_dev * -rte_eth_dev_attach_secondary(const char *name) -{ - uint16_t i; - struct rte_eth_dev *eth_dev = NULL; - - rte_eth_dev_shared_data_prepare(); - - /* Synchronize port attachment to primary port creation and release. */ - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (strcmp(rte_eth_dev_shared_data->data[i].name, name) == 0) - break; - } - if (i == RTE_MAX_ETHPORTS) { - RTE_PMD_DEBUG_TRACE( - "device %s is not driven by the primary process\n", - name); - } else { - eth_dev = eth_dev_get(i); - RTE_ASSERT(eth_dev->data->port_id == i); - } - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - return eth_dev; -} - -int -rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) -{ - if (eth_dev == NULL) - return -EINVAL; - - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - eth_dev->state = RTE_ETH_DEV_UNUSED; - - memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data)); - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL); - - return 0; -} - -int -rte_eth_dev_is_valid_port(uint16_t port_id) -{ - if (port_id >= RTE_MAX_ETHPORTS || - (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) - return 0; - else - return 1; -} - -static int -rte_eth_is_valid_owner_id(uint64_t owner_id) -{ - if (owner_id == RTE_ETH_DEV_NO_OWNER || - rte_eth_dev_shared_data->next_owner_id <= owner_id) { - RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016lX.\n", owner_id); - return 0; - } - return 1; -} - -uint64_t __rte_experimental -rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id) -{ - while (port_id < RTE_MAX_ETHPORTS && - ((rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && - rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) || - rte_eth_devices[port_id].data->owner.id != owner_id)) - port_id++; - - if (port_id >= RTE_MAX_ETHPORTS) - return RTE_MAX_ETHPORTS; - - return port_id; -} - -int __rte_experimental -rte_eth_dev_owner_new(uint64_t *owner_id) -{ - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - *owner_id = rte_eth_dev_shared_data->next_owner_id++; - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - return 0; -} - -static int -_rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id, - const struct rte_eth_dev_owner *new_owner) -{ - struct rte_eth_dev_owner *port_owner; - int sret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - 
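[Editor's note] A sketch of the ownership API from the application side: allocate an owner id, then claim a port so that the default iterators leave it alone. These functions are experimental in this release, so the caller must be built with ALLOW_EXPERIMENTAL_API; the owner name is arbitrary:

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

static int
claim_port(uint16_t port_id, uint64_t *owner_id)
{
	struct rte_eth_dev_owner owner;
	int ret;

	ret = rte_eth_dev_owner_new(owner_id);	/* allocate a unique owner id */
	if (ret != 0)
		return ret;

	memset(&owner, 0, sizeof(owner));
	owner.id = *owner_id;
	snprintf(owner.name, sizeof(owner.name), "my-app");

	return rte_eth_dev_owner_set(port_id, &owner);
}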
- if (!rte_eth_is_valid_owner_id(new_owner->id) && - !rte_eth_is_valid_owner_id(old_owner_id)) - return -EINVAL; - - port_owner = &rte_eth_devices[port_id].data->owner; - if (port_owner->id != old_owner_id) { - RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned" - " by %s_%016lX.\n", port_id, - port_owner->name, port_owner->id); - return -EPERM; - } - - sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s", - new_owner->name); - if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN) - RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n", - port_id); - - port_owner->id = new_owner->id; - - RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016lX.\n", port_id, - new_owner->name, new_owner->id); - - return 0; -} - -int __rte_experimental -rte_eth_dev_owner_set(const uint16_t port_id, - const struct rte_eth_dev_owner *owner) -{ - int ret; - - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - ret = _rte_eth_dev_owner_set(port_id, RTE_ETH_DEV_NO_OWNER, owner); - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - return ret; -} - -int __rte_experimental -rte_eth_dev_owner_unset(const uint16_t port_id, const uint64_t owner_id) -{ - const struct rte_eth_dev_owner new_owner = (struct rte_eth_dev_owner) - {.id = RTE_ETH_DEV_NO_OWNER, .name = ""}; - int ret; - - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - ret = _rte_eth_dev_owner_set(port_id, owner_id, &new_owner); - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - return ret; -} - -void __rte_experimental -rte_eth_dev_owner_delete(const uint64_t owner_id) -{ - uint16_t port_id; - - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - if (rte_eth_is_valid_owner_id(owner_id)) { - RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, owner_id) - memset(&rte_eth_devices[port_id].data->owner, 0, - sizeof(struct rte_eth_dev_owner)); - RTE_PMD_DEBUG_TRACE("All port owners owned by %016X identifier" - " have removed.\n", owner_id); - } - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); -} - -int __rte_experimental -rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner) -{ - int ret = 0; - - rte_eth_dev_shared_data_prepare(); - - rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); - - if (!rte_eth_dev_is_valid_port(port_id)) { - RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); - ret = -ENODEV; - } else { - rte_memcpy(owner, &rte_eth_devices[port_id].data->owner, - sizeof(*owner)); - } - - rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); - return ret; -} - -int -rte_eth_dev_socket_id(uint16_t port_id) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); - return rte_eth_devices[port_id].data->numa_node; -} - -void * -rte_eth_dev_get_sec_ctx(uint8_t port_id) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); - return rte_eth_devices[port_id].security_ctx; -} - -uint16_t -rte_eth_dev_count(void) -{ - uint16_t p; - uint16_t count; - - count = 0; - - RTE_ETH_FOREACH_DEV(p) - count++; - - return count; -} - -int -rte_eth_dev_get_name_by_port(uint16_t port_id, char *name) -{ - char *tmp; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (name == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - /* shouldn't check 'rte_eth_devices[i].data', - * because it might be overwritten by VDEV PMD */ - tmp = rte_eth_dev_shared_data->data[port_id].name; - 
strcpy(name, tmp); - return 0; -} - -int -rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id) -{ - uint32_t pid; - - if (name == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) { - if (rte_eth_devices[pid].state != RTE_ETH_DEV_UNUSED && - !strncmp(name, rte_eth_dev_shared_data->data[pid].name, - strlen(name))) { - *port_id = pid; - return 0; - } - } - - return -ENODEV; -} - -static int -eth_err(uint16_t port_id, int ret) -{ - if (ret == 0) - return 0; - if (rte_eth_dev_is_removed(port_id)) - return -EIO; - return ret; -} - -/* attach the new device, then store port_id of the device */ -int -rte_eth_dev_attach(const char *devargs, uint16_t *port_id) -{ - int ret = -1; - int current = rte_eth_dev_count(); - char *name = NULL; - char *args = NULL; - - if ((devargs == NULL) || (port_id == NULL)) { - ret = -EINVAL; - goto err; - } - - /* parse devargs, then retrieve device name and args */ - if (rte_eal_parse_devargs_str(devargs, &name, &args)) - goto err; - - ret = rte_eal_dev_attach(name, args); - if (ret < 0) - goto err; - - /* no point looking at the port count if no port exists */ - if (!rte_eth_dev_count()) { - RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name); - ret = -1; - goto err; - } - - /* if nothing happened, there is a bug here, since some driver told us - * it did attach a device, but did not create a port. - */ - if (current == rte_eth_dev_count()) { - ret = -1; - goto err; - } - - *port_id = eth_dev_last_created_port; - ret = 0; - -err: - free(name); - free(args); - return ret; -} - -/* detach the device, then store the name of the device */ -int -rte_eth_dev_detach(uint16_t port_id, char *name) -{ - uint32_t dev_flags; - int ret = -1; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (name == NULL) { - ret = -EINVAL; - goto err; - } - - dev_flags = rte_eth_devices[port_id].data->dev_flags; - if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { - RTE_LOG(ERR, EAL, "Port %" PRIu16 " is bonded, cannot detach\n", - port_id); - ret = -ENOTSUP; - goto err; - } - - snprintf(name, sizeof(rte_eth_devices[port_id].data->name), - "%s", rte_eth_devices[port_id].data->name); - - ret = rte_eal_dev_detach(rte_eth_devices[port_id].device); - if (ret < 0) - goto err; - - rte_eth_dev_release_port(&rte_eth_devices[port_id]); - return 0; - -err: - return ret; -} - -static int -rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) -{ - uint16_t old_nb_queues = dev->data->nb_rx_queues; - void **rxq; - unsigned i; - - if (dev->data->rx_queues == NULL && nb_queues != 0) { /* first time configuration */ - dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", - sizeof(dev->data->rx_queues[0]) * nb_queues, - RTE_CACHE_LINE_SIZE); - if (dev->data->rx_queues == NULL) { - dev->data->nb_rx_queues = 0; - return -(ENOMEM); - } - } else if (dev->data->rx_queues != NULL && nb_queues != 0) { /* re-configure */ - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); - - rxq = dev->data->rx_queues; - - for (i = nb_queues; i < old_nb_queues; i++) - (*dev->dev_ops->rx_queue_release)(rxq[i]); - rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, - RTE_CACHE_LINE_SIZE); - if (rxq == NULL) - return -(ENOMEM); - if (nb_queues > old_nb_queues) { - uint16_t new_qs = nb_queues - old_nb_queues; - - memset(rxq + old_nb_queues, 0, - sizeof(rxq[0]) * new_qs); - } - - dev->data->rx_queues = rxq; - - } else if (dev->data->rx_queues != NULL && nb_queues == 0) { - 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); - - rxq = dev->data->rx_queues; - - for (i = nb_queues; i < old_nb_queues; i++) - (*dev->dev_ops->rx_queue_release)(rxq[i]); - - rte_free(dev->data->rx_queues); - dev->data->rx_queues = NULL; - } - dev->data->nb_rx_queues = nb_queues; - return 0; -} - -int -rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (rx_queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP); - - if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { - RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 - " already started\n", - rx_queue_id, port_id); - return 0; - } - - return eth_err(port_id, dev->dev_ops->rx_queue_start(dev, - rx_queue_id)); - -} - -int -rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (rx_queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP); - - if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { - RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 - " already stopped\n", - rx_queue_id, port_id); - return 0; - } - - return eth_err(port_id, dev->dev_ops->rx_queue_stop(dev, rx_queue_id)); - -} - -int -rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (tx_queue_id >= dev->data->nb_tx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP); - - if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { - RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 - " already started\n", - tx_queue_id, port_id); - return 0; - } - - return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, - tx_queue_id)); - -} - -int -rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (tx_queue_id >= dev->data->nb_tx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP); - - if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { - RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 - " already stopped\n", - tx_queue_id, port_id); - return 0; - } - - return eth_err(port_id, dev->dev_ops->tx_queue_stop(dev, tx_queue_id)); - -} - -static int -rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) -{ - uint16_t old_nb_queues = dev->data->nb_tx_queues; - void **txq; - unsigned i; - - if (dev->data->tx_queues == NULL && nb_queues != 0) { /* first time configuration */ - dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", - sizeof(dev->data->tx_queues[0]) * nb_queues, - RTE_CACHE_LINE_SIZE); - if (dev->data->tx_queues == NULL) { - 
dev->data->nb_tx_queues = 0; - return -(ENOMEM); - } - } else if (dev->data->tx_queues != NULL && nb_queues != 0) { /* re-configure */ - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); - - txq = dev->data->tx_queues; - - for (i = nb_queues; i < old_nb_queues; i++) - (*dev->dev_ops->tx_queue_release)(txq[i]); - txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, - RTE_CACHE_LINE_SIZE); - if (txq == NULL) - return -ENOMEM; - if (nb_queues > old_nb_queues) { - uint16_t new_qs = nb_queues - old_nb_queues; - - memset(txq + old_nb_queues, 0, - sizeof(txq[0]) * new_qs); - } - - dev->data->tx_queues = txq; - - } else if (dev->data->tx_queues != NULL && nb_queues == 0) { - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); - - txq = dev->data->tx_queues; - - for (i = nb_queues; i < old_nb_queues; i++) - (*dev->dev_ops->tx_queue_release)(txq[i]); - - rte_free(dev->data->tx_queues); - dev->data->tx_queues = NULL; - } - dev->data->nb_tx_queues = nb_queues; - return 0; -} - -uint32_t -rte_eth_speed_bitflag(uint32_t speed, int duplex) -{ - switch (speed) { - case ETH_SPEED_NUM_10M: - return duplex ? ETH_LINK_SPEED_10M : ETH_LINK_SPEED_10M_HD; - case ETH_SPEED_NUM_100M: - return duplex ? ETH_LINK_SPEED_100M : ETH_LINK_SPEED_100M_HD; - case ETH_SPEED_NUM_1G: - return ETH_LINK_SPEED_1G; - case ETH_SPEED_NUM_2_5G: - return ETH_LINK_SPEED_2_5G; - case ETH_SPEED_NUM_5G: - return ETH_LINK_SPEED_5G; - case ETH_SPEED_NUM_10G: - return ETH_LINK_SPEED_10G; - case ETH_SPEED_NUM_20G: - return ETH_LINK_SPEED_20G; - case ETH_SPEED_NUM_25G: - return ETH_LINK_SPEED_25G; - case ETH_SPEED_NUM_40G: - return ETH_LINK_SPEED_40G; - case ETH_SPEED_NUM_50G: - return ETH_LINK_SPEED_50G; - case ETH_SPEED_NUM_56G: - return ETH_LINK_SPEED_56G; - case ETH_SPEED_NUM_100G: - return ETH_LINK_SPEED_100G; - default: - return 0; - } -} - -/** - * A conversion function from rxmode bitfield API. - */ -static void -rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode, - uint64_t *rx_offloads) -{ - uint64_t offloads = 0; - - if (rxmode->header_split == 1) - offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT; - if (rxmode->hw_ip_checksum == 1) - offloads |= DEV_RX_OFFLOAD_CHECKSUM; - if (rxmode->hw_vlan_filter == 1) - offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; - if (rxmode->hw_vlan_strip == 1) - offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; - if (rxmode->hw_vlan_extend == 1) - offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND; - if (rxmode->jumbo_frame == 1) - offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; - if (rxmode->hw_strip_crc == 1) - offloads |= DEV_RX_OFFLOAD_CRC_STRIP; - if (rxmode->enable_scatter == 1) - offloads |= DEV_RX_OFFLOAD_SCATTER; - if (rxmode->enable_lro == 1) - offloads |= DEV_RX_OFFLOAD_TCP_LRO; - if (rxmode->hw_timestamp == 1) - offloads |= DEV_RX_OFFLOAD_TIMESTAMP; - if (rxmode->security == 1) - offloads |= DEV_RX_OFFLOAD_SECURITY; - - *rx_offloads = offloads; -} - -/** - * A conversion function from rxmode offloads API. 
- */ -static void -rte_eth_convert_rx_offloads(const uint64_t rx_offloads, - struct rte_eth_rxmode *rxmode) -{ - - if (rx_offloads & DEV_RX_OFFLOAD_HEADER_SPLIT) - rxmode->header_split = 1; - else - rxmode->header_split = 0; - if (rx_offloads & DEV_RX_OFFLOAD_CHECKSUM) - rxmode->hw_ip_checksum = 1; - else - rxmode->hw_ip_checksum = 0; - if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) - rxmode->hw_vlan_filter = 1; - else - rxmode->hw_vlan_filter = 0; - if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) - rxmode->hw_vlan_strip = 1; - else - rxmode->hw_vlan_strip = 0; - if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) - rxmode->hw_vlan_extend = 1; - else - rxmode->hw_vlan_extend = 0; - if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) - rxmode->jumbo_frame = 1; - else - rxmode->jumbo_frame = 0; - if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP) - rxmode->hw_strip_crc = 1; - else - rxmode->hw_strip_crc = 0; - if (rx_offloads & DEV_RX_OFFLOAD_SCATTER) - rxmode->enable_scatter = 1; - else - rxmode->enable_scatter = 0; - if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) - rxmode->enable_lro = 1; - else - rxmode->enable_lro = 0; - if (rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - rxmode->hw_timestamp = 1; - else - rxmode->hw_timestamp = 0; - if (rx_offloads & DEV_RX_OFFLOAD_SECURITY) - rxmode->security = 1; - else - rxmode->security = 0; -} - -const char * __rte_experimental -rte_eth_dev_rx_offload_name(uint64_t offload) -{ - const char *name = "UNKNOWN"; - unsigned int i; - - for (i = 0; i < RTE_DIM(rte_rx_offload_names); ++i) { - if (offload == rte_rx_offload_names[i].offload) { - name = rte_rx_offload_names[i].name; - break; - } - } - - return name; -} - -const char * __rte_experimental -rte_eth_dev_tx_offload_name(uint64_t offload) -{ - const char *name = "UNKNOWN"; - unsigned int i; - - for (i = 0; i < RTE_DIM(rte_tx_offload_names); ++i) { - if (offload == rte_tx_offload_names[i].offload) { - name = rte_tx_offload_names[i].name; - break; - } - } - - return name; -} - -int -rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, - const struct rte_eth_conf *dev_conf) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_conf local_conf = *dev_conf; - int diag; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) { - RTE_PMD_DEBUG_TRACE( - "Number of RX queues requested (%u) is greater than max supported(%d)\n", - nb_rx_q, RTE_MAX_QUEUES_PER_PORT); - return -EINVAL; - } - - if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { - RTE_PMD_DEBUG_TRACE( - "Number of TX queues requested (%u) is greater than max supported(%d)\n", - nb_tx_q, RTE_MAX_QUEUES_PER_PORT); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); - - if (dev->data->dev_started) { - RTE_PMD_DEBUG_TRACE( - "port %d must be stopped to allow configuration\n", port_id); - return -EBUSY; - } - - /* - * Convert between the offloads API to enable PMDs to support - * only one of them. 
- */ - if (dev_conf->rxmode.ignore_offload_bitfield == 0) { - rte_eth_convert_rx_offload_bitfield( - &dev_conf->rxmode, &local_conf.rxmode.offloads); - } else { - rte_eth_convert_rx_offloads(dev_conf->rxmode.offloads, - &local_conf.rxmode); - } - - /* Copy the dev_conf parameter into the dev structure */ - memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf)); - - /* - * Check that the numbers of RX and TX queues are not greater - * than the maximum number of RX and TX queues supported by the - * configured device. - */ - (*dev->dev_ops->dev_infos_get)(dev, &dev_info); - - if (nb_rx_q == 0 && nb_tx_q == 0) { - RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0\n", port_id); - return -EINVAL; - } - - if (nb_rx_q > dev_info.max_rx_queues) { - RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n", - port_id, nb_rx_q, dev_info.max_rx_queues); - return -EINVAL; - } - - if (nb_tx_q > dev_info.max_tx_queues) { - RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n", - port_id, nb_tx_q, dev_info.max_tx_queues); - return -EINVAL; - } - - /* Check that the device supports requested interrupts */ - if ((dev_conf->intr_conf.lsc == 1) && - (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { - RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", - dev->device->driver->name); - return -EINVAL; - } - if ((dev_conf->intr_conf.rmv == 1) && - (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { - RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n", - dev->device->driver->name); - return -EINVAL; - } - - /* - * If jumbo frames are enabled, check that the maximum RX packet - * length is supported by the configured device. - */ - if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (dev_conf->rxmode.max_rx_pkt_len > - dev_info.max_rx_pktlen) { - RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" - " > max valid value %u\n", - port_id, - (unsigned)dev_conf->rxmode.max_rx_pkt_len, - (unsigned)dev_info.max_rx_pktlen); - return -EINVAL; - } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { - RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" - " < min valid value %u\n", - port_id, - (unsigned)dev_conf->rxmode.max_rx_pkt_len, - (unsigned)ETHER_MIN_LEN); - return -EINVAL; - } - } else { - if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || - dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) - /* Use default value */ - dev->data->dev_conf.rxmode.max_rx_pkt_len = - ETHER_MAX_LEN; - } - - /* - * Setup new number of RX/TX queues and reconfigure device. - */ - diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q); - if (diag != 0) { - RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n", - port_id, diag); - return diag; - } - - diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); - if (diag != 0) { - RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n", - port_id, diag); - rte_eth_dev_rx_queue_config(dev, 0); - return diag; - } - - diag = (*dev->dev_ops->dev_configure)(dev); - if (diag != 0) { - RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n", - port_id, diag); - rte_eth_dev_rx_queue_config(dev, 0); - rte_eth_dev_tx_queue_config(dev, 0); - return eth_err(port_id, diag); - } - - /* Initialize Rx profiling if enabled at compilation time. 
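[Editor's note] For reference, a minimal bring-up sequence that exercises rte_eth_dev_configure() with the offload-flag form converted above, then sets up one RX and one TX queue and starts the port. mbuf_pool is assumed to be a pktmbuf pool created elsewhere and the queue sizes are placeholders:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_mempool.h>

static int
port_init(uint16_t port_id, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf conf;
	int ret;

	memset(&conf, 0, sizeof(conf));
	conf.rxmode.ignore_offload_bitfield = 1;	/* use rxmode.offloads */
	conf.rxmode.offloads = DEV_RX_OFFLOAD_CHECKSUM;
	conf.rxmode.max_rx_pkt_len = ETHER_MAX_LEN;

	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret != 0)
		return ret;

	ret = rte_eth_rx_queue_setup(port_id, 0, 512,
			rte_eth_dev_socket_id(port_id), NULL, mbuf_pool);
	if (ret < 0)
		return ret;

	ret = rte_eth_tx_queue_setup(port_id, 0, 512,
			rte_eth_dev_socket_id(port_id), NULL);
	if (ret < 0)
		return ret;

	return rte_eth_dev_start(port_id);
}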
*/ - diag = __rte_eth_profile_rx_init(port_id, dev); - if (diag != 0) { - RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n", - port_id, diag); - rte_eth_dev_rx_queue_config(dev, 0); - rte_eth_dev_tx_queue_config(dev, 0); - return eth_err(port_id, diag); - } - - return 0; -} - -void -_rte_eth_dev_reset(struct rte_eth_dev *dev) -{ - if (dev->data->dev_started) { - RTE_PMD_DEBUG_TRACE( - "port %d must be stopped to allow reset\n", - dev->data->port_id); - return; - } - - rte_eth_dev_rx_queue_config(dev, 0); - rte_eth_dev_tx_queue_config(dev, 0); - - memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf)); -} - -static void -rte_eth_dev_config_restore(uint16_t port_id) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct ether_addr *addr; - uint16_t i; - uint32_t pool = 0; - uint64_t pool_mask; - - dev = &rte_eth_devices[port_id]; - - rte_eth_dev_info_get(port_id, &dev_info); - - /* replay MAC address configuration including default MAC */ - addr = &dev->data->mac_addrs[0]; - if (*dev->dev_ops->mac_addr_set != NULL) - (*dev->dev_ops->mac_addr_set)(dev, addr); - else if (*dev->dev_ops->mac_addr_add != NULL) - (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); - - if (*dev->dev_ops->mac_addr_add != NULL) { - for (i = 1; i < dev_info.max_mac_addrs; i++) { - addr = &dev->data->mac_addrs[i]; - - /* skip zero address */ - if (is_zero_ether_addr(addr)) - continue; - - pool = 0; - pool_mask = dev->data->mac_pool_sel[i]; - - do { - if (pool_mask & 1ULL) - (*dev->dev_ops->mac_addr_add)(dev, - addr, i, pool); - pool_mask >>= 1; - pool++; - } while (pool_mask); - } - } - - /* replay promiscuous configuration */ - if (rte_eth_promiscuous_get(port_id) == 1) - rte_eth_promiscuous_enable(port_id); - else if (rte_eth_promiscuous_get(port_id) == 0) - rte_eth_promiscuous_disable(port_id); - - /* replay all multicast configuration */ - if (rte_eth_allmulticast_get(port_id) == 1) - rte_eth_allmulticast_enable(port_id); - else if (rte_eth_allmulticast_get(port_id) == 0) - rte_eth_allmulticast_disable(port_id); -} - -int -rte_eth_dev_start(uint16_t port_id) -{ - struct rte_eth_dev *dev; - int diag; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); - - if (dev->data->dev_started != 0) { - RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 - " already started\n", - port_id); - return 0; - } - - diag = (*dev->dev_ops->dev_start)(dev); - if (diag == 0) - dev->data->dev_started = 1; - else - return eth_err(port_id, diag); - - rte_eth_dev_config_restore(port_id); - - if (dev->data->dev_conf.intr_conf.lsc == 0) { - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); - (*dev->dev_ops->link_update)(dev, 0); - } - return 0; -} - -void -rte_eth_dev_stop(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); - - if (dev->data->dev_started == 0) { - RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 - " already stopped\n", - port_id); - return; - } - - dev->data->dev_started = 0; - (*dev->dev_ops->dev_stop)(dev); -} - -int -rte_eth_dev_set_link_up(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->dev_set_link_up)(dev)); -} - -int 
-rte_eth_dev_set_link_down(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->dev_set_link_down)(dev)); -} - -void -rte_eth_dev_close(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close); - dev->data->dev_started = 0; - (*dev->dev_ops->dev_close)(dev); - - dev->data->nb_rx_queues = 0; - rte_free(dev->data->rx_queues); - dev->data->rx_queues = NULL; - dev->data->nb_tx_queues = 0; - rte_free(dev->data->tx_queues); - dev->data->tx_queues = NULL; -} - -int -rte_eth_dev_reset(uint16_t port_id) -{ - struct rte_eth_dev *dev; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP); - - rte_eth_dev_stop(port_id); - ret = dev->dev_ops->dev_reset(dev); - - return eth_err(port_id, ret); -} - -int __rte_experimental -rte_eth_dev_is_removed(uint16_t port_id) -{ - struct rte_eth_dev *dev; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); - - dev = &rte_eth_devices[port_id]; - - if (dev->state == RTE_ETH_DEV_REMOVED) - return 1; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->is_removed, 0); - - ret = dev->dev_ops->is_removed(dev); - if (ret != 0) - /* Device is physically removed. */ - dev->state = RTE_ETH_DEV_REMOVED; - - return ret; -} - -int -rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mp) -{ - int ret; - uint32_t mbp_buf_size; - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_rxconf local_conf; - void **rxq; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (rx_queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); - return -EINVAL; - } - - if (dev->data->dev_started) { - RTE_PMD_DEBUG_TRACE( - "port %d must be stopped to allow configuration\n", port_id); - return -EBUSY; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP); - - /* - * Check the size of the mbuf data buffer. - * This value must be provided in the private data of the memory pool. - * First check that the memory pool has a valid private data. 
- */ - rte_eth_dev_info_get(port_id, &dev_info); - if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) { - RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n", - mp->name, (int) mp->private_data_size, - (int) sizeof(struct rte_pktmbuf_pool_private)); - return -ENOSPC; - } - mbp_buf_size = rte_pktmbuf_data_room_size(mp); - - if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { - RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d " - "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)" - "=%d)\n", - mp->name, - (int)mbp_buf_size, - (int)(RTE_PKTMBUF_HEADROOM + - dev_info.min_rx_bufsize), - (int)RTE_PKTMBUF_HEADROOM, - (int)dev_info.min_rx_bufsize); - return -EINVAL; - } - - if (nb_rx_desc > dev_info.rx_desc_lim.nb_max || - nb_rx_desc < dev_info.rx_desc_lim.nb_min || - nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { - - RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), " - "should be: <= %hu, = %hu, and a product of %hu\n", - nb_rx_desc, - dev_info.rx_desc_lim.nb_max, - dev_info.rx_desc_lim.nb_min, - dev_info.rx_desc_lim.nb_align); - return -EINVAL; - } - - rxq = dev->data->rx_queues; - if (rxq[rx_queue_id]) { - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, - -ENOTSUP); - (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); - rxq[rx_queue_id] = NULL; - } - - if (rx_conf == NULL) - rx_conf = &dev_info.default_rxconf; - - local_conf = *rx_conf; - if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) { - /** - * Reflect port offloads to queue offloads in order for - * offloads to not be discarded. - */ - rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode, - &local_conf.offloads); - } - - ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, - socket_id, &local_conf, mp); - if (!ret) { - if (!dev->data->min_rx_buf_size || - dev->data->min_rx_buf_size > mbp_buf_size) - dev->data->min_rx_buf_size = mbp_buf_size; - } - - return eth_err(port_id, ret); -} - -/** - * A conversion function from txq_flags API. - */ -static void -rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads) -{ - uint64_t offloads = 0; - - if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)) - offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; - if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL)) - offloads |= DEV_TX_OFFLOAD_VLAN_INSERT; - if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP)) - offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; - if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP)) - offloads |= DEV_TX_OFFLOAD_UDP_CKSUM; - if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP)) - offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; - if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) && - (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP)) - offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; - - *tx_offloads = offloads; -} - -/** - * A conversion function from offloads API. 
- */ -static void -rte_eth_convert_txq_offloads(const uint64_t tx_offloads, uint32_t *txq_flags) -{ - uint32_t flags = 0; - - if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)) - flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT)) - flags |= ETH_TXQ_FLAGS_NOVLANOFFL; - if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM)) - flags |= ETH_TXQ_FLAGS_NOXSUMSCTP; - if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) - flags |= ETH_TXQ_FLAGS_NOXSUMUDP; - if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) - flags |= ETH_TXQ_FLAGS_NOXSUMTCP; - if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) - flags |= (ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP); - - *txq_flags = flags; -} - -int -rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, - uint16_t nb_tx_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_txconf local_conf; - void **txq; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - if (tx_queue_id >= dev->data->nb_tx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); - return -EINVAL; - } - - if (dev->data->dev_started) { - RTE_PMD_DEBUG_TRACE( - "port %d must be stopped to allow configuration\n", port_id); - return -EBUSY; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP); - - rte_eth_dev_info_get(port_id, &dev_info); - - if (nb_tx_desc > dev_info.tx_desc_lim.nb_max || - nb_tx_desc < dev_info.tx_desc_lim.nb_min || - nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { - RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), " - "should be: <= %hu, = %hu, and a product of %hu\n", - nb_tx_desc, - dev_info.tx_desc_lim.nb_max, - dev_info.tx_desc_lim.nb_min, - dev_info.tx_desc_lim.nb_align); - return -EINVAL; - } - - txq = dev->data->tx_queues; - if (txq[tx_queue_id]) { - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, - -ENOTSUP); - (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); - txq[tx_queue_id] = NULL; - } - - if (tx_conf == NULL) - tx_conf = &dev_info.default_txconf; - - /* - * Convert between the offloads API to enable PMDs to support - * only one of them. - */ - local_conf = *tx_conf; - if (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) { - rte_eth_convert_txq_offloads(tx_conf->offloads, - &local_conf.txq_flags); - /* Keep the ignore flag. 
*/ - local_conf.txq_flags |= ETH_TXQ_FLAGS_IGNORE; - } else { - rte_eth_convert_txq_flags(tx_conf->txq_flags, - &local_conf.offloads); - } - - return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev, - tx_queue_id, nb_tx_desc, socket_id, &local_conf)); -} - -void -rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, - void *userdata __rte_unused) -{ - unsigned i; - - for (i = 0; i < unsent; i++) - rte_pktmbuf_free(pkts[i]); -} - -void -rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, - void *userdata) -{ - uint64_t *count = userdata; - unsigned i; - - for (i = 0; i < unsent; i++) - rte_pktmbuf_free(pkts[i]); - - *count += unsent; -} - -int -rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, - buffer_tx_error_fn cbfn, void *userdata) -{ - buffer->error_callback = cbfn; - buffer->error_userdata = userdata; - return 0; -} - -int -rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) -{ - int ret = 0; - - if (buffer == NULL) - return -EINVAL; - - buffer->size = size; - if (buffer->error_callback == NULL) { - ret = rte_eth_tx_buffer_set_err_callback( - buffer, rte_eth_tx_buffer_drop_callback, NULL); - } - - return ret; -} - -int -rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - int ret; - - /* Validate Input Data. Bail if not valid or not supported. */ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_done_cleanup, -ENOTSUP); - - /* Call driver to free pending mbufs. */ - ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id], - free_cnt); - return eth_err(port_id, ret); -} - -void -rte_eth_promiscuous_enable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable); - (*dev->dev_ops->promiscuous_enable)(dev); - dev->data->promiscuous = 1; -} - -void -rte_eth_promiscuous_disable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_disable); - dev->data->promiscuous = 0; - (*dev->dev_ops->promiscuous_disable)(dev); -} - -int -rte_eth_promiscuous_get(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - return dev->data->promiscuous; -} - -void -rte_eth_allmulticast_enable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_enable); - (*dev->dev_ops->allmulticast_enable)(dev); - dev->data->all_multicast = 1; -} - -void -rte_eth_allmulticast_disable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_disable); - dev->data->all_multicast = 0; - (*dev->dev_ops->allmulticast_disable)(dev); -} - -int -rte_eth_allmulticast_get(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - return dev->data->all_multicast; -} - -static inline int -rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev, - struct rte_eth_link *link) -{ - struct rte_eth_link *dst = link; - struct rte_eth_link *src = 
&(dev->data->dev_link); - - if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, - *(uint64_t *)src) == 0) - return -1; - - return 0; -} - -void -rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - if (dev->data->dev_conf.intr_conf.lsc != 0) - rte_eth_dev_atomic_read_link_status(dev, eth_link); - else { - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); - (*dev->dev_ops->link_update)(dev, 1); - *eth_link = dev->data->dev_link; - } -} - -void -rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - if (dev->data->dev_conf.intr_conf.lsc != 0) - rte_eth_dev_atomic_read_link_status(dev, eth_link); - else { - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); - (*dev->dev_ops->link_update)(dev, 0); - *eth_link = dev->data->dev_link; - } -} - -int -rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - memset(stats, 0, sizeof(*stats)); - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); - stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; - return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); -} - -int -rte_eth_stats_reset(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_reset, -ENOTSUP); - (*dev->dev_ops->stats_reset)(dev); - dev->data->rx_mbuf_alloc_failed = 0; - - return 0; -} - -static inline int -get_xstats_basic_count(struct rte_eth_dev *dev) -{ - uint16_t nb_rxqs, nb_txqs; - int count; - - nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - - count = RTE_NB_STATS; - count += nb_rxqs * RTE_NB_RXQ_STATS; - count += nb_txqs * RTE_NB_TXQ_STATS; - - return count; -} - -static int -get_xstats_count(uint16_t port_id) -{ - struct rte_eth_dev *dev; - int count; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - dev = &rte_eth_devices[port_id]; - if (dev->dev_ops->xstats_get_names_by_id != NULL) { - count = (*dev->dev_ops->xstats_get_names_by_id)(dev, NULL, - NULL, 0); - if (count < 0) - return eth_err(port_id, count); - } - if (dev->dev_ops->xstats_get_names != NULL) { - count = (*dev->dev_ops->xstats_get_names)(dev, NULL, 0); - if (count < 0) - return eth_err(port_id, count); - } else - count = 0; - - - count += get_xstats_basic_count(dev); - - return count; -} - -int -rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, - uint64_t *id) -{ - int cnt_xstats, idx_xstat; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (!id) { - RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n"); - return -ENOMEM; - } - - if (!xstat_name) { - RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n"); - return -ENOMEM; - } - - /* Get count */ - cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); - if (cnt_xstats < 0) { - RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n"); - return -ENODEV; - } - - /* Get id-name lookup table */ - struct rte_eth_xstat_name xstats_names[cnt_xstats]; - - if (cnt_xstats != rte_eth_xstats_get_names_by_id( - port_id, xstats_names, cnt_xstats, NULL)) { - RTE_PMD_DEBUG_TRACE("Error: 
Cannot get xstats lookup\n"); - return -1; - } - - for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { - if (!strcmp(xstats_names[idx_xstat].name, xstat_name)) { - *id = idx_xstat; - return 0; - }; - } - - return -EINVAL; -} - -/* retrieve basic stats names */ -static int -rte_eth_basic_stats_get_names(struct rte_eth_dev *dev, - struct rte_eth_xstat_name *xstats_names) -{ - int cnt_used_entries = 0; - uint32_t idx, id_queue; - uint16_t num_q; - - for (idx = 0; idx < RTE_NB_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "%s", rte_stats_strings[idx].name); - cnt_used_entries++; - } - num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - for (id_queue = 0; id_queue < num_q; id_queue++) { - for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "rx_q%u%s", - id_queue, rte_rxq_stats_strings[idx].name); - cnt_used_entries++; - } - - } - num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - for (id_queue = 0; id_queue < num_q; id_queue++) { - for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { - snprintf(xstats_names[cnt_used_entries].name, - sizeof(xstats_names[0].name), - "tx_q%u%s", - id_queue, rte_txq_stats_strings[idx].name); - cnt_used_entries++; - } - } - return cnt_used_entries; -} - -/* retrieve ethdev extended statistics names */ -int -rte_eth_xstats_get_names_by_id(uint16_t port_id, - struct rte_eth_xstat_name *xstats_names, unsigned int size, - uint64_t *ids) -{ - struct rte_eth_xstat_name *xstats_names_copy; - unsigned int no_basic_stat_requested = 1; - unsigned int no_ext_stat_requested = 1; - unsigned int expected_entries; - unsigned int basic_count; - struct rte_eth_dev *dev; - unsigned int i; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - basic_count = get_xstats_basic_count(dev); - ret = get_xstats_count(port_id); - if (ret < 0) - return ret; - expected_entries = (unsigned int)ret; - - /* Return max number of stats if no ids given */ - if (!ids) { - if (!xstats_names) - return expected_entries; - else if (xstats_names && size < expected_entries) - return expected_entries; - } - - if (ids && !xstats_names) - return -EINVAL; - - if (ids && dev->dev_ops->xstats_get_names_by_id != NULL && size > 0) { - uint64_t ids_copy[size]; - - for (i = 0; i < size; i++) { - if (ids[i] < basic_count) { - no_basic_stat_requested = 0; - break; - } - - /* - * Convert ids to xstats ids that PMD knows. - * ids known by user are basic + extended stats. 
- */ - ids_copy[i] = ids[i] - basic_count; - } - - if (no_basic_stat_requested) - return (*dev->dev_ops->xstats_get_names_by_id)(dev, - xstats_names, ids_copy, size); - } - - /* Retrieve all stats */ - if (!ids) { - int num_stats = rte_eth_xstats_get_names(port_id, xstats_names, - expected_entries); - if (num_stats < 0 || num_stats > (int)expected_entries) - return num_stats; - else - return expected_entries; - } - - xstats_names_copy = calloc(expected_entries, - sizeof(struct rte_eth_xstat_name)); - - if (!xstats_names_copy) { - RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory"); - return -ENOMEM; - } - - if (ids) { - for (i = 0; i < size; i++) { - if (ids[i] >= basic_count) { - no_ext_stat_requested = 0; - break; - } - } - } - - /* Fill xstats_names_copy structure */ - if (ids && no_ext_stat_requested) { - rte_eth_basic_stats_get_names(dev, xstats_names_copy); - } else { - ret = rte_eth_xstats_get_names(port_id, xstats_names_copy, - expected_entries); - if (ret < 0) { - free(xstats_names_copy); - return ret; - } - } - - /* Filter stats */ - for (i = 0; i < size; i++) { - if (ids[i] >= expected_entries) { - RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); - free(xstats_names_copy); - return -1; - } - xstats_names[i] = xstats_names_copy[ids[i]]; - } - - free(xstats_names_copy); - return size; -} - -int -rte_eth_xstats_get_names(uint16_t port_id, - struct rte_eth_xstat_name *xstats_names, - unsigned int size) -{ - struct rte_eth_dev *dev; - int cnt_used_entries; - int cnt_expected_entries; - int cnt_driver_entries; - - cnt_expected_entries = get_xstats_count(port_id); - if (xstats_names == NULL || cnt_expected_entries < 0 || - (int)size < cnt_expected_entries) - return cnt_expected_entries; - - /* port_id checked in get_xstats_count() */ - dev = &rte_eth_devices[port_id]; - - cnt_used_entries = rte_eth_basic_stats_get_names( - dev, xstats_names); - - if (dev->dev_ops->xstats_get_names != NULL) { - /* If there are any driver-specific xstats, append them - * to end of list. 
- */ - cnt_driver_entries = (*dev->dev_ops->xstats_get_names)( - dev, - xstats_names + cnt_used_entries, - size - cnt_used_entries); - if (cnt_driver_entries < 0) - return eth_err(port_id, cnt_driver_entries); - cnt_used_entries += cnt_driver_entries; - } - - return cnt_used_entries; -} - - -static int -rte_eth_basic_stats_get(uint16_t port_id, struct rte_eth_xstat *xstats) -{ - struct rte_eth_dev *dev; - struct rte_eth_stats eth_stats; - unsigned int count = 0, i, q; - uint64_t val, *stats_ptr; - uint16_t nb_rxqs, nb_txqs; - int ret; - - ret = rte_eth_stats_get(port_id, &eth_stats); - if (ret < 0) - return ret; - - dev = &rte_eth_devices[port_id]; - - nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - - /* global stats */ - for (i = 0; i < RTE_NB_STATS; i++) { - stats_ptr = RTE_PTR_ADD(&eth_stats, - rte_stats_strings[i].offset); - val = *stats_ptr; - xstats[count++].value = val; - } - - /* per-rxq stats */ - for (q = 0; q < nb_rxqs; q++) { - for (i = 0; i < RTE_NB_RXQ_STATS; i++) { - stats_ptr = RTE_PTR_ADD(&eth_stats, - rte_rxq_stats_strings[i].offset + - q * sizeof(uint64_t)); - val = *stats_ptr; - xstats[count++].value = val; - } - } - - /* per-txq stats */ - for (q = 0; q < nb_txqs; q++) { - for (i = 0; i < RTE_NB_TXQ_STATS; i++) { - stats_ptr = RTE_PTR_ADD(&eth_stats, - rte_txq_stats_strings[i].offset + - q * sizeof(uint64_t)); - val = *stats_ptr; - xstats[count++].value = val; - } - } - return count; -} - -/* retrieve ethdev extended statistics */ -int -rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, - uint64_t *values, unsigned int size) -{ - unsigned int no_basic_stat_requested = 1; - unsigned int no_ext_stat_requested = 1; - unsigned int num_xstats_filled; - unsigned int basic_count; - uint16_t expected_entries; - struct rte_eth_dev *dev; - unsigned int i; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - ret = get_xstats_count(port_id); - if (ret < 0) - return ret; - expected_entries = (uint16_t)ret; - struct rte_eth_xstat xstats[expected_entries]; - dev = &rte_eth_devices[port_id]; - basic_count = get_xstats_basic_count(dev); - - /* Return max number of stats if no ids given */ - if (!ids) { - if (!values) - return expected_entries; - else if (values && size < expected_entries) - return expected_entries; - } - - if (ids && !values) - return -EINVAL; - - if (ids && dev->dev_ops->xstats_get_by_id != NULL && size) { - unsigned int basic_count = get_xstats_basic_count(dev); - uint64_t ids_copy[size]; - - for (i = 0; i < size; i++) { - if (ids[i] < basic_count) { - no_basic_stat_requested = 0; - break; - } - - /* - * Convert ids to xstats ids that PMD knows. - * ids known by user are basic + extended stats.
- */ - ids_copy[i] = ids[i] - basic_count; - } - - if (no_basic_stat_requested) - return (*dev->dev_ops->xstats_get_by_id)(dev, ids_copy, - values, size); - } - - if (ids) { - for (i = 0; i < size; i++) { - if (ids[i] >= basic_count) { - no_ext_stat_requested = 0; - break; - } - } - } - - /* Fill the xstats structure */ - if (ids && no_ext_stat_requested) - ret = rte_eth_basic_stats_get(port_id, xstats); - else - ret = rte_eth_xstats_get(port_id, xstats, expected_entries); - - if (ret < 0) - return ret; - num_xstats_filled = (unsigned int)ret; - - /* Return all stats */ - if (!ids) { - for (i = 0; i < num_xstats_filled; i++) - values[i] = xstats[i].value; - return expected_entries; - } - - /* Filter stats */ - for (i = 0; i < size; i++) { - if (ids[i] >= expected_entries) { - RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); - return -1; - } - values[i] = xstats[ids[i]].value; - } - return size; -} - -int -rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, - unsigned int n) -{ - struct rte_eth_dev *dev; - unsigned int count = 0, i; - signed int xcount = 0; - uint16_t nb_rxqs, nb_txqs; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev = &rte_eth_devices[port_id]; - - nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); - - /* Return generic statistics */ - count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + - (nb_txqs * RTE_NB_TXQ_STATS); - - /* implemented by the driver */ - if (dev->dev_ops->xstats_get != NULL) { - /* Retrieve the xstats from the driver at the end of the - * xstats struct. - */ - xcount = (*dev->dev_ops->xstats_get)(dev, - xstats ? xstats + count : NULL, - (n > count) ? n - count : 0); - - if (xcount < 0) - return eth_err(port_id, xcount); - } - - if (n < count + xcount || xstats == NULL) - return count + xcount; - - /* now fill the xstats structure */ - ret = rte_eth_basic_stats_get(port_id, xstats); - if (ret < 0) - return ret; - count = ret; - - for (i = 0; i < count; i++) - xstats[i].id = i; - /* add an offset to driver-specific stats */ - for ( ; i < count + xcount; i++) - xstats[i].id += count; - - return count + xcount; -} - -/* reset ethdev extended statistics */ -void -rte_eth_xstats_reset(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - /* implemented by the driver */ - if (dev->dev_ops->xstats_reset != NULL) { - (*dev->dev_ops->xstats_reset)(dev); - return; - } - - /* fallback to default */ - rte_eth_stats_reset(port_id); -} - -static int -set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx, - uint8_t is_rx) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP); - return (*dev->dev_ops->queue_stats_mapping_set) - (dev, queue_id, stat_idx, is_rx); -} - - -int -rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id, - uint8_t stat_idx) -{ - return eth_err(port_id, set_queue_stats_mapping(port_id, tx_queue_id, - stat_idx, STAT_QMAP_TX)); -} - - -int -rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id, - uint8_t stat_idx) -{ - return eth_err(port_id, set_queue_stats_mapping(port_id, rx_queue_id, - stat_idx, STAT_QMAP_RX)); -} - -int -rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size) -{ - struct 
rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->fw_version_get)(dev, - fw_version, fw_size)); -} - -void -rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info) -{ - struct rte_eth_dev *dev; - const struct rte_eth_desc_lim lim = { - .nb_max = UINT16_MAX, - .nb_min = 0, - .nb_align = 1, - }; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - - memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); - dev_info->rx_desc_lim = lim; - dev_info->tx_desc_lim = lim; - - RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); - (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->driver_name = dev->device->driver->name; - dev_info->nb_rx_queues = dev->data->nb_rx_queues; - dev_info->nb_tx_queues = dev->data->nb_tx_queues; -} - -int -rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, - uint32_t *ptypes, int num) -{ - int i, j; - struct rte_eth_dev *dev; - const uint32_t *all_ptypes; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_supported_ptypes_get, 0); - all_ptypes = (*dev->dev_ops->dev_supported_ptypes_get)(dev); - - if (!all_ptypes) - return 0; - - for (i = 0, j = 0; all_ptypes[i] != RTE_PTYPE_UNKNOWN; ++i) - if (all_ptypes[i] & ptype_mask) { - if (j < num) - ptypes[j] = all_ptypes[i]; - j++; - } - - return j; -} - -void -rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_RET(port_id); - dev = &rte_eth_devices[port_id]; - ether_addr_copy(&dev->data->mac_addrs[0], mac_addr); -} - - -int -rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - *mtu = dev->data->mtu; - return 0; -} - -int -rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu) -{ - int ret; - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP); - - ret = (*dev->dev_ops->mtu_set)(dev, mtu); - if (!ret) - dev->data->mtu = mtu; - - return eth_err(port_id, ret); -} - -int -rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on) -{ - struct rte_eth_dev *dev; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - if (!(dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_FILTER)) { - RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id); - return -ENOSYS; - } - - if (vlan_id > 4095) { - RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n", - port_id, (unsigned) vlan_id); - return -EINVAL; - } - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); - - ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); - if (ret == 0) { - struct rte_vlan_filter_conf *vfc; - int vidx; - int vbit; - - vfc = &dev->data->vlan_filter_conf; - vidx = vlan_id / 64; - vbit = vlan_id % 64; - - if (on) - vfc->ids[vidx] |= UINT64_C(1) << vbit; - else - vfc->ids[vidx] &= ~(UINT64_C(1) << vbit); - } - - return eth_err(port_id, ret); -} - -int -rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, - int on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = 
&rte_eth_devices[port_id]; - if (rx_queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); - (*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on); - - return 0; -} - -int -rte_eth_dev_set_vlan_ether_type(uint16_t port_id, - enum rte_vlan_type vlan_type, - uint16_t tpid) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP); - - return eth_err(port_id, (*dev->dev_ops->vlan_tpid_set)(dev, vlan_type, - tpid)); -} - -int -rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) -{ - struct rte_eth_dev *dev; - int ret = 0; - int mask = 0; - int cur, org = 0; - uint64_t orig_offloads; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - /* save original values in case of failure */ - orig_offloads = dev->data->dev_conf.rxmode.offloads; - - /*check which option changed by application*/ - cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_STRIP); - if (cur != org) { - if (cur) - dev->data->dev_conf.rxmode.offloads |= - DEV_RX_OFFLOAD_VLAN_STRIP; - else - dev->data->dev_conf.rxmode.offloads &= - ~DEV_RX_OFFLOAD_VLAN_STRIP; - mask |= ETH_VLAN_STRIP_MASK; - } - - cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_FILTER); - if (cur != org) { - if (cur) - dev->data->dev_conf.rxmode.offloads |= - DEV_RX_OFFLOAD_VLAN_FILTER; - else - dev->data->dev_conf.rxmode.offloads &= - ~DEV_RX_OFFLOAD_VLAN_FILTER; - mask |= ETH_VLAN_FILTER_MASK; - } - - cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); - org = !!(dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_EXTEND); - if (cur != org) { - if (cur) - dev->data->dev_conf.rxmode.offloads |= - DEV_RX_OFFLOAD_VLAN_EXTEND; - else - dev->data->dev_conf.rxmode.offloads &= - ~DEV_RX_OFFLOAD_VLAN_EXTEND; - mask |= ETH_VLAN_EXTEND_MASK; - } - - /*no change*/ - if (mask == 0) - return ret; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); - - /* - * Convert to the offload bitfield API just in case the underlying PMD - * still supporting it. 
- */ - rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads, - &dev->data->dev_conf.rxmode); - ret = (*dev->dev_ops->vlan_offload_set)(dev, mask); - if (ret) { - /* hit an error restore original values */ - dev->data->dev_conf.rxmode.offloads = orig_offloads; - rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads, - &dev->data->dev_conf.rxmode); - } - - return eth_err(port_id, ret); -} - -int -rte_eth_dev_get_vlan_offload(uint16_t port_id) -{ - struct rte_eth_dev *dev; - int ret = 0; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - if (dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_STRIP) - ret |= ETH_VLAN_STRIP_OFFLOAD; - - if (dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_FILTER) - ret |= ETH_VLAN_FILTER_OFFLOAD; - - if (dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_VLAN_EXTEND) - ret |= ETH_VLAN_EXTEND_OFFLOAD; - - return ret; -} - -int -rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP); - - return eth_err(port_id, (*dev->dev_ops->vlan_pvid_set)(dev, pvid, on)); -} - -int -rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP); - memset(fc_conf, 0, sizeof(*fc_conf)); - return eth_err(port_id, (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf)); -} - -int -rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) { - RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf)); -} - -int -rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, - struct rte_eth_pfc_conf *pfc_conf) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) { - RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - /* High water, low water validation are device specific */ - if (*dev->dev_ops->priority_flow_ctrl_set) - return eth_err(port_id, (*dev->dev_ops->priority_flow_ctrl_set) - (dev, pfc_conf)); - return -ENOTSUP; -} - -static int -rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - uint16_t i, num; - - if (!reta_conf) - return -EINVAL; - - num = (reta_size + RTE_RETA_GROUP_SIZE - 1) / RTE_RETA_GROUP_SIZE; - for (i = 0; i < num; i++) { - if (reta_conf[i].mask) - return 0; - } - - return -EINVAL; -} - -static int -rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size, - uint16_t max_rxq) -{ - uint16_t i, idx, shift; - - if (!reta_conf) - return -EINVAL; - - if (max_rxq == 0) { - RTE_PMD_DEBUG_TRACE("No receive queue is available\n"); - return -EINVAL; - } - - for (i = 0; i < reta_size; i++) { - idx = i / RTE_RETA_GROUP_SIZE; - shift = i % RTE_RETA_GROUP_SIZE; - if ((reta_conf[idx].mask & (1ULL << shift)) && - 
(reta_conf[idx].reta[shift] >= max_rxq)) { - RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds " - "the maximum rxq index: %u\n", idx, shift, - reta_conf[idx].reta[shift], max_rxq); - return -EINVAL; - } - } - - return 0; -} - -int -rte_eth_dev_rss_reta_update(uint16_t port_id, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - struct rte_eth_dev *dev; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - /* Check mask bits */ - ret = rte_eth_check_reta_mask(reta_conf, reta_size); - if (ret < 0) - return ret; - - dev = &rte_eth_devices[port_id]; - - /* Check entry value */ - ret = rte_eth_check_reta_entry(reta_conf, reta_size, - dev->data->nb_rx_queues); - if (ret < 0) - return ret; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->reta_update)(dev, reta_conf, - reta_size)); -} - -int -rte_eth_dev_rss_reta_query(uint16_t port_id, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - struct rte_eth_dev *dev; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - /* Check mask bits */ - ret = rte_eth_check_reta_mask(reta_conf, reta_size); - if (ret < 0) - return ret; - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->reta_query)(dev, reta_conf, - reta_size)); -} - -int -rte_eth_dev_rss_hash_update(uint16_t port_id, - struct rte_eth_rss_conf *rss_conf) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev, - rss_conf)); -} - -int -rte_eth_dev_rss_hash_conf_get(uint16_t port_id, - struct rte_eth_rss_conf *rss_conf) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev, - rss_conf)); -} - -int -rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, - struct rte_eth_udp_tunnel *udp_tunnel) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - if (udp_tunnel == NULL) { - RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); - return -EINVAL; - } - - if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { - RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_add, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_add)(dev, - udp_tunnel)); -} - -int -rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, - struct rte_eth_udp_tunnel *udp_tunnel) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - if (udp_tunnel == NULL) { - RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); - return -EINVAL; - } - - if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { - RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_del, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_del)(dev, - udp_tunnel)); -} - -int -rte_eth_led_on(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->dev_led_on)(dev)); -} - -int -rte_eth_led_off(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->dev_led_off)(dev)); -} - -/* - * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find - * an empty spot. - */ -static int -get_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) -{ - struct rte_eth_dev_info dev_info; - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - unsigned i; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - rte_eth_dev_info_get(port_id, &dev_info); - - for (i = 0; i < dev_info.max_mac_addrs; i++) - if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0) - return i; - - return -1; -} - -static const struct ether_addr null_mac_addr; - -int -rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr, - uint32_t pool) -{ - struct rte_eth_dev *dev; - int index; - uint64_t pool_mask; - int ret; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP); - - if (is_zero_ether_addr(addr)) { - RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", - port_id); - return -EINVAL; - } - if (pool >= ETH_64_POOLS) { - RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1); - return -EINVAL; - } - - index = get_mac_addr_index(port_id, addr); - if (index < 0) { - index = get_mac_addr_index(port_id, &null_mac_addr); - if (index < 0) { - RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", - port_id); - return -ENOSPC; - } - } else { - pool_mask = dev->data->mac_pool_sel[index]; - - /* Check if both MAC address and pool is already there, and do nothing */ - if (pool_mask & (1ULL << pool)) - return 0; - } - - /* Update NIC */ - ret = (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); - - if (ret == 0) { - /* Update address in NIC data structure */ - ether_addr_copy(addr, &dev->data->mac_addrs[index]); - - /* Update pool bitmap in NIC data structure */ - dev->data->mac_pool_sel[index] |= (1ULL << pool); - } - - return eth_err(port_id, ret); -} - -int -rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr) -{ - struct rte_eth_dev *dev; - int index; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP); - - index = get_mac_addr_index(port_id, addr); - if (index == 0) { - RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id); - return -EADDRINUSE; - } else if (index < 0) - return 0; /* Do nothing if address wasn't found */ - - /* Update NIC */ - (*dev->dev_ops->mac_addr_remove)(dev, index); - - /* Update address in NIC data structure */ - ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]); - - /* reset pool bitmap */ - dev->data->mac_pool_sel[index] = 0; - - return 0; -} - -int -rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (!is_valid_assigned_ether_addr(addr)) - return -EINVAL; - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP); - - /* Update default address in NIC data structure */ - 
ether_addr_copy(addr, &dev->data->mac_addrs[0]); - - (*dev->dev_ops->mac_addr_set)(dev, addr); - - return 0; -} - - -/* - * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find - * an empty spot. - */ -static int -get_hash_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) -{ - struct rte_eth_dev_info dev_info; - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - unsigned i; - - rte_eth_dev_info_get(port_id, &dev_info); - if (!dev->data->hash_mac_addrs) - return -1; - - for (i = 0; i < dev_info.max_hash_mac_addrs; i++) - if (memcmp(addr, &dev->data->hash_mac_addrs[i], - ETHER_ADDR_LEN) == 0) - return i; - - return -1; -} - -int -rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, - uint8_t on) -{ - int index; - int ret; - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - if (is_zero_ether_addr(addr)) { - RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", - port_id); - return -EINVAL; - } - - index = get_hash_mac_addr_index(port_id, addr); - /* Check if it's already there, and do nothing */ - if ((index >= 0) && on) - return 0; - - if (index < 0) { - if (!on) { - RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not " - "set in UTA\n", port_id); - return -EINVAL; - } - - index = get_hash_mac_addr_index(port_id, &null_mac_addr); - if (index < 0) { - RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", - port_id); - return -ENOSPC; - } - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP); - ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on); - if (ret == 0) { - /* Update address in NIC data structure */ - if (on) - ether_addr_copy(addr, - &dev->data->hash_mac_addrs[index]); - else - ether_addr_copy(&null_mac_addr, - &dev->data->hash_mac_addrs[index]); - } - - return eth_err(port_id, ret); -} - -int -rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->uc_all_hash_table_set)(dev, - on)); -} - -int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, - uint16_t tx_rate) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - struct rte_eth_link link; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - rte_eth_dev_info_get(port_id, &dev_info); - link = dev->data->dev_link; - - if (queue_idx > dev_info.max_tx_queues) { - RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: " - "invalid queue id=%d\n", port_id, queue_idx); - return -EINVAL; - } - - if (tx_rate > link.link_speed) { - RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, " - "bigger than link speed= %d\n", - tx_rate, link.link_speed); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->set_queue_rate_limit)(dev, - queue_idx, tx_rate)); -} - -int -rte_eth_mirror_rule_set(uint16_t port_id, - struct rte_eth_mirror_conf *mirror_conf, - uint8_t rule_id, uint8_t on) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - if (mirror_conf->rule_type == 0) { - RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n"); - return -EINVAL; - } - - if (mirror_conf->dst_pool >= ETH_64_POOLS) { - RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id 
must be 0-%d\n", - ETH_64_POOLS - 1); - return -EINVAL; - } - - if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP | - ETH_MIRROR_VIRTUAL_POOL_DOWN)) && - (mirror_conf->pool_mask == 0)) { - RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n"); - return -EINVAL; - } - - if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) && - mirror_conf->vlan.vlan_mask == 0) { - RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP); - - return eth_err(port_id, (*dev->dev_ops->mirror_rule_set)(dev, - mirror_conf, rule_id, on)); -} - -int -rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP); - - return eth_err(port_id, (*dev->dev_ops->mirror_rule_reset)(dev, - rule_id)); -} - -RTE_INIT(eth_dev_init_cb_lists) -{ - int i; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) - TAILQ_INIT(&rte_eth_devices[i].link_intr_cbs); -} - -int -rte_eth_dev_callback_register(uint16_t port_id, - enum rte_eth_event_type event, - rte_eth_dev_cb_fn cb_fn, void *cb_arg) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_callback *user_cb; - uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ - uint16_t last_port; - - if (!cb_fn) - return -EINVAL; - - if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { - RTE_LOG(ERR, EAL, "Invalid port_id=%d\n", port_id); - return -EINVAL; - } - - if (port_id == RTE_ETH_ALL) { - next_port = 0; - last_port = RTE_MAX_ETHPORTS - 1; - } else { - next_port = last_port = port_id; - } - - rte_spinlock_lock(&rte_eth_dev_cb_lock); - - do { - dev = &rte_eth_devices[next_port]; - - TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { - if (user_cb->cb_fn == cb_fn && - user_cb->cb_arg == cb_arg && - user_cb->event == event) { - break; - } - } - - /* create a new callback. 
*/ - if (user_cb == NULL) { - user_cb = rte_zmalloc("INTR_USER_CALLBACK", - sizeof(struct rte_eth_dev_callback), 0); - if (user_cb != NULL) { - user_cb->cb_fn = cb_fn; - user_cb->cb_arg = cb_arg; - user_cb->event = event; - TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), - user_cb, next); - } else { - rte_spinlock_unlock(&rte_eth_dev_cb_lock); - rte_eth_dev_callback_unregister(port_id, event, - cb_fn, cb_arg); - return -ENOMEM; - } - - } - } while (++next_port <= last_port); - - rte_spinlock_unlock(&rte_eth_dev_cb_lock); - return 0; -} - -int -rte_eth_dev_callback_unregister(uint16_t port_id, - enum rte_eth_event_type event, - rte_eth_dev_cb_fn cb_fn, void *cb_arg) -{ - int ret; - struct rte_eth_dev *dev; - struct rte_eth_dev_callback *cb, *next; - uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ - uint16_t last_port; - - if (!cb_fn) - return -EINVAL; - - if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { - RTE_LOG(ERR, EAL, "Invalid port_id=%d\n", port_id); - return -EINVAL; - } - - if (port_id == RTE_ETH_ALL) { - next_port = 0; - last_port = RTE_MAX_ETHPORTS - 1; - } else { - next_port = last_port = port_id; - } - - rte_spinlock_lock(&rte_eth_dev_cb_lock); - - do { - dev = &rte_eth_devices[next_port]; - ret = 0; - for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; - cb = next) { - - next = TAILQ_NEXT(cb, next); - - if (cb->cb_fn != cb_fn || cb->event != event || - (cb->cb_arg != (void *)-1 && cb->cb_arg != cb_arg)) - continue; - - /* - * if this callback is not executing right now, - * then remove it. - */ - if (cb->active == 0) { - TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); - rte_free(cb); - } else { - ret = -EAGAIN; - } - } - } while (++next_port <= last_port); - - rte_spinlock_unlock(&rte_eth_dev_cb_lock); - return ret; -} - -int -_rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event, void *ret_param) -{ - struct rte_eth_dev_callback *cb_lst; - struct rte_eth_dev_callback dev_cb; - int rc = 0; - - rte_spinlock_lock(&rte_eth_dev_cb_lock); - TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { - if (cb_lst->cb_fn == NULL || cb_lst->event != event) - continue; - dev_cb = *cb_lst; - cb_lst->active = 1; - if (ret_param != NULL) - dev_cb.ret_param = ret_param; - - rte_spinlock_unlock(&rte_eth_dev_cb_lock); - rc = dev_cb.cb_fn(dev->data->port_id, dev_cb.event, - dev_cb.cb_arg, dev_cb.ret_param); - rte_spinlock_lock(&rte_eth_dev_cb_lock); - cb_lst->active = 0; - } - rte_spinlock_unlock(&rte_eth_dev_cb_lock); - return rc; -} - -int -rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data) -{ - uint32_t vec; - struct rte_eth_dev *dev; - struct rte_intr_handle *intr_handle; - uint16_t qid; - int rc; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - if (!dev->intr_handle) { - RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); - return -ENOTSUP; - } - - intr_handle = dev->intr_handle; - if (!intr_handle->intr_vec) { - RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); - return -EPERM; - } - - for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { - vec = intr_handle->intr_vec[qid]; - rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); - if (rc && rc != -EEXIST) { - RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" - " op %d epfd %d vec %u\n", - port_id, qid, op, epfd, vec); - } - } - - return 0; -} - -const struct rte_memzone * -rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, - uint16_t queue_id, size_t size, unsigned align, - int socket_id) -{ 
- char z_name[RTE_MEMZONE_NAMESIZE]; - const struct rte_memzone *mz; - - snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->device->driver->name, ring_name, - dev->data->port_id, queue_id); - - mz = rte_memzone_lookup(z_name); - if (mz) - return mz; - - return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align); -} - -int -rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, - int epfd, int op, void *data) -{ - uint32_t vec; - struct rte_eth_dev *dev; - struct rte_intr_handle *intr_handle; - int rc; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - if (queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id); - return -EINVAL; - } - - if (!dev->intr_handle) { - RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); - return -ENOTSUP; - } - - intr_handle = dev->intr_handle; - if (!intr_handle->intr_vec) { - RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); - return -EPERM; - } - - vec = intr_handle->intr_vec[queue_id]; - rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); - if (rc && rc != -EEXIST) { - RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" - " op %d epfd %d vec %u\n", - port_id, queue_id, op, epfd, vec); - return rc; - } - - return 0; -} - -int -rte_eth_dev_rx_intr_enable(uint16_t port_id, - uint16_t queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_enable)(dev, - queue_id)); -} - -int -rte_eth_dev_rx_intr_disable(uint16_t port_id, - uint16_t queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_disable)(dev, - queue_id)); -} - - -int -rte_eth_dev_filter_supported(uint16_t port_id, - enum rte_filter_type filter_type) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); - return (*dev->dev_ops->filter_ctrl)(dev, filter_type, - RTE_ETH_FILTER_NOP, NULL); -} - -int -rte_eth_dev_filter_ctrl_v22(uint16_t port_id, - enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg); - -int -rte_eth_dev_filter_ctrl_v22(uint16_t port_id, - enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg) -{ - struct rte_eth_fdir_info_v22 { - enum rte_fdir_mode mode; - struct rte_eth_fdir_masks mask; - struct rte_eth_fdir_flex_conf flex_conf; - uint32_t guarant_spc; - uint32_t best_spc; - uint32_t flow_types_mask[1]; - uint32_t max_flexpayload; - uint32_t flex_payload_unit; - uint32_t max_flex_payload_segment_num; - uint16_t flex_payload_limit; - uint32_t flex_bitmask_unit; - uint32_t max_flex_bitmask_num; - }; - - struct rte_eth_hash_global_conf_v22 { - enum rte_eth_hash_function hash_func; - uint32_t sym_hash_enable_mask[1]; - uint32_t valid_bit_mask[1]; - }; - - struct rte_eth_hash_filter_info_v22 { - enum rte_eth_hash_filter_info_type info_type; - union { - uint8_t enable; - struct rte_eth_hash_global_conf_v22 global_conf; - struct rte_eth_input_set_conf input_set_conf; - } info; - }; - - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); - if (filter_op == RTE_ETH_FILTER_INFO) { - int retval; - struct rte_eth_fdir_info_v22 *fdir_info_v22; - struct rte_eth_fdir_info fdir_info; - - fdir_info_v22 = (struct rte_eth_fdir_info_v22 *)arg; - - retval = (*dev->dev_ops->filter_ctrl)(dev, filter_type, - filter_op, (void *)&fdir_info); - fdir_info_v22->mode = fdir_info.mode; - fdir_info_v22->mask = fdir_info.mask; - fdir_info_v22->flex_conf = fdir_info.flex_conf; - fdir_info_v22->guarant_spc = fdir_info.guarant_spc; - fdir_info_v22->best_spc = fdir_info.best_spc; - fdir_info_v22->flow_types_mask[0] = - (uint32_t)fdir_info.flow_types_mask[0]; - fdir_info_v22->max_flexpayload = fdir_info.max_flexpayload; - fdir_info_v22->flex_payload_unit = fdir_info.flex_payload_unit; - fdir_info_v22->max_flex_payload_segment_num = - fdir_info.max_flex_payload_segment_num; - fdir_info_v22->flex_payload_limit = - fdir_info.flex_payload_limit; - fdir_info_v22->flex_bitmask_unit = fdir_info.flex_bitmask_unit; - fdir_info_v22->max_flex_bitmask_num = - fdir_info.max_flex_bitmask_num; - return retval; - } else if (filter_op == RTE_ETH_FILTER_GET) { - int retval; - struct rte_eth_hash_filter_info f_info; - struct rte_eth_hash_filter_info_v22 *f_info_v22 = - (struct rte_eth_hash_filter_info_v22 *)arg; - - f_info.info_type = f_info_v22->info_type; - retval = (*dev->dev_ops->filter_ctrl)(dev, filter_type, - filter_op, (void *)&f_info); - - switch (f_info_v22->info_type) { - case RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT: - f_info_v22->info.enable = f_info.info.enable; - break; - case RTE_ETH_HASH_FILTER_GLOBAL_CONFIG: - f_info_v22->info.global_conf.hash_func = - f_info.info.global_conf.hash_func; - f_info_v22->info.global_conf.sym_hash_enable_mask[0] = - (uint32_t) - f_info.info.global_conf.sym_hash_enable_mask[0]; - f_info_v22->info.global_conf.valid_bit_mask[0] = - (uint32_t) - f_info.info.global_conf.valid_bit_mask[0]; - break; - case RTE_ETH_HASH_FILTER_INPUT_SET_SELECT: - f_info_v22->info.input_set_conf = - f_info.info.input_set_conf; - break; - default: - break; - } - return retval; - } else if (filter_op == RTE_ETH_FILTER_SET) { - struct rte_eth_hash_filter_info f_info; - struct rte_eth_hash_filter_info_v22 *f_v22 = - (struct rte_eth_hash_filter_info_v22 *)arg; - - f_info.info_type = f_v22->info_type; - switch (f_v22->info_type) { - case RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT: - f_info.info.enable = f_v22->info.enable; - break; - case RTE_ETH_HASH_FILTER_GLOBAL_CONFIG: - f_info.info.global_conf.hash_func = - f_v22->info.global_conf.hash_func; - f_info.info.global_conf.sym_hash_enable_mask[0] = - (uint32_t) - f_v22->info.global_conf.sym_hash_enable_mask[0]; - f_info.info.global_conf.valid_bit_mask[0] = - (uint32_t) - f_v22->info.global_conf.valid_bit_mask[0]; - break; - case RTE_ETH_HASH_FILTER_INPUT_SET_SELECT: - f_info.info.input_set_conf = - f_v22->info.input_set_conf; - break; - default: - break; - } - return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, - (void *)&f_info); - } else - return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, - arg); -} -VERSION_SYMBOL(rte_eth_dev_filter_ctrl, _v22, 2.2); - -int -rte_eth_dev_filter_ctrl_v1802(uint16_t port_id, - enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg); - -int -rte_eth_dev_filter_ctrl_v1802(uint16_t port_id, - enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = 
&rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->filter_ctrl)(dev, filter_type, - filter_op, arg)); -} -BIND_DEFAULT_SYMBOL(rte_eth_dev_filter_ctrl, _v1802, 18.02); -MAP_STATIC_SYMBOL(int rte_eth_dev_filter_ctrl(uint16_t port_id, - enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg), - rte_eth_dev_filter_ctrl_v1802); - -void * -rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, - rte_rx_callback_fn fn, void *user_param) -{ -#ifndef RTE_ETHDEV_RXTX_CALLBACKS - rte_errno = ENOTSUP; - return NULL; -#endif - /* check input parameters */ - if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || - queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { - rte_errno = EINVAL; - return NULL; - } - struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); - - if (cb == NULL) { - rte_errno = ENOMEM; - return NULL; - } - - cb->fn.rx = fn; - cb->param = user_param; - - rte_spinlock_lock(&rte_eth_rx_cb_lock); - /* Add the callbacks in fifo order. */ - struct rte_eth_rxtx_callback *tail = - rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; - - if (!tail) { - rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; - - } else { - while (tail->next) - tail = tail->next; - tail->next = cb; - } - rte_spinlock_unlock(&rte_eth_rx_cb_lock); - - return cb; -} - -void * -rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, - rte_rx_callback_fn fn, void *user_param) -{ -#ifndef RTE_ETHDEV_RXTX_CALLBACKS - rte_errno = ENOTSUP; - return NULL; -#endif - /* check input parameters */ - if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || - queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { - rte_errno = EINVAL; - return NULL; - } - - struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); - - if (cb == NULL) { - rte_errno = ENOMEM; - return NULL; - } - - cb->fn.rx = fn; - cb->param = user_param; - - rte_spinlock_lock(&rte_eth_rx_cb_lock); - /* Add the callbacks at fisrt position*/ - cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; - rte_smp_wmb(); - rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; - rte_spinlock_unlock(&rte_eth_rx_cb_lock); - - return cb; -} - -void * -rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, - rte_tx_callback_fn fn, void *user_param) -{ -#ifndef RTE_ETHDEV_RXTX_CALLBACKS - rte_errno = ENOTSUP; - return NULL; -#endif - /* check input parameters */ - if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || - queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { - rte_errno = EINVAL; - return NULL; - } - - struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); - - if (cb == NULL) { - rte_errno = ENOMEM; - return NULL; - } - - cb->fn.tx = fn; - cb->param = user_param; - - rte_spinlock_lock(&rte_eth_tx_cb_lock); - /* Add the callbacks in fifo order. */ - struct rte_eth_rxtx_callback *tail = - rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id]; - - if (!tail) { - rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb; - - } else { - while (tail->next) - tail = tail->next; - tail->next = cb; - } - rte_spinlock_unlock(&rte_eth_tx_cb_lock); - - return cb; -} - -int -rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, - struct rte_eth_rxtx_callback *user_cb) -{ -#ifndef RTE_ETHDEV_RXTX_CALLBACKS - return -ENOTSUP; -#endif - /* Check input parameters. 
*/ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - if (user_cb == NULL || - queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) - return -EINVAL; - - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - struct rte_eth_rxtx_callback *cb; - struct rte_eth_rxtx_callback **prev_cb; - int ret = -EINVAL; - - rte_spinlock_lock(&rte_eth_rx_cb_lock); - prev_cb = &dev->post_rx_burst_cbs[queue_id]; - for (; *prev_cb != NULL; prev_cb = &cb->next) { - cb = *prev_cb; - if (cb == user_cb) { - /* Remove the user cb from the callback list. */ - *prev_cb = cb->next; - ret = 0; - break; - } - } - rte_spinlock_unlock(&rte_eth_rx_cb_lock); - - return ret; -} - -int -rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, - struct rte_eth_rxtx_callback *user_cb) -{ -#ifndef RTE_ETHDEV_RXTX_CALLBACKS - return -ENOTSUP; -#endif - /* Check input parameters. */ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - if (user_cb == NULL || - queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) - return -EINVAL; - - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - int ret = -EINVAL; - struct rte_eth_rxtx_callback *cb; - struct rte_eth_rxtx_callback **prev_cb; - - rte_spinlock_lock(&rte_eth_tx_cb_lock); - prev_cb = &dev->pre_tx_burst_cbs[queue_id]; - for (; *prev_cb != NULL; prev_cb = &cb->next) { - cb = *prev_cb; - if (cb == user_cb) { - /* Remove the user cb from the callback list. */ - *prev_cb = cb->next; - ret = 0; - break; - } - } - rte_spinlock_unlock(&rte_eth_tx_cb_lock); - - return ret; -} - -int -rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, - struct rte_eth_rxq_info *qinfo) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (qinfo == NULL) - return -EINVAL; - - dev = &rte_eth_devices[port_id]; - if (queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP); - - memset(qinfo, 0, sizeof(*qinfo)); - dev->dev_ops->rxq_info_get(dev, queue_id, qinfo); - return 0; -} - -int -rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, - struct rte_eth_txq_info *qinfo) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (qinfo == NULL) - return -EINVAL; - - dev = &rte_eth_devices[port_id]; - if (queue_id >= dev->data->nb_tx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); - return -EINVAL; - } - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP); - - memset(qinfo, 0, sizeof(*qinfo)); - dev->dev_ops->txq_info_get(dev, queue_id, qinfo); - return 0; -} - -int -rte_eth_dev_set_mc_addr_list(uint16_t port_id, - struct ether_addr *mc_addr_set, - uint32_t nb_mc_addr) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP); - return eth_err(port_id, dev->dev_ops->set_mc_addr_list(dev, - mc_addr_set, nb_mc_addr)); -} - -int -rte_eth_timesync_enable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_enable)(dev)); -} - -int -rte_eth_timesync_disable(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - 
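A minimal usage sketch for the Rx callback hooks removed above, assuming a build with RTE_ETHDEV_RXTX_CALLBACKS enabled; count_rx_cb and install_rx_counter are illustrative names, and the handle returned by rte_eth_add_rx_callback() is what rte_eth_remove_rx_callback() later expects.

    #include <rte_ethdev.h>

    static uint64_t rx_count;    /* example user parameter */

    /* Post-Rx hook: count packets and hand the burst on unchanged. */
    static uint16_t
    count_rx_cb(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *pkts[],
            uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
    {
        uint64_t *cnt = user_param;

        (void)port_id; (void)queue_id; (void)pkts; (void)max_pkts;
        *cnt += nb_pkts;
        return nb_pkts;
    }

    static void *
    install_rx_counter(uint16_t port_id, uint16_t queue_id)
    {
        /* The returned handle is later passed to rte_eth_remove_rx_callback()
         * to unregister the hook. */
        return rte_eth_add_rx_callback(port_id, queue_id, count_rx_cb, &rx_count);
    }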
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_disable)(dev)); -} - -int -rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp, - uint32_t flags) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_read_rx_timestamp) - (dev, timestamp, flags)); -} - -int -rte_eth_timesync_read_tx_timestamp(uint16_t port_id, - struct timespec *timestamp) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_read_tx_timestamp) - (dev, timestamp)); -} - -int -rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_adjust_time)(dev, - delta)); -} - -int -rte_eth_timesync_read_time(uint16_t port_id, struct timespec *timestamp) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_read_time)(dev, - timestamp)); -} - -int -rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - dev = &rte_eth_devices[port_id]; - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->timesync_write_time)(dev, - timestamp)); -} - -int -rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->get_reg)(dev, info)); -} - -int -rte_eth_dev_get_eeprom_length(uint16_t port_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->get_eeprom_length)(dev)); -} - -int -rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->get_eeprom)(dev, info)); -} - -int -rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->set_eeprom)(dev, info)); -} - -int -rte_eth_dev_get_dcb_info(uint16_t port_id, - struct rte_eth_dcb_info *dcb_info) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - 
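A short sketch of the timesync helpers above (used for IEEE 1588/PTP support): enable timesync on the port and read the PMD's device clock. read_device_clock() is an illustrative wrapper; whether the calls succeed depends on the PMD implementing the timesync_* ops.

    #include <time.h>
    #include <rte_ethdev.h>

    static int
    read_device_clock(uint16_t port_id, struct timespec *ts)
    {
        int ret = rte_eth_timesync_enable(port_id);     /* start PTP timestamping */

        if (ret != 0)
            return ret;
        return rte_eth_timesync_read_time(port_id, ts); /* current device time */
    }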
memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info)); - - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->get_dcb_info)(dev, dcb_info)); -} - -int -rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, - struct rte_eth_l2_tunnel_conf *l2_tunnel) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - if (l2_tunnel == NULL) { - RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); - return -EINVAL; - } - - if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { - RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_eth_type_conf, - -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->l2_tunnel_eth_type_conf)(dev, - l2_tunnel)); -} - -int -rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, - struct rte_eth_l2_tunnel_conf *l2_tunnel, - uint32_t mask, - uint8_t en) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (l2_tunnel == NULL) { - RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); - return -EINVAL; - } - - if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { - RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n"); - return -EINVAL; - } - - if (mask == 0) { - RTE_PMD_DEBUG_TRACE("Mask should have a value.\n"); - return -EINVAL; - } - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_offload_set, - -ENOTSUP); - return eth_err(port_id, (*dev->dev_ops->l2_tunnel_offload_set)(dev, - l2_tunnel, mask, en)); -} - -static void -rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc, - const struct rte_eth_desc_lim *desc_lim) -{ - if (desc_lim->nb_align != 0) - *nb_desc = RTE_ALIGN_CEIL(*nb_desc, desc_lim->nb_align); - - if (desc_lim->nb_max != 0) - *nb_desc = RTE_MIN(*nb_desc, desc_lim->nb_max); - - *nb_desc = RTE_MAX(*nb_desc, desc_lim->nb_min); -} - -int -rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, - uint16_t *nb_rx_desc, - uint16_t *nb_tx_desc) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); - - rte_eth_dev_info_get(port_id, &dev_info); - - if (nb_rx_desc != NULL) - rte_eth_dev_adjust_nb_desc(nb_rx_desc, &dev_info.rx_desc_lim); - - if (nb_tx_desc != NULL) - rte_eth_dev_adjust_nb_desc(nb_tx_desc, &dev_info.tx_desc_lim); - - return 0; -} - -int -rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); - - if (pool == NULL) - return -EINVAL; - - dev = &rte_eth_devices[port_id]; - - if (*dev->dev_ops->pool_ops_supported == NULL) - return 1; /* all pools are supported */ - - return (*dev->dev_ops->pool_ops_supported)(dev, pool); -} diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h deleted file mode 100644 index 03615330..00000000 --- a/lib/librte_ether/rte_ethdev.h +++ /dev/null @@ -1,4112 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation - */ - -#ifndef _RTE_ETHDEV_H_ -#define _RTE_ETHDEV_H_ - -/** - * @file - * - * RTE Ethernet Device API - * - * The Ethernet Device API is composed of two parts: - * - * - The application-oriented Ethernet API that includes functions to setup - * an Ethernet device (configure it, setup its RX and TX queues and start it), - * to get its MAC address, the speed and the 
status of its physical link, - * to receive and to transmit packets, and so on. - * - * - The driver-oriented Ethernet API that exports functions allowing - * an Ethernet Poll Mode Driver (PMD) to allocate an Ethernet device instance, - * create memzone for HW rings and process registered callbacks, and so on. - * PMDs should include rte_ethdev_driver.h instead of this header. - * - * By default, all the functions of the Ethernet Device API exported by a PMD - * are lock-free functions which assume to not be invoked in parallel on - * different logical cores to work on the same target object. For instance, - * the receive function of a PMD cannot be invoked in parallel on two logical - * cores to poll the same RX queue [of the same port]. Of course, this function - * can be invoked in parallel by different logical cores on different RX queues. - * It is the responsibility of the upper level application to enforce this rule. - * - * If needed, parallel accesses by multiple logical cores to shared queues - * shall be explicitly protected by dedicated inline lock-aware functions - * built on top of their corresponding lock-free functions of the PMD API. - * - * In all functions of the Ethernet API, the Ethernet device is - * designated by an integer >= 0 named the device port identifier. - * - * At the Ethernet driver level, Ethernet devices are represented by a generic - * data structure of type *rte_eth_dev*. - * - * Ethernet devices are dynamically registered during the PCI probing phase - * performed at EAL initialization time. - * When an Ethernet device is being probed, an *rte_eth_dev* structure and - * a new port identifier are allocated for that device. Then, the eth_dev_init() - * function supplied by the Ethernet driver matching the probed PCI - * device is invoked to properly initialize the device. - * - * The role of the device init function consists of resetting the hardware, - * checking access to Non-volatile Memory (NVM), reading the MAC address - * from NVM etc. - * - * If the device init operation is successful, the correspondence between - * the port identifier assigned to the new device and its associated - * *rte_eth_dev* structure is effectively registered. - * Otherwise, both the *rte_eth_dev* structure and the port identifier are - * freed. - * - * The functions exported by the application Ethernet API to setup a device - * designated by its port identifier must be invoked in the following order: - * - rte_eth_dev_configure() - * - rte_eth_tx_queue_setup() - * - rte_eth_rx_queue_setup() - * - rte_eth_dev_start() - * - * Then, the network application can invoke, in any order, the functions - * exported by the Ethernet API to get the MAC address of a given device, to - * get the speed and the status of a device physical link, to receive/transmit - * [burst of] packets, and so on. - * - * If the application wants to change the configuration (i.e. call - * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or - * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the - * device and then do the reconfiguration before calling rte_eth_dev_start() - * again. The transmit and receive functions should not be invoked when the - * device is stopped. - * - * Please note that some configuration is not stored between calls to - * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will - * be retained: - * - * - flow control settings - * - receive mode configuration (promiscuous mode, hardware checksum mode, - * RSS/VMDQ settings etc.) 
- * - VLAN filtering configuration - * - MAC addresses supplied to MAC address array - * - flow director filtering mode (but not filtering rules) - * - NIC queue statistics mappings - * - * Any other configuration will not be stored and will need to be re-entered - * before a call to rte_eth_dev_start(). - * - * Finally, a network application can close an Ethernet device by invoking the - * rte_eth_dev_close() function. - * - * Each function of the application Ethernet API invokes a specific function - * of the PMD that controls the target device designated by its port - * identifier. - * For this purpose, all device-specific functions of an Ethernet driver are - * supplied through a set of pointers contained in a generic structure of type - * *eth_dev_ops*. - * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev* - * structure by the device init function of the Ethernet driver, which is - * invoked during the PCI probing phase, as explained earlier. - * - * In other words, each function of the Ethernet API simply retrieves the - * *rte_eth_dev* structure associated with the device port identifier and - * performs an indirect invocation of the corresponding driver function - * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure. - * - * For performance reasons, the address of the burst-oriented RX and TX - * functions of the Ethernet driver are not contained in the *eth_dev_ops* - * structure. Instead, they are directly stored at the beginning of the - * *rte_eth_dev* structure to avoid an extra indirect memory access during - * their invocation. - * - * RTE ethernet device drivers do not use interrupts for transmitting or - * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit - * functions to applications. - * Both receive and transmit functions are packet-burst oriented to minimize - * their cost per packet through the following optimizations: - * - * - Sharing among multiple packets the incompressible cost of the - * invocation of receive/transmit functions. - * - * - Enabling receive/transmit functions to take advantage of burst-oriented - * hardware features (L1 cache, prefetch instructions, NIC head/tail - * registers) to minimize the number of CPU cycles per packet, for instance, - * by avoiding useless read memory accesses to ring descriptors, or by - * systematically using arrays of pointers that exactly fit L1 cache line - * boundaries and sizes. - * - * The burst-oriented receive function does not provide any error notification, - * to avoid the corresponding overhead. As a hint, the upper-level application - * might check the status of the device link once being systematically returned - * a 0 value by the receive function of the driver for a given number of tries. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* Use this macro to check if LRO API is supported */ -#define RTE_ETHDEV_HAS_LRO_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rte_ether.h" -#include "rte_eth_ctrl.h" -#include "rte_dev_info.h" - -struct rte_mbuf; - -/** - * A structure used to retrieve statistics for an Ethernet port. - * Not all statistics fields in struct rte_eth_stats are supported - * by any type of network interface card (NIC). If any statistics - * field is not supported, its value is 0. - */ -struct rte_eth_stats { - uint64_t ipackets; /**< Total number of successfully received packets. 
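The configure/queue-setup/start ordering described above maps onto a bring-up helper like the sketch below; port_init() and the 512-descriptor ring sizes are illustrative, and mb_pool is assumed to be an existing rte_pktmbuf pool.

    #include <string.h>
    #include <rte_ethdev.h>
    #include <rte_mempool.h>

    static int
    port_init(uint16_t port_id, struct rte_mempool *mb_pool)
    {
        struct rte_eth_conf port_conf;
        int ret;

        memset(&port_conf, 0, sizeof(port_conf));   /* default single-queue config */

        ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
        if (ret != 0)
            return ret;

        ret = rte_eth_tx_queue_setup(port_id, 0, 512,
                rte_eth_dev_socket_id(port_id), NULL);
        if (ret != 0)
            return ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, 512,
                rte_eth_dev_socket_id(port_id), NULL, mb_pool);
        if (ret != 0)
            return ret;

        return rte_eth_dev_start(port_id);
    }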
*/ - uint64_t opackets; /**< Total number of successfully transmitted packets.*/ - uint64_t ibytes; /**< Total number of successfully received bytes. */ - uint64_t obytes; /**< Total number of successfully transmitted bytes. */ - uint64_t imissed; - /**< Total of RX packets dropped by the HW, - * because there are no available buffer (i.e. RX queues are full). - */ - uint64_t ierrors; /**< Total number of erroneous received packets. */ - uint64_t oerrors; /**< Total number of failed transmitted packets. */ - uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */ - uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; - /**< Total number of queue RX packets. */ - uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; - /**< Total number of queue TX packets. */ - uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; - /**< Total number of successfully received queue bytes. */ - uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; - /**< Total number of successfully transmitted queue bytes. */ - uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS]; - /**< Total number of queue packets received that are dropped. */ -}; - -/** - * Device supported speeds bitmap flags - */ -#define ETH_LINK_SPEED_AUTONEG (0 << 0) /**< Autonegotiate (all speeds) */ -#define ETH_LINK_SPEED_FIXED (1 << 0) /**< Disable autoneg (fixed speed) */ -#define ETH_LINK_SPEED_10M_HD (1 << 1) /**< 10 Mbps half-duplex */ -#define ETH_LINK_SPEED_10M (1 << 2) /**< 10 Mbps full-duplex */ -#define ETH_LINK_SPEED_100M_HD (1 << 3) /**< 100 Mbps half-duplex */ -#define ETH_LINK_SPEED_100M (1 << 4) /**< 100 Mbps full-duplex */ -#define ETH_LINK_SPEED_1G (1 << 5) /**< 1 Gbps */ -#define ETH_LINK_SPEED_2_5G (1 << 6) /**< 2.5 Gbps */ -#define ETH_LINK_SPEED_5G (1 << 7) /**< 5 Gbps */ -#define ETH_LINK_SPEED_10G (1 << 8) /**< 10 Gbps */ -#define ETH_LINK_SPEED_20G (1 << 9) /**< 20 Gbps */ -#define ETH_LINK_SPEED_25G (1 << 10) /**< 25 Gbps */ -#define ETH_LINK_SPEED_40G (1 << 11) /**< 40 Gbps */ -#define ETH_LINK_SPEED_50G (1 << 12) /**< 50 Gbps */ -#define ETH_LINK_SPEED_56G (1 << 13) /**< 56 Gbps */ -#define ETH_LINK_SPEED_100G (1 << 14) /**< 100 Gbps */ - -/** - * Ethernet numeric link speeds in Mbps - */ -#define ETH_SPEED_NUM_NONE 0 /**< Not defined */ -#define ETH_SPEED_NUM_10M 10 /**< 10 Mbps */ -#define ETH_SPEED_NUM_100M 100 /**< 100 Mbps */ -#define ETH_SPEED_NUM_1G 1000 /**< 1 Gbps */ -#define ETH_SPEED_NUM_2_5G 2500 /**< 2.5 Gbps */ -#define ETH_SPEED_NUM_5G 5000 /**< 5 Gbps */ -#define ETH_SPEED_NUM_10G 10000 /**< 10 Gbps */ -#define ETH_SPEED_NUM_20G 20000 /**< 20 Gbps */ -#define ETH_SPEED_NUM_25G 25000 /**< 25 Gbps */ -#define ETH_SPEED_NUM_40G 40000 /**< 40 Gbps */ -#define ETH_SPEED_NUM_50G 50000 /**< 50 Gbps */ -#define ETH_SPEED_NUM_56G 56000 /**< 56 Gbps */ -#define ETH_SPEED_NUM_100G 100000 /**< 100 Gbps */ - -/** - * A structure used to retrieve link-level information of an Ethernet port. - */ -__extension__ -struct rte_eth_link { - uint32_t link_speed; /**< ETH_SPEED_NUM_ */ - uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ - uint16_t link_autoneg : 1; /**< ETH_LINK_[AUTONEG/FIXED] */ - uint16_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */ -} __attribute__((aligned(8))); /**< aligned for atomic64 read/write */ - -/* Utility constants */ -#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection (see link_duplex). */ -#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection (see link_duplex). */ -#define ETH_LINK_DOWN 0 /**< Link is down (see link_status). 
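The rte_eth_link layout and the link constants around it are typically consumed as in the sketch below; rte_eth_link_get_nowait() is the non-blocking query from this same API (not visible in this hunk), and show_link() is an illustrative name.

    #include <stdio.h>
    #include <rte_ethdev.h>

    static void
    show_link(uint16_t port_id)
    {
        struct rte_eth_link link;

        rte_eth_link_get_nowait(port_id, &link);    /* do not wait for autoneg */
        if (link.link_status == ETH_LINK_UP)
            printf("port %u: %u Mbps %s-duplex\n", port_id, link.link_speed,
                   link.link_duplex == ETH_LINK_FULL_DUPLEX ? "full" : "half");
        else
            printf("port %u: link down\n", port_id);
    }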
*/ -#define ETH_LINK_UP 1 /**< Link is up (see link_status). */ -#define ETH_LINK_FIXED 0 /**< No autonegotiation (see link_autoneg). */ -#define ETH_LINK_AUTONEG 1 /**< Autonegotiated (see link_autoneg). */ - -/** - * A structure used to configure the ring threshold registers of an RX/TX - * queue for an Ethernet port. - */ -struct rte_eth_thresh { - uint8_t pthresh; /**< Ring prefetch threshold. */ - uint8_t hthresh; /**< Ring host threshold. */ - uint8_t wthresh; /**< Ring writeback threshold. */ -}; - -/** - * Simple flags are used for rte_eth_conf.rxmode.mq_mode. - */ -#define ETH_MQ_RX_RSS_FLAG 0x1 -#define ETH_MQ_RX_DCB_FLAG 0x2 -#define ETH_MQ_RX_VMDQ_FLAG 0x4 - -/** - * A set of values to identify what method is to be used to route - * packets to multiple queues. - */ -enum rte_eth_rx_mq_mode { - /** None of DCB,RSS or VMDQ mode */ - ETH_MQ_RX_NONE = 0, - - /** For RX side, only RSS is on */ - ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG, - /** For RX side,only DCB is on. */ - ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG, - /** Both DCB and RSS enable */ - ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG, - - /** Only VMDQ, no RSS nor DCB */ - ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG, - /** RSS mode with VMDQ */ - ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG, - /** Use VMDQ+DCB to route traffic to queues */ - ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG, - /** Enable both VMDQ and DCB in VMDq */ - ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG | - ETH_MQ_RX_VMDQ_FLAG, -}; - -/** - * for rx mq mode backward compatible - */ -#define ETH_RSS ETH_MQ_RX_RSS -#define VMDQ_DCB ETH_MQ_RX_VMDQ_DCB -#define ETH_DCB_RX ETH_MQ_RX_DCB - -/** - * A set of values to identify what method is to be used to transmit - * packets using multi-TCs. - */ -enum rte_eth_tx_mq_mode { - ETH_MQ_TX_NONE = 0, /**< It is in neither DCB nor VT mode. */ - ETH_MQ_TX_DCB, /**< For TX side,only DCB is on. */ - ETH_MQ_TX_VMDQ_DCB, /**< For TX side,both DCB and VT is on. */ - ETH_MQ_TX_VMDQ_ONLY, /**< Only VT on, no DCB */ -}; - -/** - * for tx mq mode backward compatible - */ -#define ETH_DCB_NONE ETH_MQ_TX_NONE -#define ETH_VMDQ_DCB_TX ETH_MQ_TX_VMDQ_DCB -#define ETH_DCB_TX ETH_MQ_TX_DCB - -/** - * A structure used to configure the RX features of an Ethernet port. - */ -struct rte_eth_rxmode { - /** The multi-queue packet distribution mode to be used, e.g. RSS. */ - enum rte_eth_rx_mq_mode mq_mode; - uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ - uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ - /** - * Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags. - * Only offloads set on rx_offload_capa field on rte_eth_dev_info - * structure are allowed to be set. - */ - uint64_t offloads; - __extension__ - /** - * Below bitfield API is obsolete. Application should - * enable per-port offloads using the offload field - * above. - */ - uint16_t header_split : 1, /**< Header Split enable. */ - hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ - hw_vlan_filter : 1, /**< VLAN filter enable. */ - hw_vlan_strip : 1, /**< VLAN strip enable. */ - hw_vlan_extend : 1, /**< Extended VLAN enable. */ - jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */ - hw_strip_crc : 1, /**< Enable CRC stripping by hardware. 
*/ - enable_scatter : 1, /**< Enable scatter packets rx handler */ - enable_lro : 1, /**< Enable LRO */ - hw_timestamp : 1, /**< Enable HW timestamp */ - security : 1, /**< Enable rte_security offloads */ - /** - * When set the offload bitfield should be ignored. - * Instead per-port Rx offloads should be set on offloads - * field above. - * Per-queue offloads shuold be set on rte_eth_rxq_conf - * structure. - * This bit is temporary till rxmode bitfield offloads API will - * be deprecated. - */ - ignore_offload_bitfield : 1; -}; - -/** - * VLAN types to indicate if it is for single VLAN, inner VLAN or outer VLAN. - * Note that single VLAN is treated the same as inner VLAN. - */ -enum rte_vlan_type { - ETH_VLAN_TYPE_UNKNOWN = 0, - ETH_VLAN_TYPE_INNER, /**< Inner VLAN. */ - ETH_VLAN_TYPE_OUTER, /**< Single VLAN, or outer VLAN. */ - ETH_VLAN_TYPE_MAX, -}; - -/** - * A structure used to describe a vlan filter. - * If the bit corresponding to a VID is set, such VID is on. - */ -struct rte_vlan_filter_conf { - uint64_t ids[64]; -}; - -/** - * A structure used to configure the Receive Side Scaling (RSS) feature - * of an Ethernet port. - * If not NULL, the *rss_key* pointer of the *rss_conf* structure points - * to an array holding the RSS key to use for hashing specific header - * fields of received packets. The length of this array should be indicated - * by *rss_key_len* below. Otherwise, a default random hash key is used by - * the device driver. - * - * The *rss_key_len* field of the *rss_conf* structure indicates the length - * in bytes of the array pointed by *rss_key*. To be compatible, this length - * will be checked in i40e only. Others assume 40 bytes to be used as before. - * - * The *rss_hf* field of the *rss_conf* structure indicates the different - * types of IPv4/IPv6 packets to which the RSS hashing must be applied. - * Supplying an *rss_hf* equal to zero disables the RSS feature. - */ -struct rte_eth_rss_conf { - uint8_t *rss_key; /**< If not NULL, 40-byte hash key. */ - uint8_t rss_key_len; /**< hash key length in bytes. */ - uint64_t rss_hf; /**< Hash functions to apply - see below. */ -}; - -/* - * The RSS offload types are defined based on flow types which are defined - * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload - * types. The supported flow types or RSS offload types can be queried by - * rte_eth_dev_info_get(). 
- */ -#define ETH_RSS_IPV4 (1ULL << RTE_ETH_FLOW_IPV4) -#define ETH_RSS_FRAG_IPV4 (1ULL << RTE_ETH_FLOW_FRAG_IPV4) -#define ETH_RSS_NONFRAG_IPV4_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP) -#define ETH_RSS_NONFRAG_IPV4_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP) -#define ETH_RSS_NONFRAG_IPV4_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) -#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) -#define ETH_RSS_IPV6 (1ULL << RTE_ETH_FLOW_IPV6) -#define ETH_RSS_FRAG_IPV6 (1ULL << RTE_ETH_FLOW_FRAG_IPV6) -#define ETH_RSS_NONFRAG_IPV6_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP) -#define ETH_RSS_NONFRAG_IPV6_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP) -#define ETH_RSS_NONFRAG_IPV6_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) -#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) -#define ETH_RSS_L2_PAYLOAD (1ULL << RTE_ETH_FLOW_L2_PAYLOAD) -#define ETH_RSS_IPV6_EX (1ULL << RTE_ETH_FLOW_IPV6_EX) -#define ETH_RSS_IPV6_TCP_EX (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX) -#define ETH_RSS_IPV6_UDP_EX (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX) -#define ETH_RSS_PORT (1ULL << RTE_ETH_FLOW_PORT) -#define ETH_RSS_VXLAN (1ULL << RTE_ETH_FLOW_VXLAN) -#define ETH_RSS_GENEVE (1ULL << RTE_ETH_FLOW_GENEVE) -#define ETH_RSS_NVGRE (1ULL << RTE_ETH_FLOW_NVGRE) - -#define ETH_RSS_IP ( \ - ETH_RSS_IPV4 | \ - ETH_RSS_FRAG_IPV4 | \ - ETH_RSS_NONFRAG_IPV4_OTHER | \ - ETH_RSS_IPV6 | \ - ETH_RSS_FRAG_IPV6 | \ - ETH_RSS_NONFRAG_IPV6_OTHER | \ - ETH_RSS_IPV6_EX) - -#define ETH_RSS_UDP ( \ - ETH_RSS_NONFRAG_IPV4_UDP | \ - ETH_RSS_NONFRAG_IPV6_UDP | \ - ETH_RSS_IPV6_UDP_EX) - -#define ETH_RSS_TCP ( \ - ETH_RSS_NONFRAG_IPV4_TCP | \ - ETH_RSS_NONFRAG_IPV6_TCP | \ - ETH_RSS_IPV6_TCP_EX) - -#define ETH_RSS_SCTP ( \ - ETH_RSS_NONFRAG_IPV4_SCTP | \ - ETH_RSS_NONFRAG_IPV6_SCTP) - -#define ETH_RSS_TUNNEL ( \ - ETH_RSS_VXLAN | \ - ETH_RSS_GENEVE | \ - ETH_RSS_NVGRE) - -/**< Mask of valid RSS hash protocols */ -#define ETH_RSS_PROTO_MASK ( \ - ETH_RSS_IPV4 | \ - ETH_RSS_FRAG_IPV4 | \ - ETH_RSS_NONFRAG_IPV4_TCP | \ - ETH_RSS_NONFRAG_IPV4_UDP | \ - ETH_RSS_NONFRAG_IPV4_SCTP | \ - ETH_RSS_NONFRAG_IPV4_OTHER | \ - ETH_RSS_IPV6 | \ - ETH_RSS_FRAG_IPV6 | \ - ETH_RSS_NONFRAG_IPV6_TCP | \ - ETH_RSS_NONFRAG_IPV6_UDP | \ - ETH_RSS_NONFRAG_IPV6_SCTP | \ - ETH_RSS_NONFRAG_IPV6_OTHER | \ - ETH_RSS_L2_PAYLOAD | \ - ETH_RSS_IPV6_EX | \ - ETH_RSS_IPV6_TCP_EX | \ - ETH_RSS_IPV6_UDP_EX | \ - ETH_RSS_PORT | \ - ETH_RSS_VXLAN | \ - ETH_RSS_GENEVE | \ - ETH_RSS_NVGRE) - -/* - * Definitions used for redirection table entry size. - * Some RSS RETA sizes may not be supported by some drivers, check the - * documentation or the description of relevant functions for more details. - */ -#define ETH_RSS_RETA_SIZE_64 64 -#define ETH_RSS_RETA_SIZE_128 128 -#define ETH_RSS_RETA_SIZE_256 256 -#define ETH_RSS_RETA_SIZE_512 512 -#define RTE_RETA_GROUP_SIZE 64 - -/* Definitions used for VMDQ and DCB functionality */ -#define ETH_VMDQ_MAX_VLAN_FILTERS 64 /**< Maximum nb. of VMDQ vlan filters. */ -#define ETH_DCB_NUM_USER_PRIORITIES 8 /**< Maximum nb. of DCB priorities. */ -#define ETH_VMDQ_DCB_NUM_QUEUES 128 /**< Maximum nb. of VMDQ DCB queues. */ -#define ETH_DCB_NUM_QUEUES 128 /**< Maximum nb. of DCB queues. */ - -/* DCB capability defines */ -#define ETH_DCB_PG_SUPPORT 0x00000001 /**< Priority Group(ETS) support. */ -#define ETH_DCB_PFC_SUPPORT 0x00000002 /**< Priority Flow Control support. 
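A hedged example of wiring these RSS definitions into a port configuration: mq_mode selects RSS and rss_hf picks the flow types to hash, which must stay within dev_info.flow_type_rss_offloads for the PMD in use; rx_adv_conf is the member of rte_eth_conf (defined further down in this header) that carries the RSS settings.

    #include <rte_ethdev.h>

    /* RSS over IP and TCP flows, letting the driver choose the hash key. */
    static const struct rte_eth_conf rss_port_conf = {
        .rxmode = {
            .mq_mode = ETH_MQ_RX_RSS,
        },
        .rx_adv_conf = {
            .rss_conf = {
                .rss_key = NULL,
                .rss_hf  = ETH_RSS_IP | ETH_RSS_TCP,
            },
        },
    };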
*/ - -/* Definitions used for VLAN Offload functionality */ -#define ETH_VLAN_STRIP_OFFLOAD 0x0001 /**< VLAN Strip On/Off */ -#define ETH_VLAN_FILTER_OFFLOAD 0x0002 /**< VLAN Filter On/Off */ -#define ETH_VLAN_EXTEND_OFFLOAD 0x0004 /**< VLAN Extend On/Off */ - -/* Definitions used for mask VLAN setting */ -#define ETH_VLAN_STRIP_MASK 0x0001 /**< VLAN Strip setting mask */ -#define ETH_VLAN_FILTER_MASK 0x0002 /**< VLAN Filter setting mask*/ -#define ETH_VLAN_EXTEND_MASK 0x0004 /**< VLAN Extend setting mask*/ -#define ETH_VLAN_ID_MAX 0x0FFF /**< VLAN ID is in lower 12 bits*/ - -/* Definitions used for receive MAC address */ -#define ETH_NUM_RECEIVE_MAC_ADDR 128 /**< Maximum nb. of receive mac addr. */ - -/* Definitions used for unicast hash */ -#define ETH_VMDQ_NUM_UC_HASH_ARRAY 128 /**< Maximum nb. of UC hash array. */ - -/* Definitions used for VMDQ pool rx mode setting */ -#define ETH_VMDQ_ACCEPT_UNTAG 0x0001 /**< accept untagged packets. */ -#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */ -#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */ -#define ETH_VMDQ_ACCEPT_BROADCAST 0x0008 /**< accept broadcast packets. */ -#define ETH_VMDQ_ACCEPT_MULTICAST 0x0010 /**< multicast promiscuous. */ - -/** Maximum nb. of vlan per mirror rule */ -#define ETH_MIRROR_MAX_VLANS 64 - -#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */ -#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */ -#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */ -#define ETH_MIRROR_VLAN 0x08 /**< VLAN Mirroring. */ -#define ETH_MIRROR_VIRTUAL_POOL_DOWN 0x10 /**< Virtual Pool downlink Mirroring. */ - -/** - * A structure used to configure VLAN traffic mirror of an Ethernet port. - */ -struct rte_eth_vlan_mirror { - uint64_t vlan_mask; /**< mask for valid VLAN ID. */ - /** VLAN ID list for vlan mirroring. */ - uint16_t vlan_id[ETH_MIRROR_MAX_VLANS]; -}; - -/** - * A structure used to configure traffic mirror of an Ethernet port. - */ -struct rte_eth_mirror_conf { - uint8_t rule_type; /**< Mirroring rule type */ - uint8_t dst_pool; /**< Destination pool for this mirror rule. */ - uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */ - /** VLAN ID setting for VLAN mirroring. */ - struct rte_eth_vlan_mirror vlan; -}; - -/** - * A structure used to configure 64 entries of Redirection Table of the - * Receive Side Scaling (RSS) feature of an Ethernet port. To configure - * more than 64 entries supported by hardware, an array of this structure - * is needed. - */ -struct rte_eth_rss_reta_entry64 { - uint64_t mask; - /**< Mask bits indicate which entries need to be updated/queried. */ - uint16_t reta[RTE_RETA_GROUP_SIZE]; - /**< Group of 64 redirection table entries. */ -}; - -/** - * This enum indicates the possible number of traffic classes - * in DCB configurations - */ -enum rte_eth_nb_tcs { - ETH_4_TCS = 4, /**< 4 TCs with DCB. */ - ETH_8_TCS = 8 /**< 8 TCs with DCB. */ -}; - -/** - * This enum indicates the possible number of queue pools - * in VMDQ configurations. - */ -enum rte_eth_nb_pools { - ETH_8_POOLS = 8, /**< 8 VMDq pools. */ - ETH_16_POOLS = 16, /**< 16 VMDq pools. */ - ETH_32_POOLS = 32, /**< 32 VMDq pools. */ - ETH_64_POOLS = 64 /**< 64 VMDq pools. */ -}; - -/* This structure may be extended in future. */ -struct rte_eth_dcb_rx_conf { - enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */ - /** Traffic class each UP mapped to. 
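The rte_eth_rss_reta_entry64 array above is the argument format consumed by rte_eth_dev_rss_reta_update() (an ethdev call not shown in this hunk). The sketch assumes a 128-entry table for illustration; a real caller must use dev_info.reta_size instead, and reta_round_robin() is an illustrative name.

    #include <string.h>
    #include <rte_ethdev.h>

    static int
    reta_round_robin(uint16_t port_id, uint16_t nb_queues)
    {
        struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_128 /
                                                  RTE_RETA_GROUP_SIZE];
        uint16_t i;

        memset(reta_conf, 0, sizeof(reta_conf));
        for (i = 0; i < ETH_RSS_RETA_SIZE_128; i++) {
            /* Mark the entry as valid and spread it over the Rx queues. */
            reta_conf[i / RTE_RETA_GROUP_SIZE].mask |=
                1ULL << (i % RTE_RETA_GROUP_SIZE);
            reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
                i % nb_queues;
        }
        return rte_eth_dev_rss_reta_update(port_id, reta_conf,
                                           ETH_RSS_RETA_SIZE_128);
    }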
*/ - uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; -}; - -struct rte_eth_vmdq_dcb_tx_conf { - enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */ - /** Traffic class each UP mapped to. */ - uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; -}; - -struct rte_eth_dcb_tx_conf { - enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */ - /** Traffic class each UP mapped to. */ - uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; -}; - -struct rte_eth_vmdq_tx_conf { - enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */ -}; - -/** - * A structure used to configure the VMDQ+DCB feature - * of an Ethernet port. - * - * Using this feature, packets are routed to a pool of queues, based - * on the vlan id in the vlan tag, and then to a specific queue within - * that pool, using the user priority vlan tag field. - * - * A default pool may be used, if desired, to route all traffic which - * does not match the vlan filter rules. - */ -struct rte_eth_vmdq_dcb_conf { - enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */ - uint8_t enable_default_pool; /**< If non-zero, use a default pool */ - uint8_t default_pool; /**< The default pool, if applicable */ - uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ - struct { - uint16_t vlan_id; /**< The vlan id of the received frame */ - uint64_t pools; /**< Bitmask of pools for packet rx */ - } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ - uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; - /**< Selects a queue in a pool */ -}; - -/** - * A structure used to configure the VMDQ feature of an Ethernet port when - * not combined with the DCB feature. - * - * Using this feature, packets are routed to a pool of queues. By default, - * the pool selection is based on the MAC address, the vlan id in the - * vlan tag as specified in the pool_map array. - * Passing the ETH_VMDQ_ACCEPT_UNTAG in the rx_mode field allows pool - * selection using only the MAC address. MAC address to pool mapping is done - * using the rte_eth_dev_mac_addr_add function, with the pool parameter - * corresponding to the pool id. - * - * Queue selection within the selected pool will be done using RSS when - * it is enabled or revert to the first queue of the pool if not. - * - * A default pool may be used, if desired, to route all traffic which - * does not match the vlan filter rules or any pool MAC address. - */ -struct rte_eth_vmdq_rx_conf { - enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ - uint8_t enable_default_pool; /**< If non-zero, use a default pool */ - uint8_t default_pool; /**< The default pool, if applicable */ - uint8_t enable_loop_back; /**< Enable VT loop back */ - uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ - uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */ - struct { - uint16_t vlan_id; /**< The vlan id of the received frame */ - uint64_t pools; /**< Bitmask of pools for packet rx */ - } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ -}; - -/** - * A structure used to configure the TX features of an Ethernet port. - */ -struct rte_eth_txmode { - enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ - /** - * Per-port Tx offloads to be set using DEV_TX_OFFLOAD_* flags. - * Only offloads set on tx_offload_capa field on rte_eth_dev_info - * structure are allowed to be set. 
- */ - uint64_t offloads; - - /* For i40e specifically */ - uint16_t pvid; - __extension__ - uint8_t hw_vlan_reject_tagged : 1, - /**< If set, reject sending out tagged pkts */ - hw_vlan_reject_untagged : 1, - /**< If set, reject sending out untagged pkts */ - hw_vlan_insert_pvid : 1; - /**< If set, enable port based VLAN insertion */ -}; - -/** - * A structure used to configure an RX ring of an Ethernet port. - */ -struct rte_eth_rxconf { - struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */ - uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */ - uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ - uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ - /** - * Per-queue Rx offloads to be set using DEV_RX_OFFLOAD_* flags. - * Only offloads set on rx_queue_offload_capa or rx_offload_capa - * fields on rte_eth_dev_info structure are allowed to be set. - */ - uint64_t offloads; -}; - -#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */ -#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */ -#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */ -#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */ -#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */ -#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */ -#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */ -#define ETH_TXQ_FLAGS_NOOFFLOADS \ - (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \ - ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP) -#define ETH_TXQ_FLAGS_NOXSUMS \ - (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \ - ETH_TXQ_FLAGS_NOXSUMTCP) -/** - * When set the txq_flags should be ignored, - * instead per-queue Tx offloads will be set on offloads field - * located on rte_eth_txq_conf struct. - * This flag is temporary till the rte_eth_txq_conf.txq_flags - * API will be deprecated. - */ -#define ETH_TXQ_FLAGS_IGNORE 0x8000 - -/** - * A structure used to configure a TX ring of an Ethernet port. - */ -struct rte_eth_txconf { - struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */ - uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */ - uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are - less free descriptors than this value. */ - - uint32_t txq_flags; /**< Set flags for the Tx queue */ - uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ - /** - * Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags. - * Only offloads set on tx_queue_offload_capa or tx_offload_capa - * fields on rte_eth_dev_info structure are allowed to be set. - */ - uint64_t offloads; -}; - -/** - * A structure contains information about HW descriptor ring limitations. - */ -struct rte_eth_desc_lim { - uint16_t nb_max; /**< Max allowed number of descriptors. */ - uint16_t nb_min; /**< Min allowed number of descriptors. */ - uint16_t nb_align; /**< Number of descriptors should be aligned to. */ - - /** - * Max allowed number of segments per whole packet. - * - * - For TSO packet this is the total number of data descriptors allowed - * by device. - * - * @see nb_mtu_seg_max - */ - uint16_t nb_seg_max; - - /** - * Max number of segments per one MTU. - * - * - For non-TSO packet, this is the maximum allowed number of segments - * in a single transmit packet. 
- * - * - For TSO packet each segment within the TSO may span up to this - * value. - * - * @see nb_seg_max - */ - uint16_t nb_mtu_seg_max; -}; - -/** - * This enum indicates the flow control mode - */ -enum rte_eth_fc_mode { - RTE_FC_NONE = 0, /**< Disable flow control. */ - RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */ - RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */ - RTE_FC_FULL /**< Enable flow control on both side. */ -}; - -/** - * A structure used to configure Ethernet flow control parameter. - * These parameters will be configured into the register of the NIC. - * Please refer to the corresponding data sheet for proper value. - */ -struct rte_eth_fc_conf { - uint32_t high_water; /**< High threshold value to trigger XOFF */ - uint32_t low_water; /**< Low threshold value to trigger XON */ - uint16_t pause_time; /**< Pause quota in the Pause frame */ - uint16_t send_xon; /**< Is XON frame need be sent */ - enum rte_eth_fc_mode mode; /**< Link flow control mode */ - uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ - uint8_t autoneg; /**< Use Pause autoneg */ -}; - -/** - * A structure used to configure Ethernet priority flow control parameter. - * These parameters will be configured into the register of the NIC. - * Please refer to the corresponding data sheet for proper value. - */ -struct rte_eth_pfc_conf { - struct rte_eth_fc_conf fc; /**< General flow control parameter. */ - uint8_t priority; /**< VLAN User Priority. */ -}; - -/** - * Memory space that can be configured to store Flow Director filters - * in the board memory. - */ -enum rte_fdir_pballoc_type { - RTE_FDIR_PBALLOC_64K = 0, /**< 64k. */ - RTE_FDIR_PBALLOC_128K, /**< 128k. */ - RTE_FDIR_PBALLOC_256K, /**< 256k. */ -}; - -/** - * Select report mode of FDIR hash information in RX descriptors. - */ -enum rte_fdir_status_mode { - RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */ - RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */ - RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */ -}; - -/** - * A structure used to configure the Flow Director (FDIR) feature - * of an Ethernet port. - * - * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored. - */ -struct rte_fdir_conf { - enum rte_fdir_mode mode; /**< Flow Director mode. */ - enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */ - enum rte_fdir_status_mode status; /**< How to report FDIR hash. */ - /** RX queue of packets matching a "drop" filter in perfect mode. */ - uint8_t drop_queue; - struct rte_eth_fdir_masks mask; - struct rte_eth_fdir_flex_conf flex_conf; - /**< Flex payload configuration. */ -}; - -/** - * UDP tunneling configuration. - * Used to config the UDP port for a type of tunnel. - * NICs need the UDP port to identify the tunnel type. - * Normally a type of tunnel has a default UDP port, this structure can be used - * in case if the users want to change or support more UDP port. - */ -struct rte_eth_udp_tunnel { - uint16_t udp_port; /**< UDP port used for the tunnel. */ - uint8_t prot_type; /**< Tunnel type. Defined in rte_eth_tunnel_type. */ -}; - -/** - * A structure used to enable/disable specific device interrupts. - */ -struct rte_intr_conf { - /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ - uint32_t lsc:1; - /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ - uint32_t rxq:1; - /** enable/disable rmv interrupt. 
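A sketch of filling rte_eth_fc_conf and handing it to rte_eth_dev_flow_ctrl_set() (the setter belongs to the same API but is not visible in this hunk); the water marks and pause time below are placeholders, and the NIC data sheet values mentioned above should be used instead.

    #include <string.h>
    #include <rte_ethdev.h>

    static int
    enable_flow_control(uint16_t port_id)
    {
        struct rte_eth_fc_conf fc_conf;

        memset(&fc_conf, 0, sizeof(fc_conf));
        fc_conf.mode       = RTE_FC_FULL;   /* pause frames on Rx and Tx */
        fc_conf.high_water = 8192;          /* placeholder thresholds */
        fc_conf.low_water  = 4096;
        fc_conf.pause_time = 0x680;
        fc_conf.send_xon   = 1;
        return rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
    }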
0 (default) - disable, 1 enable */ - uint32_t rmv:1; -}; - -/** - * A structure used to configure an Ethernet port. - * Depending upon the RX multi-queue mode, extra advanced - * configuration settings may be needed. - */ -struct rte_eth_conf { - uint32_t link_speeds; /**< bitmap of ETH_LINK_SPEED_XXX of speeds to be - used. ETH_LINK_SPEED_FIXED disables link - autonegotiation, and a unique speed shall be - set. Otherwise, the bitmap defines the set of - speeds to be advertised. If the special value - ETH_LINK_SPEED_AUTONEG (0) is used, all speeds - supported are advertised. */ - struct rte_eth_rxmode rxmode; /**< Port RX configuration. */ - struct rte_eth_txmode txmode; /**< Port TX configuration. */ - uint32_t lpbk_mode; /**< Loopback operation mode. By default the value - is 0, meaning the loopback mode is disabled. - Read the datasheet of given ethernet controller - for details. The possible values of this field - are defined in implementation of each driver. */ - struct { - struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */ - struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf; - /**< Port vmdq+dcb configuration. */ - struct rte_eth_dcb_rx_conf dcb_rx_conf; - /**< Port dcb RX configuration. */ - struct rte_eth_vmdq_rx_conf vmdq_rx_conf; - /**< Port vmdq RX configuration. */ - } rx_adv_conf; /**< Port RX filtering configuration. */ - union { - struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; - /**< Port vmdq+dcb TX configuration. */ - struct rte_eth_dcb_tx_conf dcb_tx_conf; - /**< Port dcb TX configuration. */ - struct rte_eth_vmdq_tx_conf vmdq_tx_conf; - /**< Port vmdq TX configuration. */ - } tx_adv_conf; /**< Port TX DCB configuration (union). */ - /** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC - is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */ - uint32_t dcb_capability_en; - struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */ - struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */ -}; - -/** - * A structure used to retrieve the contextual information of - * an Ethernet device, such as the controlling driver of the device, - * its PCI context, etc... - */ - -/** - * RX offload capabilities of a device. - */ -#define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 -#define DEV_RX_OFFLOAD_IPV4_CKSUM 0x00000002 -#define DEV_RX_OFFLOAD_UDP_CKSUM 0x00000004 -#define DEV_RX_OFFLOAD_TCP_CKSUM 0x00000008 -#define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 -#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 -#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 -#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 -#define DEV_RX_OFFLOAD_HEADER_SPLIT 0x00000100 -#define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200 -#define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400 -#define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800 -#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000 -#define DEV_RX_OFFLOAD_SCATTER 0x00002000 -#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000 -#define DEV_RX_OFFLOAD_SECURITY 0x00008000 -#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \ - DEV_RX_OFFLOAD_UDP_CKSUM | \ - DEV_RX_OFFLOAD_TCP_CKSUM) -#define DEV_RX_OFFLOAD_VLAN (DEV_RX_OFFLOAD_VLAN_STRIP | \ - DEV_RX_OFFLOAD_VLAN_FILTER | \ - DEV_RX_OFFLOAD_VLAN_EXTEND) - -/* - * If new Rx offload capabilities are defined, they also must be - * mentioned in rte_rx_offload_names in rte_ethdev.c file. - */ - -/** - * TX offload capabilities of a device. 
- */ -#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001 -#define DEV_TX_OFFLOAD_IPV4_CKSUM 0x00000002 -#define DEV_TX_OFFLOAD_UDP_CKSUM 0x00000004 -#define DEV_TX_OFFLOAD_TCP_CKSUM 0x00000008 -#define DEV_TX_OFFLOAD_SCTP_CKSUM 0x00000010 -#define DEV_TX_OFFLOAD_TCP_TSO 0x00000020 -#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 -#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ -#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 -#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ -#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ -#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ -#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ -#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 -#define DEV_TX_OFFLOAD_MT_LOCKFREE 0x00004000 -/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same - * tx queue without SW lock. - */ -#define DEV_TX_OFFLOAD_MULTI_SEGS 0x00008000 -/**< Device supports multi segment send. */ -#define DEV_TX_OFFLOAD_MBUF_FAST_FREE 0x00010000 -/**< Device supports optimization for fast release of mbufs. - * When set application must guarantee that per-queue all mbufs comes from - * the same mempool and has refcnt = 1. - */ -#define DEV_TX_OFFLOAD_SECURITY 0x00020000 - -/* - * If new Tx offload capabilities are defined, they also must be - * mentioned in rte_tx_offload_names in rte_ethdev.c file. - */ - -struct rte_pci_device; - -/** - * Ethernet device information - */ -struct rte_eth_dev_info { - struct rte_pci_device *pci_dev; /**< Device PCI information. */ - const char *driver_name; /**< Device Driver name. */ - unsigned int if_index; /**< Index to bound host interface, or 0 if none. - Use if_indextoname() to translate into an interface name. */ - uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */ - uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */ - uint16_t max_rx_queues; /**< Maximum number of RX queues. */ - uint16_t max_tx_queues; /**< Maximum number of TX queues. */ - uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ - uint32_t max_hash_mac_addrs; - /** Maximum number of hash MAC addresses for MTA and UTA. */ - uint16_t max_vfs; /**< Maximum number of VFs. */ - uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ - uint64_t rx_offload_capa; - /**< Device per port RX offload capabilities. */ - uint64_t tx_offload_capa; - /**< Device per port TX offload capabilities. */ - uint64_t rx_queue_offload_capa; - /**< Device per queue RX offload capabilities. */ - uint64_t tx_queue_offload_capa; - /**< Device per queue TX offload capabilities. */ - uint16_t reta_size; - /**< Device redirection table size, the total number of entries. */ - uint8_t hash_key_size; /**< Hash key size in bytes */ - /** Bit mask of RSS offloads, the bit offset also means flow type */ - uint64_t flow_type_rss_offloads; - struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */ - struct rte_eth_txconf default_txconf; /**< Default TX configuration */ - uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */ - uint16_t vmdq_queue_num; /**< Queue number for VMDQ pools. */ - uint16_t vmdq_pool_base; /**< First ID of VMDQ pools. */ - struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */ - struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */ - uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). 
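These capability bits are meant to be tested against rte_eth_dev_info before being requested in rxmode.offloads; a minimal sketch follows (pick_rx_offloads() is an illustrative name).

    #include <rte_ethdev.h>

    static uint64_t
    pick_rx_offloads(uint16_t port_id)
    {
        struct rte_eth_dev_info dev_info;
        uint64_t offloads = 0;

        rte_eth_dev_info_get(port_id, &dev_info);
        /* Request Rx checksum offload only if the PMD advertises it. */
        if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CHECKSUM) ==
                DEV_RX_OFFLOAD_CHECKSUM)
            offloads |= DEV_RX_OFFLOAD_CHECKSUM;
        return offloads;    /* goes into rte_eth_conf.rxmode.offloads */
    }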
*/ - /** Configured number of rx/tx queues */ - uint16_t nb_rx_queues; /**< Number of RX queues. */ - uint16_t nb_tx_queues; /**< Number of TX queues. */ -}; - -/** - * Ethernet device RX queue information structure. - * Used to retieve information about configured queue. - */ -struct rte_eth_rxq_info { - struct rte_mempool *mp; /**< mempool used by that queue. */ - struct rte_eth_rxconf conf; /**< queue config parameters. */ - uint8_t scattered_rx; /**< scattered packets RX supported. */ - uint16_t nb_desc; /**< configured number of RXDs. */ -} __rte_cache_min_aligned; - -/** - * Ethernet device TX queue information structure. - * Used to retrieve information about configured queue. - */ -struct rte_eth_txq_info { - struct rte_eth_txconf conf; /**< queue config parameters. */ - uint16_t nb_desc; /**< configured number of TXDs. */ -} __rte_cache_min_aligned; - -/** Maximum name length for extended statistics counters */ -#define RTE_ETH_XSTATS_NAME_SIZE 64 - -/** - * An Ethernet device extended statistic structure - * - * This structure is used by rte_eth_xstats_get() to provide - * statistics that are not provided in the generic *rte_eth_stats* - * structure. - * It maps a name id, corresponding to an index in the array returned - * by rte_eth_xstats_get_names(), to a statistic value. - */ -struct rte_eth_xstat { - uint64_t id; /**< The index in xstats name array. */ - uint64_t value; /**< The statistic counter value. */ -}; - -/** - * A name element for extended statistics. - * - * An array of this structure is returned by rte_eth_xstats_get_names(). - * It lists the names of extended statistics for a PMD. The *rte_eth_xstat* - * structure references these names by their array index. - */ -struct rte_eth_xstat_name { - char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ -}; - -#define ETH_DCB_NUM_TCS 8 -#define ETH_MAX_VMDQ_POOL 64 - -/** - * A structure used to get the information of queue and - * TC mapping on both TX and RX paths. - */ -struct rte_eth_dcb_tc_queue_mapping { - /** rx queues assigned to tc per Pool */ - struct { - uint8_t base; - uint8_t nb_queue; - } tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; - /** rx queues assigned to tc per Pool */ - struct { - uint8_t base; - uint8_t nb_queue; - } tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; -}; - -/** - * A structure used to get the information of DCB. - * It includes TC UP mapping and queue TC mapping. - */ -struct rte_eth_dcb_info { - uint8_t nb_tcs; /**< number of TCs */ - uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */ - uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */ - /** rx queues assigned to tc */ - struct rte_eth_dcb_tc_queue_mapping tc_queue; -}; - -/** - * RX/TX queue states - */ -#define RTE_ETH_QUEUE_STATE_STOPPED 0 -#define RTE_ETH_QUEUE_STATE_STARTED 1 - -struct rte_eth_dev; - -#define RTE_ETH_ALL RTE_MAX_ETHPORTS - -/* Macros to check for valid port */ -#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ - if (!rte_eth_dev_is_valid_port(port_id)) { \ - RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ - return retval; \ - } \ -} while (0) - -#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \ - if (!rte_eth_dev_is_valid_port(port_id)) { \ - RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ - return; \ - } \ -} while (0) - -/** - * l2 tunnel configuration. 
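The xstats structures above pair an id (an index into the name table) with a value, so a dump loop fetches both arrays and joins them, roughly as sketched here; dump_xstats() is an illustrative name and the size query by calling with NULL/0 first is the usual ethdev idiom.

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_ethdev.h>
    #include <rte_malloc.h>

    static void
    dump_xstats(uint16_t port_id)
    {
        struct rte_eth_xstat_name *names = NULL;
        struct rte_eth_xstat *values = NULL;
        int i, n;

        n = rte_eth_xstats_get_names(port_id, NULL, 0);   /* how many stats? */
        if (n <= 0)
            return;

        names = rte_malloc(NULL, sizeof(*names) * n, 0);
        values = rte_malloc(NULL, sizeof(*values) * n, 0);
        if (names == NULL || values == NULL)
            goto out;

        if (rte_eth_xstats_get_names(port_id, names, n) == n &&
            rte_eth_xstats_get(port_id, values, n) == n) {
            for (i = 0; i < n; i++)
                printf("%s: %" PRIu64 "\n",
                       names[values[i].id].name, values[i].value);
        }
    out:
        rte_free(names);
        rte_free(values);
    }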
- */ - -/**< l2 tunnel enable mask */ -#define ETH_L2_TUNNEL_ENABLE_MASK 0x00000001 -/**< l2 tunnel insertion mask */ -#define ETH_L2_TUNNEL_INSERTION_MASK 0x00000002 -/**< l2 tunnel stripping mask */ -#define ETH_L2_TUNNEL_STRIPPING_MASK 0x00000004 -/**< l2 tunnel forwarding mask */ -#define ETH_L2_TUNNEL_FORWARDING_MASK 0x00000008 - -/** - * Function type used for RX packet processing packet callbacks. - * - * The callback function is called on RX with a burst of packets that have - * been received on the given port and queue. - * - * @param port_id - * The Ethernet port on which RX is being performed. - * @param queue - * The queue on the Ethernet port which is being used to receive the packets. - * @param pkts - * The burst of packets that have just been received. - * @param nb_pkts - * The number of packets in the burst pointed to by "pkts". - * @param max_pkts - * The max number of packets that can be stored in the "pkts" array. - * @param user_param - * The arbitrary user parameter passed in by the application when the callback - * was originally configured. - * @return - * The number of packets returned to the user. - */ -typedef uint16_t (*rte_rx_callback_fn)(uint16_t port_id, uint16_t queue, - struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, - void *user_param); - -/** - * Function type used for TX packet processing packet callbacks. - * - * The callback function is called on TX with a burst of packets immediately - * before the packets are put onto the hardware queue for transmission. - * - * @param port_id - * The Ethernet port on which TX is being performed. - * @param queue - * The queue on the Ethernet port which is being used to transmit the packets. - * @param pkts - * The burst of packets that are about to be transmitted. - * @param nb_pkts - * The number of packets in the burst pointed to by "pkts". - * @param user_param - * The arbitrary user parameter passed in by the application when the callback - * was originally configured. - * @return - * The number of packets to be written to the NIC. - */ -typedef uint16_t (*rte_tx_callback_fn)(uint16_t port_id, uint16_t queue, - struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); - -/** - * A set of values to describe the possible states of an eth device. - */ -enum rte_eth_dev_state { - RTE_ETH_DEV_UNUSED = 0, - RTE_ETH_DEV_ATTACHED, - RTE_ETH_DEV_DEFERRED, - RTE_ETH_DEV_REMOVED, -}; - -struct rte_eth_dev_sriov { - uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ - uint8_t nb_q_per_pool; /**< rx queue number per pool */ - uint16_t def_vmdq_idx; /**< Default pool num used for PF */ - uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ -}; -#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) - -#define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN - -#define RTE_ETH_DEV_NO_OWNER 0 - -#define RTE_ETH_MAX_OWNER_NAME_LEN 64 - -struct rte_eth_dev_owner { - uint64_t id; /**< The owner unique identifier. */ - char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */ -}; - -/** Device supports link state interrupt */ -#define RTE_ETH_DEV_INTR_LSC 0x0002 -/** Device is a bonded slave */ -#define RTE_ETH_DEV_BONDED_SLAVE 0x0004 -/** Device supports device removal interrupt */ -#define RTE_ETH_DEV_INTR_RMV 0x0008 - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Iterates over valid ethdev ports owned by a specific owner. - * - * @param port_id - * The id of the next possible valid owned port. - * @param owner_id - * The owner identifier. 
- * RTE_ETH_DEV_NO_OWNER means iterate over all valid ownerless ports. - * @return - * Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none. - */ -uint64_t __rte_experimental rte_eth_find_next_owned_by(uint16_t port_id, - const uint64_t owner_id); - -/** - * Macro to iterate over all enabled ethdev ports owned by a specific owner. - */ -#define RTE_ETH_FOREACH_DEV_OWNED_BY(p, o) \ - for (p = rte_eth_find_next_owned_by(0, o); \ - (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \ - p = rte_eth_find_next_owned_by(p + 1, o)) - -/** - * Iterates over valid ethdev ports. - * - * @param port_id - * The id of the next possible valid port. - * @return - * Next valid port id, RTE_MAX_ETHPORTS if there is none. - */ -uint16_t rte_eth_find_next(uint16_t port_id); - -/** - * Macro to iterate over all enabled and ownerless ethdev ports. - */ -#define RTE_ETH_FOREACH_DEV(p) \ - RTE_ETH_FOREACH_DEV_OWNED_BY(p, RTE_ETH_DEV_NO_OWNER) - - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Get a new unique owner identifier. - * An owner identifier is used to owns Ethernet devices by only one DPDK entity - * to avoid multiple management of device by different entities. - * - * @param owner_id - * Owner identifier pointer. - * @return - * Negative errno value on error, 0 on success. - */ -int __rte_experimental rte_eth_dev_owner_new(uint64_t *owner_id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Set an Ethernet device owner. - * - * @param port_id - * The identifier of the port to own. - * @param owner - * The owner pointer. - * @return - * Negative errno value on error, 0 on success. - */ -int __rte_experimental rte_eth_dev_owner_set(const uint16_t port_id, - const struct rte_eth_dev_owner *owner); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Unset Ethernet device owner to make the device ownerless. - * - * @param port_id - * The identifier of port to make ownerless. - * @param owner_id - * The owner identifier. - * @return - * 0 on success, negative errno value on error. - */ -int __rte_experimental rte_eth_dev_owner_unset(const uint16_t port_id, - const uint64_t owner_id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Remove owner from all Ethernet devices owned by a specific owner. - * - * @param owner_id - * The owner identifier. - */ -void __rte_experimental rte_eth_dev_owner_delete(const uint64_t owner_id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Get the owner of an Ethernet device. - * - * @param port_id - * The port identifier. - * @param owner - * The owner structure pointer to fill. - * @return - * 0 on success, negative errno value on error.. - */ -int __rte_experimental rte_eth_dev_owner_get(const uint16_t port_id, - struct rte_eth_dev_owner *owner); - -/** - * Get the total number of Ethernet devices that have been successfully - * initialized by the matching Ethernet driver during the PCI probing phase - * and that are available for applications to use. These devices must be - * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with - * non-contiguous ranges of devices. - * These non-contiguous ranges can be created by calls to hotplug functions or - * by some PMDs. - * - * @return - * - The total number of usable Ethernet devices. - */ -uint16_t rte_eth_dev_count(void); - -/** - * Attach a new Ethernet device specified by arguments. 
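Illustrative sketch (not part of the hunk): the ownership API above is typically used by allocating an owner id with rte_eth_dev_owner_new() and then claiming every ownerless port found through RTE_ETH_FOREACH_DEV(); the component name written into the owner structure is a made-up example.

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

/* Claim every currently ownerless port for one application component. */
static int
claim_all_ports(struct rte_eth_dev_owner *owner)
{
        uint16_t port_id;

        memset(owner, 0, sizeof(*owner));
        snprintf(owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "example-owner");
        if (rte_eth_dev_owner_new(&owner->id) != 0)
                return -1;

        RTE_ETH_FOREACH_DEV(port_id) {
                if (rte_eth_dev_owner_set(port_id, owner) != 0)
                        return -1;
        }
        return 0;
}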
- * - * @param devargs - * A pointer to a strings array describing the new device - * to be attached. The strings should be a pci address like - * '0000:01:00.0' or virtual device name like 'net_pcap0'. - * @param port_id - * A pointer to a port identifier actually attached. - * @return - * 0 on success and port_id is filled, negative on error - */ -int rte_eth_dev_attach(const char *devargs, uint16_t *port_id); - -/** - * Detach a Ethernet device specified by port identifier. - * This function must be called when the device is in the - * closed state. - * - * @param port_id - * The port identifier of the device to detach. - * @param devname - * A pointer to a buffer that will be filled with the device name. - * This buffer must be at least RTE_DEV_NAME_MAX_LEN long. - * @return - * 0 on success and devname is filled, negative on error - */ -int rte_eth_dev_detach(uint16_t port_id, char *devname); - -/** - * Convert a numerical speed in Mbps to a bitmap flag that can be used in - * the bitmap link_speeds of the struct rte_eth_conf - * - * @param speed - * Numerical speed value in Mbps - * @param duplex - * ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds) - * @return - * 0 if the speed cannot be mapped - */ -uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * - * Get DEV_RX_OFFLOAD_* flag name. - * - * @param offload - * Offload flag. - * @return - * Offload name or 'UNKNOWN' if the flag cannot be recognised. - */ -const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * - * Get DEV_TX_OFFLOAD_* flag name. - * - * @param offload - * Offload flag. - * @return - * Offload name or 'UNKNOWN' if the flag cannot be recognised. - */ -const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload); - -/** - * Configure an Ethernet device. - * This function must be invoked first before any other function in the - * Ethernet API. This function can also be re-invoked when a device is in the - * stopped state. - * - * @param port_id - * The port identifier of the Ethernet device to configure. - * @param nb_rx_queue - * The number of receive queues to set up for the Ethernet device. - * @param nb_tx_queue - * The number of transmit queues to set up for the Ethernet device. - * @param eth_conf - * The pointer to the configuration data to be used for the Ethernet device. - * The *rte_eth_conf* structure includes: - * - the hardware offload features to activate, with dedicated fields for - * each statically configurable offload hardware feature provided by - * Ethernet devices, such as IP checksum or VLAN tag stripping for - * example. - * The Rx offload bitfield API is obsolete and will be deprecated. - * Applications should set the ignore_bitfield_offloads bit on *rxmode* - * structure and use offloads field to set per-port offloads instead. - * - the Receive Side Scaling (RSS) configuration when using multiple RX - * queues per port. - * - * Embedding all configuration information in a single data structure - * is the more flexible method that allows the addition of new features - * without changing the syntax of the API. - * @return - * - 0: Success, device configured. - * - <0: Error code returned by the driver configuration function. 
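Illustrative sketch of the usual bring-up sequence built from the calls in this header: rte_eth_dev_configure() as documented above, then the Rx/Tx queue setup functions and rte_eth_dev_start() documented further down. One queue of each type, default queue configuration, an mbuf pool assumed to be created elsewhere by the application, and arbitrary descriptor counts.

#include <string.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>

#define NB_RXD 512
#define NB_TXD 512

/* Configure one Rx and one Tx queue with defaults and start the port. */
static int
port_bring_up(uint16_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf port_conf;
        int ret;

        memset(&port_conf, 0, sizeof(port_conf));

        ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
        if (ret < 0)
                return ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, NB_RXD,
                        rte_eth_dev_socket_id(port_id), NULL, mb_pool);
        if (ret < 0)
                return ret;

        ret = rte_eth_tx_queue_setup(port_id, 0, NB_TXD,
                        rte_eth_dev_socket_id(port_id), NULL);
        if (ret < 0)
                return ret;

        return rte_eth_dev_start(port_id);
}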
- */ -int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue, - uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * - * Check if an Ethernet device was physically removed. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * 1 when the Ethernet device is removed, otherwise 0. - */ -int __rte_experimental -rte_eth_dev_is_removed(uint16_t port_id); - -/** - * Allocate and set up a receive queue for an Ethernet device. - * - * The function allocates a contiguous block of memory for *nb_rx_desc* - * receive descriptors from a memory zone associated with *socket_id* - * and initializes each receive descriptor with a network buffer allocated - * from the memory pool *mb_pool*. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rx_queue_id - * The index of the receive queue to set up. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param nb_rx_desc - * The number of receive descriptors to allocate for the receive ring. - * @param socket_id - * The *socket_id* argument is the socket identifier in case of NUMA. - * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for - * the DMA memory allocated for the receive descriptors of the ring. - * @param rx_conf - * The pointer to the configuration data to be used for the receive queue. - * NULL value is allowed, in which case default RX configuration - * will be used. - * The *rx_conf* structure contains an *rx_thresh* structure with the values - * of the Prefetch, Host, and Write-Back threshold registers of the receive - * ring. - * In addition it contains the hardware offloads features to activate using - * the DEV_RX_OFFLOAD_* flags. - * @param mb_pool - * The pointer to the memory pool from which to allocate *rte_mbuf* network - * memory buffers to populate each descriptor of the receive ring. - * @return - * - 0: Success, receive queue correctly set up. - * - -EIO: if device is removed. - * - -EINVAL: The size of network buffers which can be allocated from the - * memory pool does not fit the various buffer sizes allowed by the - * device controller. - * - -ENOMEM: Unable to allocate the receive ring descriptors or to - * allocate network memory buffers from the memory pool when - * initializing receive descriptors. - */ -int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool); - -/** - * Allocate and set up a transmit queue for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param tx_queue_id - * The index of the transmit queue to set up. - * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param nb_tx_desc - * The number of transmit descriptors to allocate for the transmit ring. - * @param socket_id - * The *socket_id* argument is the socket identifier in case of NUMA. - * Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for - * the DMA memory allocated for the transmit descriptors of the ring. - * @param tx_conf - * The pointer to the configuration data to be used for the transmit queue. - * NULL value is allowed, in which case default TX configuration - * will be used. 
- * The *tx_conf* structure contains the following data: - * - The *tx_thresh* structure with the values of the Prefetch, Host, and - * Write-Back threshold registers of the transmit ring. - * When setting Write-Back threshold to the value greater then zero, - * *tx_rs_thresh* value should be explicitly set to one. - * - The *tx_free_thresh* value indicates the [minimum] number of network - * buffers that must be pending in the transmit ring to trigger their - * [implicit] freeing by the driver transmit function. - * - The *tx_rs_thresh* value indicates the [minimum] number of transmit - * descriptors that must be pending in the transmit ring before setting the - * RS bit on a descriptor by the driver transmit function. - * The *tx_rs_thresh* value should be less or equal then - * *tx_free_thresh* value, and both of them should be less then - * *nb_tx_desc* - 3. - * - The *txq_flags* member contains flags to pass to the TX queue setup - * function to configure the behavior of the TX queue. This should be set - * to 0 if no special configuration is required. - * This API is obsolete and will be deprecated. Applications - * should set it to ETH_TXQ_FLAGS_IGNORE and use - * the offloads field below. - * - The *offloads* member contains Tx offloads to be enabled. - * Offloads which are not set cannot be used on the datapath. - * - * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces - * the transmit function to use default values. - * @return - * - 0: Success, the transmit queue is correctly set up. - * - -ENOMEM: Unable to allocate the transmit ring descriptors. - */ -int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, - uint16_t nb_tx_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf); - -/** - * Return the NUMA socket to which an Ethernet device is connected - * - * @param port_id - * The port identifier of the Ethernet device - * @return - * The NUMA socket id to which the Ethernet device is connected or - * a default of zero if the socket could not be determined. - * -1 is returned is the port_id value is out of range. - */ -int rte_eth_dev_socket_id(uint16_t port_id); - -/** - * Check if port_id of device is attached - * - * @param port_id - * The port identifier of the Ethernet device - * @return - * - 0 if port is out of range or not attached - * - 1 if device is attached - */ -int rte_eth_dev_is_valid_port(uint16_t port_id); - -/** - * Start specified RX queue of a port. It is used when rx_deferred_start - * flag of the specified queue is true. - * - * @param port_id - * The port identifier of the Ethernet device - * @param rx_queue_id - * The index of the rx queue to update the ring. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @return - * - 0: Success, the receive queue is started. - * - -EINVAL: The port_id or the queue_id out of range. - * - -EIO: if device is removed. - * - -ENOTSUP: The function not supported in PMD driver. - */ -int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); - -/** - * Stop specified RX queue of a port - * - * @param port_id - * The port identifier of the Ethernet device - * @param rx_queue_id - * The index of the rx queue to update the ring. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @return - * - 0: Success, the receive queue is stopped. - * - -EINVAL: The port_id or the queue_id out of range. - * - -EIO: if device is removed. 
- * - -ENOTSUP: The function not supported in PMD driver. - */ -int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); - -/** - * Start TX for specified queue of a port. It is used when tx_deferred_start - * flag of the specified queue is true. - * - * @param port_id - * The port identifier of the Ethernet device - * @param tx_queue_id - * The index of the tx queue to update the ring. - * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @return - * - 0: Success, the transmit queue is started. - * - -EINVAL: The port_id or the queue_id out of range. - * - -EIO: if device is removed. - * - -ENOTSUP: The function not supported in PMD driver. - */ -int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); - -/** - * Stop specified TX queue of a port - * - * @param port_id - * The port identifier of the Ethernet device - * @param tx_queue_id - * The index of the tx queue to update the ring. - * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @return - * - 0: Success, the transmit queue is stopped. - * - -EINVAL: The port_id or the queue_id out of range. - * - -EIO: if device is removed. - * - -ENOTSUP: The function not supported in PMD driver. - */ -int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); - -/** - * Start an Ethernet device. - * - * The device start step is the last one and consists of setting the configured - * offload features and in starting the transmit and the receive units of the - * device. - * On success, all basic functions exported by the Ethernet API (link status, - * receive/transmit, and so on) can be invoked. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - 0: Success, Ethernet device started. - * - <0: Error code of the driver device start function. - */ -int rte_eth_dev_start(uint16_t port_id); - -/** - * Stop an Ethernet device. The device can be restarted with a call to - * rte_eth_dev_start() - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_dev_stop(uint16_t port_id); - -/** - * Link up an Ethernet device. - * - * Set device link up will re-enable the device rx/tx - * functionality after it is previously set device linked down. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - 0: Success, Ethernet device linked up. - * - <0: Error code of the driver device link up function. - */ -int rte_eth_dev_set_link_up(uint16_t port_id); - -/** - * Link down an Ethernet device. - * The device rx/tx functionality will be disabled if success, - * and it can be re-enabled with a call to - * rte_eth_dev_set_link_up() - * - * @param port_id - * The port identifier of the Ethernet device. - */ -int rte_eth_dev_set_link_down(uint16_t port_id); - -/** - * Close a stopped Ethernet device. The device cannot be restarted! - * The function frees all resources except for needed by the - * closed state. To free these resources, call rte_eth_dev_detach(). - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_dev_close(uint16_t port_id); - -/** - * Reset a Ethernet device and keep its port id. - * - * When a port has to be reset passively, the DPDK application can invoke - * this function. For example when a PF is reset, all its VFs should also - * be reset. 
Normally a DPDK application can invoke this function when - * RTE_ETH_EVENT_INTR_RESET event is detected, but can also use it to start - * a port reset in other circumstances. - * - * When this function is called, it first stops the port and then calls the - * PMD specific dev_uninit( ) and dev_init( ) to return the port to initial - * state, in which no Tx and Rx queues are setup, as if the port has been - * reset and not started. The port keeps the port id it had before the - * function call. - * - * After calling rte_eth_dev_reset( ), the application should use - * rte_eth_dev_configure( ), rte_eth_rx_queue_setup( ), - * rte_eth_tx_queue_setup( ), and rte_eth_dev_start( ) - * to reconfigure the device as appropriate. - * - * Note: To avoid unexpected behavior, the application should stop calling - * Tx and Rx functions before calling rte_eth_dev_reset( ). For thread - * safety, all these controlling functions should be called from the same - * thread. - * - * @param port_id - * The port identifier of the Ethernet device. - * - * @return - * - (0) if successful. - * - (-EINVAL) if port identifier is invalid. - * - (-ENOTSUP) if hardware doesn't support this function. - * - (-EPERM) if not ran from the primary process. - * - (-EIO) if re-initialisation failed or device is removed. - * - (-ENOMEM) if the reset failed due to OOM. - * - (-EAGAIN) if the reset temporarily failed and should be retried later. - */ -int rte_eth_dev_reset(uint16_t port_id); - -/** - * Enable receipt in promiscuous mode for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_promiscuous_enable(uint16_t port_id); - -/** - * Disable receipt in promiscuous mode for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_promiscuous_disable(uint16_t port_id); - -/** - * Return the value of promiscuous mode for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (1) if promiscuous is enabled - * - (0) if promiscuous is disabled. - * - (-1) on error - */ -int rte_eth_promiscuous_get(uint16_t port_id); - -/** - * Enable the receipt of any multicast frame by an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_allmulticast_enable(uint16_t port_id); - -/** - * Disable the receipt of all multicast frames by an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_allmulticast_disable(uint16_t port_id); - -/** - * Return the value of allmulticast mode for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (1) if allmulticast is enabled - * - (0) if allmulticast is disabled. - * - (-1) on error - */ -int rte_eth_allmulticast_get(uint16_t port_id); - -/** - * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX - * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need - * to wait up to 9 seconds in it. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param link - * A pointer to an *rte_eth_link* structure to be filled with - * the status, the speed and the mode of the Ethernet device link. - */ -void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); - -/** - * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX - * or FULL-DUPLEX) of the physical link of an Ethernet device. 
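Illustrative sketch combining the promiscuous-mode call above with the link query (using the no-wait variant documented just below); the link_status and link_speed field names of struct rte_eth_link are assumed from the structure defined earlier in this header.

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

/* Enable promiscuous mode and report the current link state. */
static void
show_link(uint16_t port_id)
{
        struct rte_eth_link link;

        rte_eth_promiscuous_enable(port_id);

        memset(&link, 0, sizeof(link));
        rte_eth_link_get_nowait(port_id, &link);
        if (link.link_status)
                printf("port %u up at %u Mbps\n", port_id, link.link_speed);
        else
                printf("port %u down\n", port_id);
}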
It is a no-wait - * version of rte_eth_link_get(). - * - * @param port_id - * The port identifier of the Ethernet device. - * @param link - * A pointer to an *rte_eth_link* structure to be filled with - * the status, the speed and the mode of the Ethernet device link. - */ -void rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); - -/** - * Retrieve the general I/O statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param stats - * A pointer to a structure of type *rte_eth_stats* to be filled with - * the values of device counters for the following set of statistics: - * - *ipackets* with the total of successfully received packets. - * - *opackets* with the total of successfully transmitted packets. - * - *ibytes* with the total of successfully received bytes. - * - *obytes* with the total of successfully transmitted bytes. - * - *ierrors* with the total of erroneous received packets. - * - *oerrors* with the total of failed transmitted packets. - * @return - * Zero if successful. Non-zero otherwise. - */ -int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats); - -/** - * Reset the general I/O statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (0) if device notified to reset stats. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - */ -int rte_eth_stats_reset(uint16_t port_id); - -/** - * Retrieve names of extended statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param xstats_names - * An rte_eth_xstat_name array of at least *size* elements to - * be filled. If set to NULL, the function returns the required number - * of elements. - * @param size - * The size of the xstats_names array (number of elements). - * @return - * - A positive value lower or equal to size: success. The return value - * is the number of entries filled in the stats table. - * - A positive value higher than size: error, the given statistics table - * is too small. The return value corresponds to the size that should - * be given to succeed. The entries in the table are not valid and - * shall not be used by the caller. - * - A negative value on error (invalid port id). - */ -int rte_eth_xstats_get_names(uint16_t port_id, - struct rte_eth_xstat_name *xstats_names, - unsigned int size); - -/** - * Retrieve extended statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param xstats - * A pointer to a table of structure of type *rte_eth_xstat* - * to be filled with device statistics ids and values: id is the - * index of the name string in xstats_names (see rte_eth_xstats_get_names()), - * and value is the statistic counter. - * This parameter can be set to NULL if n is 0. - * @param n - * The size of the xstats array (number of elements). - * @return - * - A positive value lower or equal to n: success. The return value - * is the number of entries filled in the stats table. - * - A positive value higher than n: error, the given statistics table - * is too small. The return value corresponds to the size that should - * be given to succeed. The entries in the table are not valid and - * shall not be used by the caller. - * - A negative value on error (invalid port id). 
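Illustrative sketch of the usual xstats retrieval pattern documented above: a NULL first call to size the name table, then matching calls for names and values, with each value's id indexing into the name array.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

/* Print every extended statistic exposed by a port. */
static int
dump_xstats(uint16_t port_id)
{
        struct rte_eth_xstat_name *names;
        struct rte_eth_xstat *values;
        int n, ret, i;

        n = rte_eth_xstats_get_names(port_id, NULL, 0);
        if (n <= 0)
                return n;

        names = calloc(n, sizeof(*names));
        values = calloc(n, sizeof(*values));
        if (names == NULL || values == NULL) {
                free(names);
                free(values);
                return -1;
        }

        ret = rte_eth_xstats_get_names(port_id, names, n);
        if (ret > 0 && ret <= n) {
                ret = rte_eth_xstats_get(port_id, values, ret);
                for (i = 0; i < ret; i++)
                        printf("%s: %" PRIu64 "\n",
                               names[values[i].id].name, values[i].value);
        }

        free(names);
        free(values);
        return 0;
}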
- */ -int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, - unsigned int n); - -/** - * Retrieve names of extended statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param xstats_names - * An rte_eth_xstat_name array of at least *size* elements to - * be filled. If set to NULL, the function returns the required number - * of elements. - * @param ids - * IDs array given by app to retrieve specific statistics - * @param size - * The size of the xstats_names array (number of elements). - * @return - * - A positive value lower or equal to size: success. The return value - * is the number of entries filled in the stats table. - * - A positive value higher than size: error, the given statistics table - * is too small. The return value corresponds to the size that should - * be given to succeed. The entries in the table are not valid and - * shall not be used by the caller. - * - A negative value on error (invalid port id). - */ -int -rte_eth_xstats_get_names_by_id(uint16_t port_id, - struct rte_eth_xstat_name *xstats_names, unsigned int size, - uint64_t *ids); - -/** - * Retrieve extended statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param ids - * A pointer to an ids array passed by application. This tells which - * statistics values function should retrieve. This parameter - * can be set to NULL if size is 0. In this case function will retrieve - * all avalible statistics. - * @param values - * A pointer to a table to be filled with device statistics values. - * @param size - * The size of the ids array (number of elements). - * @return - * - A positive value lower or equal to size: success. The return value - * is the number of entries filled in the stats table. - * - A positive value higher than size: error, the given statistics table - * is too small. The return value corresponds to the size that should - * be given to succeed. The entries in the table are not valid and - * shall not be used by the caller. - * - A negative value on error (invalid port id). - */ -int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, - uint64_t *values, unsigned int size); - -/** - * Gets the ID of a statistic from its name. - * - * This function searches for the statistics using string compares, and - * as such should not be used on the fast-path. For fast-path retrieval of - * specific statistics, store the ID as provided in *id* from this function, - * and pass the ID to rte_eth_xstats_get() - * - * @param port_id The port to look up statistics from - * @param xstat_name The name of the statistic to return - * @param[out] id A pointer to an app-supplied uint64_t which should be - * set to the ID of the stat if the stat exists. - * @return - * 0 on success - * -ENODEV for invalid port_id, - * -EIO if device is removed, - * -EINVAL if the xstat_name doesn't exist in port_id - */ -int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, - uint64_t *id); - -/** - * Reset extended statistics of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - */ -void rte_eth_xstats_reset(uint16_t port_id); - -/** - * Set a mapping for the specified transmit queue to the specified per-queue - * statistics counter. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param tx_queue_id - * The index of the transmit queue for which a queue stats mapping is required. 
- * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param stat_idx - * The per-queue packet statistics functionality number that the transmit - * queue is to be assigned. - * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. - * @return - * Zero if successful. Non-zero otherwise. - */ -int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, - uint16_t tx_queue_id, uint8_t stat_idx); - -/** - * Set a mapping for the specified receive queue to the specified per-queue - * statistics counter. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rx_queue_id - * The index of the receive queue for which a queue stats mapping is required. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param stat_idx - * The per-queue packet statistics functionality number that the receive - * queue is to be assigned. - * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. - * @return - * Zero if successful. Non-zero otherwise. - */ -int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, - uint16_t rx_queue_id, - uint8_t stat_idx); - -/** - * Retrieve the Ethernet address of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mac_addr - * A pointer to a structure of type *ether_addr* to be filled with - * the Ethernet address of the Ethernet device. - */ -void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr); - -/** - * Retrieve the contextual information of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param dev_info - * A pointer to a structure of type *rte_eth_dev_info* to be filled with - * the contextual information of the Ethernet device. - */ -void rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info); - -/** - * Retrieve the firmware version of a device. - * - * @param port_id - * The port identifier of the device. - * @param fw_version - * A pointer to a string array storing the firmware version of a device, - * the string includes terminating null. This pointer is allocated by caller. - * @param fw_size - * The size of the string array pointed by fw_version, which should be - * large enough to store firmware version of the device. - * @return - * - (0) if successful. - * - (-ENOTSUP) if operation is not supported. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (>0) if *fw_size* is not enough to store firmware version, return - * the size of the non truncated string. - */ -int rte_eth_dev_fw_version_get(uint16_t port_id, - char *fw_version, size_t fw_size); - -/** - * Retrieve the supported packet types of an Ethernet device. - * - * When a packet type is announced as supported, it *must* be recognized by - * the PMD. For instance, if RTE_PTYPE_L2_ETHER, RTE_PTYPE_L2_ETHER_VLAN - * and RTE_PTYPE_L3_IPV4 are announced, the PMD must return the following - * packet types for these packets: - * - Ether/IPv4 -> RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 - * - Ether/Vlan/IPv4 -> RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4 - * - Ether/[anything else] -> RTE_PTYPE_L2_ETHER - * - Ether/Vlan/[anything else] -> RTE_PTYPE_L2_ETHER_VLAN - * - * When a packet is received by a PMD, the most precise type must be - * returned among the ones supported. 
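Illustrative sketch of two of the query calls documented above; the addr_bytes layout of struct ether_addr comes from rte_ether.h and is an assumption here, as is the 64-byte firmware string size.

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_ether.h>

/* Print the port MAC address and, when supported, its firmware version. */
static void
show_port_identity(uint16_t port_id)
{
        struct ether_addr mac;
        char fw[64];

        rte_eth_macaddr_get(port_id, &mac);
        printf("port %u MAC %02x:%02x:%02x:%02x:%02x:%02x\n", port_id,
               mac.addr_bytes[0], mac.addr_bytes[1], mac.addr_bytes[2],
               mac.addr_bytes[3], mac.addr_bytes[4], mac.addr_bytes[5]);

        if (rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw)) == 0)
                printf("port %u firmware %s\n", port_id, fw);
}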
However a PMD is allowed to set - * packet type that is not in the supported list, at the condition that it - * is more precise. Therefore, a PMD announcing no supported packet types - * can still set a matching packet type in a received packet. - * - * @note - * Better to invoke this API after the device is already started or rx burst - * function is decided, to obtain correct supported ptypes. - * @note - * if a given PMD does not report what ptypes it supports, then the supported - * ptype count is reported as 0. - * @param port_id - * The port identifier of the Ethernet device. - * @param ptype_mask - * A hint of what kind of packet type which the caller is interested in. - * @param ptypes - * An array pointer to store adequate packet types, allocated by caller. - * @param num - * Size of the array pointed by param ptypes. - * @return - * - (>=0) Number of supported ptypes. If the number of types exceeds num, - * only num entries will be filled into the ptypes array, but the full - * count of supported ptypes will be returned. - * - (-ENODEV) if *port_id* invalid. - */ -int rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, - uint32_t *ptypes, int num); - -/** - * Retrieve the MTU of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mtu - * A pointer to a uint16_t where the retrieved MTU is to be stored. - * @return - * - (0) if successful. - * - (-ENODEV) if *port_id* invalid. - */ -int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu); - -/** - * Change the MTU of an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mtu - * A uint16_t for the MTU to be applied. - * @return - * - (0) if successful. - * - (-ENOTSUP) if operation is not supported. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if *mtu* invalid. - * - (-EBUSY) if operation is not allowed when the port is running - */ -int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu); - -/** - * Enable/Disable hardware filtering by an Ethernet device of received - * VLAN packets tagged with a given VLAN Tag Identifier. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param vlan_id - * The VLAN Tag Identifier whose filtering must be enabled or disabled. - * @param on - * If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*. - * Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*. - * @return - * - (0) if successful. - * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-ENOSYS) if VLAN filtering on *port_id* disabled. - * - (-EINVAL) if *vlan_id* > 4095. - */ -int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on); - -/** - * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device. - * 82599/X540/X550 can support VLAN stripping at the rx queue level - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rx_queue_id - * The index of the receive queue for which a queue stats mapping is required. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param on - * If 1, Enable VLAN Stripping of the receive queue of the Ethernet port. - * If 0, Disable VLAN Stripping of the receive queue of the Ethernet port. - * @return - * - (0) if successful. 
- * - (-ENOSUP) if hardware-assisted VLAN stripping not configured. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if *rx_queue_id* invalid. - */ -int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, - int on); - -/** - * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to - * the VLAN Header. This is a register setup available on some Intel NIC, not - * but all, please check the data sheet for availability. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param vlan_type - * The vlan type. - * @param tag_type - * The Tag Protocol ID - * @return - * - (0) if successful. - * - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_set_vlan_ether_type(uint16_t port_id, - enum rte_vlan_type vlan_type, - uint16_t tag_type); - -/** - * Set VLAN offload configuration on an Ethernet device - * Enable/Disable Extended VLAN by an Ethernet device, This is a register setup - * available on some Intel NIC, not but all, please check the data sheet for - * availability. - * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here - * the configuration is applied on the port level. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param offload_mask - * The VLAN Offload bit mask can be mixed use with "OR" - * ETH_VLAN_STRIP_OFFLOAD - * ETH_VLAN_FILTER_OFFLOAD - * ETH_VLAN_EXTEND_OFFLOAD - * @return - * - (0) if successful. - * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask); - -/** - * Read VLAN Offload configuration from an Ethernet device - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (>0) if successful. Bit mask to indicate - * ETH_VLAN_STRIP_OFFLOAD - * ETH_VLAN_FILTER_OFFLOAD - * ETH_VLAN_EXTEND_OFFLOAD - * - (-ENODEV) if *port_id* invalid. - */ -int rte_eth_dev_get_vlan_offload(uint16_t port_id); - -/** - * Set port based TX VLAN insertion on or off. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param pvid - * Port based TX VLAN identifier together with user priority. - * @param on - * Turn on or off the port based TX VLAN insertion. - * - * @return - * - (0) if successful. - * - negative if failed. - */ -int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on); - -typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count, - void *userdata); - -/** - * Structure used to buffer packets for future TX - * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush - */ -struct rte_eth_dev_tx_buffer { - buffer_tx_error_fn error_callback; - void *error_userdata; - uint16_t size; /**< Size of buffer for buffered tx */ - uint16_t length; /**< Number of packets in the array */ - struct rte_mbuf *pkts[]; - /**< Pending packets to be sent on explicit flush or when full */ -}; - -/** - * Calculate the size of the tx buffer. - * - * @param sz - * Number of stored packets. - */ -#define RTE_ETH_TX_BUFFER_SIZE(sz) \ - (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf *)) - -/** - * Initialize default values for buffered transmitting - * - * @param buffer - * Tx buffer to be initialized. 
- * @param size - * Buffer size - * @return - * 0 if no error - */ -int -rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size); - -/** - * Configure a callback for buffered packets which cannot be sent - * - * Register a specific callback to be called when an attempt is made to send - * all packets buffered on an ethernet port, but not all packets can - * successfully be sent. The callback registered here will be called only - * from calls to rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs. - * The default callback configured for each queue by default just frees the - * packets back to the calling mempool. If additional behaviour is required, - * for example, to count dropped packets, or to retry transmission of packets - * which cannot be sent, this function should be used to register a suitable - * callback function to implement the desired behaviour. - * The example callback "rte_eth_count_unsent_packet_callback()" is also - * provided as reference. - * - * @param buffer - * The port identifier of the Ethernet device. - * @param callback - * The function to be used as the callback. - * @param userdata - * Arbitrary parameter to be passed to the callback function - * @return - * 0 on success, or -1 on error with rte_errno set appropriately - */ -int -rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, - buffer_tx_error_fn callback, void *userdata); - -/** - * Callback function for silently dropping unsent buffered packets. - * - * This function can be passed to rte_eth_tx_buffer_set_err_callback() to - * adjust the default behavior when buffered packets cannot be sent. This - * function drops any unsent packets silently and is used by tx buffered - * operations as default behavior. - * - * NOTE: this function should not be called directly, instead it should be used - * as a callback for packet buffering. - * - * NOTE: when configuring this function as a callback with - * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter - * should point to an uint64_t value. - * - * @param pkts - * The previously buffered packets which could not be sent - * @param unsent - * The number of unsent packets in the pkts array - * @param userdata - * Not used - */ -void -rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, - void *userdata); - -/** - * Callback function for tracking unsent buffered packets. - * - * This function can be passed to rte_eth_tx_buffer_set_err_callback() to - * adjust the default behavior when buffered packets cannot be sent. This - * function drops any unsent packets, but also updates a user-supplied counter - * to track the overall number of packets dropped. The counter should be an - * uint64_t variable. - * - * NOTE: this function should not be called directly, instead it should be used - * as a callback for packet buffering. - * - * NOTE: when configuring this function as a callback with - * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter - * should point to an uint64_t value. - * - * @param pkts - * The previously buffered packets which could not be sent - * @param unsent - * The number of unsent packets in the pkts array - * @param userdata - * Pointer to an uint64_t value, which will be incremented by unsent - */ -void -rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, - void *userdata); - -/** - * Request the driver to free mbufs currently cached by the driver. The - * driver will only free the mbuf if it is no longer in use. 
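Illustrative sketch of setting up the buffered-Tx helper described above: the buffer is sized with RTE_ETH_TX_BUFFER_SIZE(), initialized with rte_eth_tx_buffer_init(), and given the counting error callback so drops accumulate in a user counter. The burst size of 32 is arbitrary; the resulting buffer would then be passed to the rte_eth_tx_buffer()/rte_eth_tx_buffer_flush() helpers this structure is documented to serve.

#include <stdlib.h>
#include <rte_ethdev.h>

#define TX_BURST_SZ 32

static uint64_t tx_drops;

/* Allocate and initialize a Tx buffer that counts dropped packets. */
static struct rte_eth_dev_tx_buffer *
make_tx_buffer(void)
{
        struct rte_eth_dev_tx_buffer *buf;

        buf = calloc(1, RTE_ETH_TX_BUFFER_SIZE(TX_BURST_SZ));
        if (buf == NULL)
                return NULL;

        if (rte_eth_tx_buffer_init(buf, TX_BURST_SZ) != 0 ||
            rte_eth_tx_buffer_set_err_callback(buf,
                        rte_eth_tx_buffer_count_callback, &tx_drops) != 0) {
                free(buf);
                return NULL;
        }
        return buf;
}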
It is the - * application's responsibity to ensure rte_eth_tx_buffer_flush(..) is - * called if needed. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the transmit queue through which output packets must be - * sent. - * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param free_cnt - * Maximum number of packets to free. Use 0 to indicate all possible packets - * should be freed. Note that a packet may be using multiple mbufs. - * @return - * Failure: < 0 - * -ENODEV: Invalid interface - * -EIO: device is removed - * -ENOTSUP: Driver does not support function - * Success: >= 0 - * 0-n: Number of packets freed. More packets may still remain in ring that - * are in use. - */ -int -rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt); - -/** - * The eth device event type for interrupt, and maybe others in the future. - */ -enum rte_eth_event_type { - RTE_ETH_EVENT_UNKNOWN, /**< unknown event type */ - RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */ - RTE_ETH_EVENT_QUEUE_STATE, - /**< queue state event (enabled/disabled) */ - RTE_ETH_EVENT_INTR_RESET, - /**< reset interrupt event, sent to VF on PF reset */ - RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ - RTE_ETH_EVENT_MACSEC, /**< MACsec offload related event */ - RTE_ETH_EVENT_INTR_RMV, /**< device removal event */ - RTE_ETH_EVENT_NEW, /**< port is probed */ - RTE_ETH_EVENT_DESTROY, /**< port is released */ - RTE_ETH_EVENT_MAX /**< max value of this enum */ -}; - -typedef int (*rte_eth_dev_cb_fn)(uint16_t port_id, - enum rte_eth_event_type event, void *cb_arg, void *ret_param); -/**< user application callback to be registered for interrupts */ - -/** - * Register a callback function for port event. - * - * @param port_id - * Port id. - * RTE_ETH_ALL means register the event for all port ids. - * @param event - * Event interested. - * @param cb_fn - * User supplied callback function to be called. - * @param cb_arg - * Pointer to the parameters for the registered callback. - * - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int rte_eth_dev_callback_register(uint16_t port_id, - enum rte_eth_event_type event, - rte_eth_dev_cb_fn cb_fn, void *cb_arg); - -/** - * Unregister a callback function for port event. - * - * @param port_id - * Port id. - * RTE_ETH_ALL means unregister the event for all port ids. - * @param event - * Event interested. - * @param cb_fn - * User supplied callback function to be called. - * @param cb_arg - * Pointer to the parameters for the registered callback. -1 means to - * remove all for the same callback address and same event. - * - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int rte_eth_dev_callback_unregister(uint16_t port_id, - enum rte_eth_event_type event, - rte_eth_dev_cb_fn cb_fn, void *cb_arg); - -/** - * When there is no rx packet coming in Rx Queue for a long time, we can - * sleep lcore related to RX Queue for power saving, and enable rx interrupt - * to be triggered when Rx packet arrives. - * - * The rte_eth_dev_rx_intr_enable() function enables rx queue - * interrupt on specific rx queue of a port. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the receive queue from which to retrieve input packets. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). 
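Illustrative sketch of the event callback registration documented above, using the rte_eth_dev_cb_fn prototype; the callback body only logs link-state-change events.

#include <stdio.h>
#include <rte_ethdev.h>

/* Called from the interrupt thread on every registered port event. */
static int
lsc_event_cb(uint16_t port_id, enum rte_eth_event_type event,
             void *cb_arg, void *ret_param)
{
        (void)cb_arg;
        (void)ret_param;

        if (event == RTE_ETH_EVENT_INTR_LSC)
                printf("port %u: link state changed\n", port_id);
        return 0;
}

/* Register the callback for link-state-change interrupts on one port. */
static int
watch_link(uint16_t port_id)
{
        return rte_eth_dev_callback_register(port_id,
                        RTE_ETH_EVENT_INTR_LSC, lsc_event_cb, NULL);
}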
- * @return - * - (0) if successful. - * - (-ENOTSUP) if underlying hardware OR driver doesn't support - * that operation. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id); - -/** - * When lcore wakes up from rx interrupt indicating packet coming, disable rx - * interrupt and returns to polling mode. - * - * The rte_eth_dev_rx_intr_disable() function disables rx queue - * interrupt on specific rx queue of a port. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the receive queue from which to retrieve input packets. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @return - * - (0) if successful. - * - (-ENOTSUP) if underlying hardware OR driver doesn't support - * that operation. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id); - -/** - * RX Interrupt control per port. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param epfd - * Epoll instance fd which the intr vector associated to. - * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. - * @param op - * The operation be performed for the vector. - * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data); - -/** - * RX Interrupt control per queue. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the receive queue from which to retrieve input packets. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param epfd - * Epoll instance fd which the intr vector associated to. - * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. - * @param op - * The operation be performed for the vector. - * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, - int epfd, int op, void *data); - -/** - * Turn on the LED on the Ethernet device. - * This function turns on the LED on the Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (0) if successful. - * - (-ENOTSUP) if underlying hardware OR driver doesn't support - * that operation. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_led_on(uint16_t port_id); - -/** - * Turn off the LED on the Ethernet device. - * This function turns off the LED on the Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (0) if successful. - * - (-ENOTSUP) if underlying hardware OR driver doesn't support - * that operation. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_led_off(uint16_t port_id); - -/** - * Get current status of the Ethernet link flow control for Ethernet device - * - * @param port_id - * The port identifier of the Ethernet device. - * @param fc_conf - * The pointer to the structure where to store the flow control parameters. 
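Illustrative sketch of the Rx interrupt controls documented above: the queue's interrupt vector is added to the calling thread's epoll instance and then armed. The RTE_EPOLL_PER_THREAD and RTE_INTR_EVENT_ADD constants are assumed to come from the EAL interrupt headers pulled in by this one.

#include <rte_ethdev.h>

/* Register one queue's Rx interrupt with the per-thread epoll instance
 * (constants from the EAL interrupt API) and arm it before sleeping.
 */
static int
arm_rx_interrupt(uint16_t port_id, uint16_t queue_id)
{
        int ret;

        ret = rte_eth_dev_rx_intr_ctl_q(port_id, queue_id,
                        RTE_EPOLL_PER_THREAD, RTE_INTR_EVENT_ADD, NULL);
        if (ret != 0)
                return ret;

        return rte_eth_dev_rx_intr_enable(port_id, queue_id);
}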
- * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support flow control. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_flow_ctrl_get(uint16_t port_id, - struct rte_eth_fc_conf *fc_conf); - -/** - * Configure the Ethernet link flow control for Ethernet device - * - * @param port_id - * The port identifier of the Ethernet device. - * @param fc_conf - * The pointer to the structure of the flow control parameters. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support flow control mode. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if bad parameter - * - (-EIO) if flow control setup failure or device is removed. - */ -int rte_eth_dev_flow_ctrl_set(uint16_t port_id, - struct rte_eth_fc_conf *fc_conf); - -/** - * Configure the Ethernet priority flow control under DCB environment - * for Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param pfc_conf - * The pointer to the structure of the priority flow control parameters. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support priority flow control mode. - * - (-ENODEV) if *port_id* invalid. - * - (-EINVAL) if bad parameter - * - (-EIO) if flow control setup failure or device is removed. - */ -int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, - struct rte_eth_pfc_conf *pfc_conf); - -/** - * Add a MAC address to an internal array of addresses used to enable whitelist - * filtering to accept packets only if the destination MAC address matches. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mac_addr - * The MAC address to add. - * @param pool - * VMDq pool index to associate address with (if VMDq is enabled). If VMDq is - * not enabled, this should be set to 0. - * @return - * - (0) if successfully added or *mac_addr* was already added. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port* is invalid. - * - (-EIO) if device is removed. - * - (-ENOSPC) if no more MAC addresses can be added. - * - (-EINVAL) if MAC address is invalid. - */ -int rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *mac_addr, - uint32_t pool); - -/** - * Remove a MAC address from the internal array of addresses. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mac_addr - * MAC address to remove. - * @return - * - (0) if successful, or *mac_addr* didn't exist. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port* invalid. - * - (-EADDRINUSE) if attempting to remove the default MAC address - */ -int rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *mac_addr); - -/** - * Set the default MAC address. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mac_addr - * New default MAC address. - * @return - * - (0) if successful, or *mac_addr* didn't exist. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port* invalid. - * - (-EINVAL) if MAC address is invalid. - */ -int rte_eth_dev_default_mac_addr_set(uint16_t port_id, - struct ether_addr *mac_addr); - -/** - * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param reta_conf - * RETA to update. - * @param reta_size - * Redirection table size. The table size can be queried by - * rte_eth_dev_info_get(). - * @return - * - (0) if successful. 
- * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_rss_reta_update(uint16_t port_id, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size); - - /** - * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param reta_conf - * RETA to query. - * @param reta_size - * Redirection table size. The table size can be queried by - * rte_eth_dev_info_get(). - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_rss_reta_query(uint16_t port_id, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size); - - /** - * Updates unicast hash table for receiving packet with the given destination - * MAC address, and the packet is routed to all VFs for which the RX mode is - * accept packets that match the unicast hash table. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param addr - * Unicast MAC address. - * @param on - * 1 - Set an unicast hash bit for receiving packets with the MAC address. - * 0 - Clear an unicast hash bit. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, - uint8_t on); - - /** - * Updates all unicast hash bitmaps for receiving packet with any Unicast - * Ethernet MAC addresses,the packet is routed to all VFs for which the RX - * mode is accept packets that match the unicast hash table. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param on - * 1 - Set all unicast hash bitmaps for receiving all the Ethernet - * MAC addresses - * 0 - Clear all unicast hash bitmaps - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on); - -/** - * Set a traffic mirroring rule on an Ethernet device - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mirror_conf - * The pointer to the traffic mirroring structure describing the mirroring rule. - * The *rte_eth_vm_mirror_conf* structure includes the type of mirroring rule, - * destination pool and the value of rule if enable vlan or pool mirroring. - * - * @param rule_id - * The index of traffic mirroring rule, we support four separated rules. - * @param on - * 1 - Enable a mirroring rule. - * 0 - Disable a mirroring rule. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if the mr_conf information is not correct. - */ -int rte_eth_mirror_rule_set(uint16_t port_id, - struct rte_eth_mirror_conf *mirror_conf, - uint8_t rule_id, - uint8_t on); - -/** - * Reset a traffic mirroring rule on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rule_id - * The index of traffic mirroring rule, we support four separated rules. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. 
- * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_mirror_rule_reset(uint16_t port_id, - uint8_t rule_id); - -/** - * Set the rate limitation for a queue on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_idx - * The queue id. - * @param tx_rate - * The tx rate in Mbps. Allocated from the total port link speed. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this feature. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, - uint16_t tx_rate); - - /** - * Configuration of Receive Side Scaling hash computation of Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rss_conf - * The new configuration to use for RSS hash computation on the port. - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support. - * - (-EINVAL) if bad parameter. - */ -int rte_eth_dev_rss_hash_update(uint16_t port_id, - struct rte_eth_rss_conf *rss_conf); - - /** - * Retrieve current configuration of Receive Side Scaling hash computation - * of Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param rss_conf - * Where to store the current RSS hash configuration of the Ethernet device. - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support RSS. - */ -int -rte_eth_dev_rss_hash_conf_get(uint16_t port_id, - struct rte_eth_rss_conf *rss_conf); - - /** - * Add UDP tunneling port for a specific type of tunnel. - * The packets with this UDP port will be identified as this type of tunnel. - * Before enabling any offloading function for a tunnel, users can call this API - * to change or add more UDP port for the tunnel. So the offloading function - * can take effect on the packets with the specific UDP port. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param tunnel_udp - * UDP tunneling configuration. - * - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support tunnel type. - */ -int -rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, - struct rte_eth_udp_tunnel *tunnel_udp); - - /** - * Delete UDP tunneling port a specific type of tunnel. - * The packets with this UDP port will not be identified as this type of tunnel - * any more. - * Before enabling any offloading function for a tunnel, users can call this API - * to delete a UDP port for the tunnel. So the offloading function will not take - * effect on the packets with the specific UDP port. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param tunnel_udp - * UDP tunneling configuration. - * - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support tunnel type. - */ -int -rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, - struct rte_eth_udp_tunnel *tunnel_udp); - -/** - * Check whether the filter type is supported on an Ethernet device. - * All the supported filter types are defined in 'rte_eth_ctrl.h'. 
- * - * @param port_id - * The port identifier of the Ethernet device. - * @param filter_type - * Filter type. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support this filter type. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - */ -int rte_eth_dev_filter_supported(uint16_t port_id, - enum rte_filter_type filter_type); - -/** - * Take operations to assigned filter type on an Ethernet device. - * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param filter_type - * Filter type. - * @param filter_op - * Type of operation. - * @param arg - * A pointer to arguments defined specifically for the operation. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - others depends on the specific operations implementation. - */ -int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, - enum rte_filter_op filter_op, void *arg); - -/** - * Get DCB information on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param dcb_info - * dcb information. - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support. - */ -int rte_eth_dev_get_dcb_info(uint16_t port_id, - struct rte_eth_dcb_info *dcb_info); - -/** - * Add a callback to be called on packet RX on a given port and queue. - * - * This API configures a function to be called for each burst of - * packets received on a given NIC port queue. The return value is a pointer - * that can be used to later remove the callback using - * rte_eth_remove_rx_callback(). - * - * Multiple functions are called in the order that they are added. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The queue on the Ethernet device on which the callback is to be added. - * @param fn - * The callback function - * @param user_param - * A generic pointer parameter which will be passed to each invocation of the - * callback function on this port and queue. - * - * @return - * NULL on error. - * On success, a pointer value which can later be used to remove the callback. - */ -void *rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, - rte_rx_callback_fn fn, void *user_param); - -/** - * Add a callback that must be called first on packet RX on a given port - * and queue. - * - * This API configures a first function to be called for each burst of - * packets received on a given NIC port queue. The return value is a pointer - * that can be used to later remove the callback using - * rte_eth_remove_rx_callback(). - * - * Multiple functions are called in the order that they are added. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The queue on the Ethernet device on which the callback is to be added. - * @param fn - * The callback function - * @param user_param - * A generic pointer parameter which will be passed to each invocation of the - * callback function on this port and queue. - * - * @return - * NULL on error. - * On success, a pointer value which can later be used to remove the callback. 
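/*
 * Illustrative usage sketch (not part of the original header): registering
 * one of the RX callbacks described above. The callback name, the counter
 * and the helper function are hypothetical; only rte_eth_add_rx_callback()
 * and the rte_rx_callback_fn signature come from this API.
 */
#include <rte_ethdev.h>

static uint64_t rx_seen; /* hypothetical packet counter */

static uint16_t
count_rx_cb(uint16_t port_id __rte_unused, uint16_t queue __rte_unused,
		struct rte_mbuf *pkts[] __rte_unused, uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused, void *user_param __rte_unused)
{
	rx_seen += nb_pkts;	/* observe the burst, leave the mbufs untouched */
	return nb_pkts;		/* packets handed on to the application */
}

static int
install_rx_counter(uint16_t port_id, uint16_t queue_id)
{
	/* Must be called after the RX queue has been set up. */
	void *cb = rte_eth_add_rx_callback(port_id, queue_id, count_rx_cb, NULL);

	return cb == NULL ? -1 : 0;
}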
- */
-void *rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
- rte_rx_callback_fn fn, void *user_param);
-
-/**
- * Add a callback to be called on packet TX on a given port and queue.
- *
- * This API configures a function to be called for each burst of
- * packets sent on a given NIC port queue. The return value is a pointer
- * that can be used to later remove the callback using
- * rte_eth_remove_tx_callback().
- *
- * Multiple functions are called in the order that they are added.
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * @param queue_id
- * The queue on the Ethernet device on which the callback is to be added.
- * @param fn
- * The callback function
- * @param user_param
- * A generic pointer parameter which will be passed to each invocation of the
- * callback function on this port and queue.
- *
- * @return
- * NULL on error.
- * On success, a pointer value which can later be used to remove the callback.
- */
-void *rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
- rte_tx_callback_fn fn, void *user_param);
-
-struct rte_eth_rxtx_callback;
-
-/**
- * Remove an RX packet callback from a given port and queue.
- *
- * This function is used to remove callbacks that were added to a NIC port
- * queue using rte_eth_add_rx_callback().
- *
- * Note: the callback is removed from the callback list but it isn't freed
- * since it may still be in use. The memory for the callback can be
- * subsequently freed back by the application by calling rte_free():
- *
- * - Immediately - if the port is stopped, or the user knows that no
- * callbacks are in flight e.g. if called from the thread doing RX/TX
- * on that queue.
- *
- * - After a short delay - where the delay is sufficient to allow any
- * in-flight callbacks to complete.
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * @param queue_id
- * The queue on the Ethernet device from which the callback is to be removed.
- * @param user_cb
- * User supplied callback created via rte_eth_add_rx_callback().
- *
- * @return
- * - 0: Success. Callback was removed.
- * - -ENOTSUP: Callback support is not available.
- * - -EINVAL: The port_id or the queue_id is out of range, or the callback
- * is NULL or not found for the port/queue.
- */
-int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
- struct rte_eth_rxtx_callback *user_cb);
-
-/**
- * Remove a TX packet callback from a given port and queue.
- *
- * This function is used to remove callbacks that were added to a NIC port
- * queue using rte_eth_add_tx_callback().
- *
- * Note: the callback is removed from the callback list but it isn't freed
- * since it may still be in use. The memory for the callback can be
- * subsequently freed back by the application by calling rte_free():
- *
- * - Immediately - if the port is stopped, or the user knows that no
- * callbacks are in flight e.g. if called from the thread doing RX/TX
- * on that queue.
- *
- * - After a short delay - where the delay is sufficient to allow any
- * in-flight callbacks to complete.
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * @param queue_id
- * The queue on the Ethernet device from which the callback is to be removed.
- * @param user_cb
- * User supplied callback created via rte_eth_add_tx_callback().
- *
- * @return
- * - 0: Success. Callback was removed.
- * - -ENOTSUP: Callback support is not available.
- * - -EINVAL: The port_id or the queue_id is out of range, or the callback - * is NULL or not found for the port/queue. - */ -int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, - struct rte_eth_rxtx_callback *user_cb); - -/** - * Retrieve information about given port's RX queue. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The RX queue on the Ethernet device for which information - * will be retrieved. - * @param qinfo - * A pointer to a structure of type *rte_eth_rxq_info_info* to be filled with - * the information of the Ethernet device. - * - * @return - * - 0: Success - * - -ENOTSUP: routine is not supported by the device PMD. - * - -EINVAL: The port_id or the queue_id is out of range. - */ -int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, - struct rte_eth_rxq_info *qinfo); - -/** - * Retrieve information about given port's TX queue. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The TX queue on the Ethernet device for which information - * will be retrieved. - * @param qinfo - * A pointer to a structure of type *rte_eth_txq_info_info* to be filled with - * the information of the Ethernet device. - * - * @return - * - 0: Success - * - -ENOTSUP: routine is not supported by the device PMD. - * - -EINVAL: The port_id or the queue_id is out of range. - */ -int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, - struct rte_eth_txq_info *qinfo); - -/** - * Retrieve device registers and register attributes (number of registers and - * register size) - * - * @param port_id - * The port identifier of the Ethernet device. - * @param info - * Pointer to rte_dev_reg_info structure to fill in. If info->data is - * NULL the function fills in the width and length fields. If non-NULL - * the registers are put into the buffer pointed at by the data field. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - others depends on the specific operations implementation. - */ -int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info); - -/** - * Retrieve size of device EEPROM - * - * @param port_id - * The port identifier of the Ethernet device. - * @return - * - (>=0) EEPROM size if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - others depends on the specific operations implementation. - */ -int rte_eth_dev_get_eeprom_length(uint16_t port_id); - -/** - * Retrieve EEPROM and EEPROM attribute - * - * @param port_id - * The port identifier of the Ethernet device. - * @param info - * The template includes buffer for return EEPROM data and - * EEPROM attributes to be filled. - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - others depends on the specific operations implementation. - */ -int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); - -/** - * Program EEPROM with provided data - * - * @param port_id - * The port identifier of the Ethernet device. - * @param info - * The template includes EEPROM data for programming and - * EEPROM attributes to be filled - * @return - * - (0) if successful. - * - (-ENOTSUP) if hardware doesn't support. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. 
- * - others depends on the specific operations implementation. - */ -int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); - -/** - * Set the list of multicast addresses to filter on an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param mc_addr_set - * The array of multicast addresses to set. Equal to NULL when the function - * is invoked to flush the set of filtered addresses. - * @param nb_mc_addr - * The number of multicast addresses in the *mc_addr_set* array. Equal to 0 - * when the function is invoked to flush the set of filtered addresses. - * @return - * - (0) if successful. - * - (-ENODEV) if *port_id* invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. - * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. - */ -int rte_eth_dev_set_mc_addr_list(uint16_t port_id, - struct ether_addr *mc_addr_set, - uint32_t nb_mc_addr); - -/** - * Enable IEEE1588/802.1AS timestamping for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * - * @return - * - 0: Success. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. - */ -int rte_eth_timesync_enable(uint16_t port_id); - -/** - * Disable IEEE1588/802.1AS timestamping for an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * - * @return - * - 0: Success. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. - */ -int rte_eth_timesync_disable(uint16_t port_id); - -/** - * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param timestamp - * Pointer to the timestamp struct. - * @param flags - * Device specific flags. Used to pass the RX timesync register index to - * i40e. Unused in igb/ixgbe, pass 0 instead. - * - * @return - * - 0: Success. - * - -EINVAL: No timestamp is available. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. - */ -int rte_eth_timesync_read_rx_timestamp(uint16_t port_id, - struct timespec *timestamp, uint32_t flags); - -/** - * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param timestamp - * Pointer to the timestamp struct. - * - * @return - * - 0: Success. - * - -EINVAL: No timestamp is available. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. - */ -int rte_eth_timesync_read_tx_timestamp(uint16_t port_id, - struct timespec *timestamp); - -/** - * Adjust the timesync clock on an Ethernet device. - * - * This is usually used in conjunction with other Ethdev timesync functions to - * synchronize the device time using the IEEE1588/802.1AS protocol. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param delta - * The adjustment in nanoseconds. - * - * @return - * - 0: Success. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. 
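/*
 * Illustrative usage sketch (not part of the original header): the typical
 * IEEE1588/802.1AS sequence built from the timesync calls documented above.
 * The function name and the externally computed offset are hypothetical, and
 * error handling is reduced to passing the return codes through.
 */
#include <rte_ethdev.h>

static int
ptp_adjust_once(uint16_t port_id, int64_t measured_offset_ns)
{
	struct timespec rx_ts;
	int ret;

	ret = rte_eth_timesync_enable(port_id);
	if (ret != 0)
		return ret; /* -ENODEV, -EIO or -ENOTSUP */

	/*
	 * Call after an IEEE1588 frame has been received; the flags value is
	 * the timesync register index needed by some PMDs (e.g. i40e),
	 * 0 otherwise.
	 */
	ret = rte_eth_timesync_read_rx_timestamp(port_id, &rx_ts, 0);
	if (ret != 0)
		return ret; /* -EINVAL when no timestamp is latched */

	/* Steer the device clock by the offset computed elsewhere. */
	return rte_eth_timesync_adjust_time(port_id, measured_offset_ns);
}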
- */ -int rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta); - -/** - * Read the time from the timesync clock on an Ethernet device. - * - * This is usually used in conjunction with other Ethdev timesync functions to - * synchronize the device time using the IEEE1588/802.1AS protocol. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param time - * Pointer to the timespec struct that holds the time. - * - * @return - * - 0: Success. - */ -int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time); - -/** - * Set the time of the timesync clock on an Ethernet device. - * - * This is usually used in conjunction with other Ethdev timesync functions to - * synchronize the device time using the IEEE1588/802.1AS protocol. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param time - * Pointer to the timespec struct that holds the time. - * - * @return - * - 0: Success. - * - -EINVAL: No timestamp is available. - * - -ENODEV: The port ID is invalid. - * - -EIO: if device is removed. - * - -ENOTSUP: The function is not supported by the Ethernet driver. - */ -int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time); - -/** - * Config l2 tunnel ether type of an Ethernet device for filtering specific - * tunnel packets by ether type. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param l2_tunnel - * l2 tunnel configuration. - * - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support tunnel type. - */ -int -rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, - struct rte_eth_l2_tunnel_conf *l2_tunnel); - -/** - * Enable/disable l2 tunnel offload functions. Include, - * 1, The ability of parsing a type of l2 tunnel of an Ethernet device. - * Filtering, forwarding and offloading this type of tunnel packets depend on - * this ability. - * 2, Stripping the l2 tunnel tag. - * 3, Insertion of the l2 tunnel tag. - * 4, Forwarding the packets based on the l2 tunnel tag. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param l2_tunnel - * l2 tunnel parameters. - * @param mask - * Indicate the offload function. - * @param en - * Enable or disable this function. - * - * @return - * - (0) if successful. - * - (-ENODEV) if port identifier is invalid. - * - (-EIO) if device is removed. - * - (-ENOTSUP) if hardware doesn't support tunnel type. - */ -int -rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, - struct rte_eth_l2_tunnel_conf *l2_tunnel, - uint32_t mask, - uint8_t en); - -/** -* Get the port id from pci address or device name -* Ex: 0000:2:00.0 or vdev name net_pcap0 -* -* @param name -* pci address or name of the device -* @param port_id -* pointer to port identifier of the device -* @return -* - (0) if successful and port_id is filled. -* - (-ENODEV or -EINVAL) on failure. -*/ -int -rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id); - -/** -* Get the device name from port id -* -* @param port_id -* pointer to port identifier of the device -* @param name -* pci address or name of the device -* @return -* - (0) if successful. -* - (-EINVAL) on failure. -*/ -int -rte_eth_dev_get_name_by_port(uint16_t port_id, char *name); - -/** - * Check that numbers of Rx and Tx descriptors satisfy descriptors limits from - * the ethernet device information, otherwise adjust them to boundaries. 
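/*
 * Illustrative usage sketch (not part of the original header): resolving a
 * port from its PCI address and clamping the requested descriptor counts to
 * the device limits before queue setup, using the helpers described here.
 * The address string and the initial counts are arbitrary example values.
 */
#include <rte_ethdev.h>

static int
pick_port_and_desc(uint16_t *port_id, uint16_t *nb_rxd, uint16_t *nb_txd)
{
	int ret = rte_eth_dev_get_port_by_name("0000:02:00.0", port_id);

	if (ret != 0)
		return ret; /* -ENODEV or -EINVAL */

	*nb_rxd = 1024;
	*nb_txd = 1024;
	/* Values are adjusted in place if they exceed the PMD limits. */
	return rte_eth_dev_adjust_nb_rx_tx_desc(*port_id, nb_rxd, nb_txd);
}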
- * - * @param port_id - * The port identifier of the Ethernet device. - * @param nb_rx_desc - * A pointer to a uint16_t where the number of receive - * descriptors stored. - * @param nb_tx_desc - * A pointer to a uint16_t where the number of transmit - * descriptors stored. - * @return - * - (0) if successful. - * - (-ENOTSUP, -ENODEV or -EINVAL) on failure. - */ -int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, - uint16_t *nb_rx_desc, - uint16_t *nb_tx_desc); - -/** - * Test if a port supports specific mempool ops. - * - * @param port_id - * Port identifier of the Ethernet device. - * @param [in] pool - * The name of the pool operations to test. - * @return - * - 0: best mempool ops choice for this port. - * - 1: mempool ops are supported for this port. - * - -ENOTSUP: mempool ops not supported for this port. - * - -ENODEV: Invalid port Identifier. - * - -EINVAL: Pool param is null. - */ -int -rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool); - -/** - * Get the security context for the Ethernet device. - * - * @param port_id - * Port identifier of the Ethernet device - * @return - * - NULL on error. - * - pointer to security context on success. - */ -void * -rte_eth_dev_get_sec_ctx(uint8_t port_id); - - -#include - -/** - * - * Retrieve a burst of input packets from a receive queue of an Ethernet - * device. The retrieved packets are stored in *rte_mbuf* structures whose - * pointers are supplied in the *rx_pkts* array. - * - * The rte_eth_rx_burst() function loops, parsing the RX ring of the - * receive queue, up to *nb_pkts* packets, and for each completed RX - * descriptor in the ring, it performs the following operations: - * - * - Initialize the *rte_mbuf* data structure associated with the - * RX descriptor according to the information provided by the NIC into - * that RX descriptor. - * - * - Store the *rte_mbuf* data structure into the next entry of the - * *rx_pkts* array. - * - * - Replenish the RX descriptor with a new *rte_mbuf* buffer - * allocated from the memory pool associated with the receive queue at - * initialization time. - * - * When retrieving an input packet that was scattered by the controller - * into multiple receive descriptors, the rte_eth_rx_burst() function - * appends the associated *rte_mbuf* buffers to the first buffer of the - * packet. - * - * The rte_eth_rx_burst() function returns the number of packets - * actually retrieved, which is the number of *rte_mbuf* data structures - * effectively supplied into the *rx_pkts* array. - * A return value equal to *nb_pkts* indicates that the RX queue contained - * at least *rx_pkts* packets, and this is likely to signify that other - * received packets remain in the input queue. Applications implementing - * a "retrieve as much received packets as possible" policy can check this - * specific case and keep invoking the rte_eth_rx_burst() function until - * a value less than *nb_pkts* is returned. - * - * This receive method has the following advantages: - * - * - It allows a run-to-completion network stack engine to retrieve and - * to immediately process received packets in a fast burst-oriented - * approach, avoiding the overhead of unnecessary intermediate packet - * queue/dequeue operations. - * - * - Conversely, it also allows an asynchronous-oriented processing - * method to retrieve bursts of received packets and to immediately - * queue them for further parallel processing by another logical core, - * for instance. 
However, instead of having received packets being - * individually queued by the driver, this approach allows the caller - * of the rte_eth_rx_burst() function to queue a burst of retrieved - * packets at a time and therefore dramatically reduce the cost of - * enqueue/dequeue operations per packet. - * - * - It allows the rte_eth_rx_burst() function of the driver to take - * advantage of burst-oriented hardware features (CPU cache, - * prefetch instructions, and so on) to minimize the number of CPU - * cycles per packet. - * - * To summarize, the proposed receive API enables many - * burst-oriented optimizations in both synchronous and asynchronous - * packet processing environments with no overhead in both cases. - * - * The rte_eth_rx_burst() function does not provide any error - * notification to avoid the corresponding overhead. As a hint, the - * upper-level application might check the status of the device link once - * being systematically returned a 0 value for a given number of tries. - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the receive queue from which to retrieve input packets. - * The value must be in the range [0, nb_rx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param rx_pkts - * The address of an array of pointers to *rte_mbuf* structures that - * must be large enough to store *nb_pkts* pointers in it. - * @param nb_pkts - * The maximum number of packets to retrieve. - * @return - * The number of packets actually retrieved, which is the number - * of pointers to *rte_mbuf* structures effectively supplied to the - * *rx_pkts* array. - */ -static inline uint16_t -rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, - struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - -#ifdef RTE_LIBRTE_ETHDEV_DEBUG - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); - RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0); - - if (queue_id >= dev->data->nb_rx_queues) { - RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); - return 0; - } -#endif - int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], - rx_pkts, nb_pkts); - -#ifdef RTE_ETHDEV_RXTX_CALLBACKS - struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; - - if (unlikely(cb != NULL)) { - do { - nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, - nb_pkts, cb->param); - cb = cb->next; - } while (cb != NULL); - } -#endif - - return nb_rx; -} - -/** - * Get the number of used descriptors of a rx queue - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The queue id on the specific port. - * @return - * The number of used descriptors in the specific queue, or: - * (-EINVAL) if *port_id* or *queue_id* is invalid - * (-ENOTSUP) if the device does not support this function - */ -static inline int -rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id) -{ - struct rte_eth_dev *dev; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - dev = &rte_eth_devices[port_id]; - RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); - if (queue_id >= dev->data->nb_rx_queues) - return -EINVAL; - - return (*dev->dev_ops->rx_queue_count)(dev, queue_id); -} - -/** - * Check if the DD bit of the specific RX descriptor in the queue has been set - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The queue id on the specific port. 
- * @param offset
- * The offset of the descriptor ID from tail.
- * @return
- * - (1) if the specific DD bit is set.
- * - (0) if the specific DD bit is not set.
- * - (-ENODEV) if *port_id* invalid.
- * - (-ENOTSUP) if the device does not support this function
- */
-static inline int
-rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset)
-{
- struct rte_eth_dev *dev = &rte_eth_devices[port_id];
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP);
- return (*dev->dev_ops->rx_descriptor_done)( \
- dev->data->rx_queues[queue_id], offset);
-}
-
-#define RTE_ETH_RX_DESC_AVAIL 0 /**< Desc available for hw. */
-#define RTE_ETH_RX_DESC_DONE 1 /**< Desc done, filled by hw. */
-#define RTE_ETH_RX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */
-
-/**
- * Check the status of a Rx descriptor in the queue.
- *
- * It should be called in a similar context to the Rx function:
- * - on a dataplane core
- * - not concurrently on the same queue
- *
- * Since it's a dataplane function, no check is performed on port_id and
- * queue_id. The caller must therefore ensure that the port is enabled
- * and the queue is configured and running.
- *
- * Note: accessing a random descriptor in the ring may trigger cache
- * misses and have a performance impact.
- *
- * @param port_id
- * A valid port identifier of the Ethernet device.
- * @param queue_id
- * A valid Rx queue identifier on this port.
- * @param offset
- * The offset of the descriptor starting from tail (0 is the next
- * packet to be received by the driver).
- *
- * @return
- * - (RTE_ETH_RX_DESC_AVAIL): Descriptor is available for the hardware to
- * receive a packet.
- * - (RTE_ETH_RX_DESC_DONE): Descriptor is done, it is filled by hw, but
- * not yet processed by the driver (i.e. in the receive queue).
- * - (RTE_ETH_RX_DESC_UNAVAIL): Descriptor is unavailable, either held by
- * the driver and not yet returned to hw, or reserved by the hw.
- * - (-EINVAL) bad descriptor offset.
- * - (-ENOTSUP) if the device does not support this function.
- * - (-ENODEV) bad port or queue (only if compiled with debug).
- */
-static inline int
-rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id,
- uint16_t offset)
-{
- struct rte_eth_dev *dev;
- void *rxq;
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-#endif
- dev = &rte_eth_devices[port_id];
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- if (queue_id >= dev->data->nb_rx_queues)
- return -ENODEV;
-#endif
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_status, -ENOTSUP);
- rxq = dev->data->rx_queues[queue_id];
-
- return (*dev->dev_ops->rx_descriptor_status)(rxq, offset);
-}
-
-#define RTE_ETH_TX_DESC_FULL 0 /**< Desc filled for hw, waiting xmit. */
-#define RTE_ETH_TX_DESC_DONE 1 /**< Desc done, packet is transmitted. */
-#define RTE_ETH_TX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */
-
-/**
- * Check the status of a Tx descriptor in the queue.
- *
- * It should be called in a similar context to the Tx function:
- * - on a dataplane core
- * - not concurrently on the same queue
- *
- * Since it's a dataplane function, no check is performed on port_id and
- * queue_id. The caller must therefore ensure that the port is enabled
- * and the queue is configured and running.
- *
- * Note: accessing a random descriptor in the ring may trigger cache
- * misses and have a performance impact.
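/*
 * Illustrative usage sketch (not part of the original header): estimating RX
 * backlog with the queue-count and descriptor-status helpers above. The
 * probed offset and the threshold are arbitrary example values.
 */
#include <rte_ethdev.h>

static int
rx_queue_backlogged(uint16_t port_id, uint16_t queue_id)
{
	int used = rte_eth_rx_queue_count(port_id, queue_id);

	if (used < 0)
		return used; /* -EINVAL or -ENOTSUP */

	/* Has the hardware already filled the descriptor 64 slots ahead? */
	if (rte_eth_rx_descriptor_status(port_id, queue_id, 64) ==
			RTE_ETH_RX_DESC_DONE)
		return 1;

	return used > 128;
}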
- *
- * @param port_id
- * A valid port identifier of the Ethernet device.
- * @param queue_id
- * A valid Tx queue identifier on this port.
- * @param offset
- * The offset of the descriptor starting from tail (0 is the place where
- * the next packet will be sent).
- *
- * @return
- * - (RTE_ETH_TX_DESC_FULL) Descriptor is being processed by the hw, i.e.
- * in the transmit queue.
- * - (RTE_ETH_TX_DESC_DONE) Hardware is done with this descriptor, it can
- * be reused by the driver.
- * - (RTE_ETH_TX_DESC_UNAVAIL): Descriptor is unavailable, reserved by the
- * driver or the hardware.
- * - (-EINVAL) bad descriptor offset.
- * - (-ENOTSUP) if the device does not support this function.
- * - (-ENODEV) bad port or queue (only if compiled with debug).
- */
-static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
- uint16_t queue_id, uint16_t offset)
-{
- struct rte_eth_dev *dev;
- void *txq;
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-#endif
- dev = &rte_eth_devices[port_id];
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- if (queue_id >= dev->data->nb_tx_queues)
- return -ENODEV;
-#endif
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_descriptor_status, -ENOTSUP);
- txq = dev->data->tx_queues[queue_id];
-
- return (*dev->dev_ops->tx_descriptor_status)(txq, offset);
-}
-
-/**
- * Send a burst of output packets on a transmit queue of an Ethernet device.
- *
- * The rte_eth_tx_burst() function is invoked to transmit output packets
- * on the output queue *queue_id* of the Ethernet device designated by its
- * *port_id*.
- * The *nb_pkts* parameter is the number of packets to send which are
- * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
- * allocated from a pool created with rte_pktmbuf_pool_create().
- * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets,
- * up to the number of transmit descriptors available in the TX ring of the
- * transmit queue.
- * For each packet to send, the rte_eth_tx_burst() function performs
- * the following operations:
- *
- * - Pick up the next available descriptor in the transmit ring.
- *
- * - Free the network buffer previously sent with that descriptor, if any.
- *
- * - Initialize the transmit descriptor with the information provided
- * in the *rte_mbuf data structure.
- *
- * In the case of a segmented packet composed of a list of *rte_mbuf* buffers,
- * the rte_eth_tx_burst() function uses several transmit descriptors
- * of the ring.
- *
- * The rte_eth_tx_burst() function returns the number of packets it
- * actually sent. A return value equal to *nb_pkts* means that all packets
- * have been sent, and this is likely to signify that other output packets
- * could be immediately transmitted again. Applications that implement a
- * "send as many packets to transmit as possible" policy can check this
- * specific case and keep invoking the rte_eth_tx_burst() function until
- * a value less than *nb_pkts* is returned.
- *
- * It is the responsibility of the rte_eth_tx_burst() function to
- * transparently free the memory buffers of packets previously sent.
- * This feature is driven by the *tx_free_thresh* value supplied to the
- * rte_eth_dev_configure() function at device configuration time.
- * When the number of free TX descriptors drops below this threshold, the
- * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers
- * of those packets whose transmission was effectively completed.
- *
- * If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can
- * invoke this function concurrently on the same tx queue without SW lock.
- * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * @param queue_id
- * The index of the transmit queue through which output packets must be
- * sent.
- * The value must be in the range [0, nb_tx_queue - 1] previously supplied
- * to rte_eth_dev_configure().
- * @param tx_pkts
- * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
- * which contain the output packets.
- * @param nb_pkts
- * The maximum number of packets to transmit.
- * @return
- * The number of output packets actually stored in transmit descriptors of
- * the transmit ring. The return value can be less than the value of the
- * *tx_pkts* parameter when the transmit ring is full or has been filled up.
- */
-static inline uint16_t
-rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
- struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- struct rte_eth_dev *dev = &rte_eth_devices[port_id];
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
- RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
-
- if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
- return 0;
- }
-#endif
-
-#ifdef RTE_ETHDEV_RXTX_CALLBACKS
- struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
-
- if (unlikely(cb != NULL)) {
- do {
- nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts,
- cb->param);
- cb = cb->next;
- } while (cb != NULL);
- }
-#endif
-
- return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
-}
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
- * Process a burst of output packets on a transmit queue of an Ethernet device.
- *
- * The rte_eth_tx_prepare() function is invoked to prepare output packets to be
- * transmitted on the output queue *queue_id* of the Ethernet device designated
- * by its *port_id*.
- * The *nb_pkts* parameter is the number of packets to be prepared which are
- * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
- * allocated from a pool created with rte_pktmbuf_pool_create().
- * For each packet to send, the rte_eth_tx_prepare() function performs
- * the following operations:
- *
- * - Check if the packet meets the device requirements for tx offloads.
- *
- * - Check limitations on the number of segments.
- *
- * - Check additional requirements when debug is enabled.
- *
- * - Update and/or reset required checksums when tx offload is set for the packet.
- *
- * Since this function can modify packet data, provided mbufs must be safely
- * writable (e.g. modified data cannot be in shared segment).
- *
- * The rte_eth_tx_prepare() function returns the number of packets ready to be
- * sent. A return value equal to *nb_pkts* means that all packets are valid and
- * ready to be sent, otherwise it stops processing on the first invalid packet
- * and leaves the rest of the packets untouched.
- *
- * When this functionality is not implemented in the driver, all packets are
- * returned untouched.
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * The value must be a valid port id.
- * @param queue_id
- * The index of the transmit queue through which output packets must be
- * sent.
- * The value must be in the range [0, nb_tx_queue - 1] previously supplied
- * to rte_eth_dev_configure().
- * @param tx_pkts
- * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
- * which contain the output packets.
- * @param nb_pkts
- * The maximum number of packets to process.
- * @return
- * The number of packets correct and ready to be sent. The return value can be
- * less than the value of the *tx_pkts* parameter when a packet does not meet
- * the device requirements, with rte_errno set appropriately:
- * - -EINVAL: offload flags are not correctly set
- * - -ENOTSUP: the offload feature is not supported by the hardware
- *
- */
-
-#ifndef RTE_ETHDEV_TX_PREPARE_NOOP
-
-static inline uint16_t
-rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
- struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- struct rte_eth_dev *dev;
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- if (!rte_eth_dev_is_valid_port(port_id)) {
- RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
- rte_errno = -EINVAL;
- return 0;
- }
-#endif
-
- dev = &rte_eth_devices[port_id];
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
- if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
- rte_errno = -EINVAL;
- return 0;
- }
-#endif
-
- if (!dev->tx_pkt_prepare)
- return nb_pkts;
-
- return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
- tx_pkts, nb_pkts);
-}
-
-#else
-
-/*
- * Native NOOP operation for compilation targets which do not require any
- * preparation steps, and where a functional NOOP may introduce an unnecessary
- * performance drop.
- *
- * It is generally not a good idea to enable this globally, and it should not
- * be used if the behavior of tx_preparation can change.
- */
-
-static inline uint16_t
-rte_eth_tx_prepare(__rte_unused uint16_t port_id,
- __rte_unused uint16_t queue_id,
- __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- return nb_pkts;
-}
-
-#endif
-
-/**
- * Send any packets queued up for transmission on a port and HW queue
- *
- * This causes an explicit flush of packets previously buffered via the
- * rte_eth_tx_buffer() function. It returns the number of packets successfully
- * sent to the NIC, and calls the error callback for any unsent packets. Unless
- * explicitly set up otherwise, the default callback simply frees the unsent
- * packets back to the owning mempool.
- *
- * @param port_id
- * The port identifier of the Ethernet device.
- * @param queue_id
- * The index of the transmit queue through which output packets must be
- * sent.
- * The value must be in the range [0, nb_tx_queue - 1] previously supplied
- * to rte_eth_dev_configure().
- * @param buffer
- * Buffer of packets to be transmitted.
- * @return
- * The number of packets successfully sent to the Ethernet device. The error
- * callback is called for any packets which could not be sent.
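/*
 * Illustrative usage sketch (not part of the original header): driving the
 * buffered TX helpers documented here. The burst size, the heap allocation
 * and the function names are hypothetical; rte_eth_tx_buffer_init() and the
 * RTE_ETH_TX_BUFFER_SIZE() macro are assumed to come from rte_ethdev.h.
 */
#include <stdlib.h>
#include <rte_ethdev.h>

#define SKETCH_BURST 32

static struct rte_eth_dev_tx_buffer *sketch_buf;

static int
sketch_tx_init(void)
{
	sketch_buf = calloc(1, RTE_ETH_TX_BUFFER_SIZE(SKETCH_BURST));
	if (sketch_buf == NULL)
		return -1;
	return rte_eth_tx_buffer_init(sketch_buf, SKETCH_BURST);
}

static void
sketch_tx_one(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *m)
{
	/* Queued locally; filling the buffer triggers an implicit flush. */
	rte_eth_tx_buffer(port_id, queue_id, sketch_buf, m);
}

static void
sketch_tx_drain(uint16_t port_id, uint16_t queue_id)
{
	/* Push out anything still pending, e.g. once per poll-loop iteration. */
	rte_eth_tx_buffer_flush(port_id, queue_id, sketch_buf);
}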
- */ -static inline uint16_t -rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id, - struct rte_eth_dev_tx_buffer *buffer) -{ - uint16_t sent; - uint16_t to_send = buffer->length; - - if (to_send == 0) - return 0; - - sent = rte_eth_tx_burst(port_id, queue_id, buffer->pkts, to_send); - - buffer->length = 0; - - /* All packets sent, or to be dealt with by callback below */ - if (unlikely(sent != to_send)) - buffer->error_callback(&buffer->pkts[sent], to_send - sent, - buffer->error_userdata); - - return sent; -} - -/** - * Buffer a single packet for future transmission on a port and queue - * - * This function takes a single mbuf/packet and buffers it for later - * transmission on the particular port and queue specified. Once the buffer is - * full of packets, an attempt will be made to transmit all the buffered - * packets. In case of error, where not all packets can be transmitted, a - * callback is called with the unsent packets as a parameter. If no callback - * is explicitly set up, the unsent packets are just freed back to the owning - * mempool. The function returns the number of packets actually sent i.e. - * 0 if no buffer flush occurred, otherwise the number of packets successfully - * flushed - * - * @param port_id - * The port identifier of the Ethernet device. - * @param queue_id - * The index of the transmit queue through which output packets must be - * sent. - * The value must be in the range [0, nb_tx_queue - 1] previously supplied - * to rte_eth_dev_configure(). - * @param buffer - * Buffer used to collect packets to be sent. - * @param tx_pkt - * Pointer to the packet mbuf to be sent. - * @return - * 0 = packet has been buffered for later transmission - * N > 0 = packet has been buffered, and the buffer was subsequently flushed, - * causing N packets to be sent, and the error callback to be called for - * the rest. - */ -static __rte_always_inline uint16_t -rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id, - struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) -{ - buffer->pkts[buffer->length++] = tx_pkt; - if (buffer->length < buffer->size) - return 0; - - return rte_eth_tx_buffer_flush(port_id, queue_id, buffer); -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ETHDEV_H_ */ diff --git a/lib/librte_ether/rte_ethdev_core.h b/lib/librte_ether/rte_ethdev_core.h deleted file mode 100644 index e5681e46..00000000 --- a/lib/librte_ether/rte_ethdev_core.h +++ /dev/null @@ -1,613 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#ifndef _RTE_ETHDEV_CORE_H_ -#define _RTE_ETHDEV_CORE_H_ - -/** - * @file - * - * RTE Ethernet Device internal header. - * - * This header contains internal data types. But they are still part of the - * public API because they are used by inline functions in the published API. - * - * Applications should not use these directly. - * - */ - -struct rte_eth_dev_callback; -/** @internal Structure to keep track of registered callbacks */ -TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); - -/* - * Definitions of all functions exported by an Ethernet driver through the - * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev* - * structure associated with an Ethernet device. - */ -struct rte_eth_dev; - -typedef int (*eth_dev_configure_t)(struct rte_eth_dev *dev); -/**< @internal Ethernet device configuration. */ - -typedef int (*eth_dev_start_t)(struct rte_eth_dev *dev); -/**< @internal Function used to start a configured Ethernet device. 
*/ - -typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev); -/**< @internal Function used to stop a configured Ethernet device. */ - -typedef int (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev); -/**< @internal Function used to link up a configured Ethernet device. */ - -typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); -/**< @internal Function used to link down a configured Ethernet device. */ - -typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); -/**< @internal Function used to close a configured Ethernet device. */ - -typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev); -/** <@internal Function used to reset a configured Ethernet device. */ - -typedef int (*eth_is_removed_t)(struct rte_eth_dev *dev); -/**< @internal Function used to detect an Ethernet device removal. */ - -typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); -/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */ - -typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev); -/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */ - -typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev); -/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */ - -typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev); -/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */ - -typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, - int wait_to_complete); -/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ - -typedef int (*eth_stats_get_t)(struct rte_eth_dev *dev, - struct rte_eth_stats *igb_stats); -/**< @internal Get global I/O statistics of an Ethernet device. */ - -typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev); -/**< @internal Reset global I/O statistics of an Ethernet device to 0. */ - -typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, - struct rte_eth_xstat *stats, unsigned n); -/**< @internal Get extended stats of an Ethernet device. */ - -typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev, - const uint64_t *ids, - uint64_t *values, - unsigned int n); -/**< @internal Get extended stats of an Ethernet device. */ - -typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); -/**< @internal Reset extended stats of an Ethernet device. */ - -typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev *dev, - struct rte_eth_xstat_name *xstats_names, unsigned size); -/**< @internal Get names of extended stats of an Ethernet device. */ - -typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev, - struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, - unsigned int size); -/**< @internal Get names of extended stats of an Ethernet device. */ - -typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, - uint16_t queue_id, - uint8_t stat_idx, - uint8_t is_rx); -/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */ - -typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, - struct rte_eth_dev_info *dev_info); -/**< @internal Get specific informations of an Ethernet device. */ - -typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev); -/**< @internal Get supported ptypes of an Ethernet device. */ - -typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev, - uint16_t queue_id); -/**< @internal Start rx and tx of a queue of an Ethernet device. 
*/ - -typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev, - uint16_t queue_id); -/**< @internal Stop rx and tx of a queue of an Ethernet device. */ - -typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id, - uint16_t nb_rx_desc, - unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool); -/**< @internal Set up a receive queue of an Ethernet device. */ - -typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev, - uint16_t tx_queue_id, - uint16_t nb_tx_desc, - unsigned int socket_id, - const struct rte_eth_txconf *tx_conf); -/**< @internal Setup a transmit queue of an Ethernet device. */ - -typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id); -/**< @internal Enable interrupt of a receive queue of an Ethernet device. */ - -typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id); -/**< @internal Disable interrupt of a receive queue of an Ethernet device. */ - -typedef void (*eth_queue_release_t)(void *queue); -/**< @internal Release memory resources allocated by given RX/TX queue. */ - -typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id); -/**< @internal Get number of used descriptors on a receive queue. */ - -typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); -/**< @internal Check DD bit of specific RX descriptor */ - -typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset); -/**< @internal Check the status of a Rx descriptor */ - -typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset); -/**< @internal Check the status of a Tx descriptor */ - -typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, - char *fw_version, size_t fw_size); -/**< @internal Get firmware information of an Ethernet device. */ - -typedef int (*eth_tx_done_cleanup_t)(void *txq, uint32_t free_cnt); -/**< @internal Force mbufs to be from TX ring. */ - -typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); - -typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev, - uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo); - -typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu); -/**< @internal Set MTU. */ - -typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev, - uint16_t vlan_id, - int on); -/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */ - -typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev, - enum rte_vlan_type type, uint16_t tpid); -/**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */ - -typedef int (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); -/**< @internal set VLAN offload function by an Ethernet device. */ - -typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, - uint16_t vlan_id, - int on); -/**< @internal set port based TX VLAN insertion by an Ethernet device. */ - -typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev, - uint16_t rx_queue_id, - int on); -/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */ - -typedef uint16_t (*eth_rx_burst_t)(void *rxq, - struct rte_mbuf **rx_pkts, - uint16_t nb_pkts); -/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */ - -typedef uint16_t (*eth_tx_burst_t)(void *txq, - struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); -/**< @internal Send output packets on a transmit queue of an Ethernet device. 
*/ - -typedef uint16_t (*eth_tx_prep_t)(void *txq, - struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); -/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ - -typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, - struct rte_eth_fc_conf *fc_conf); -/**< @internal Get current flow control parameter on an Ethernet device */ - -typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev, - struct rte_eth_fc_conf *fc_conf); -/**< @internal Setup flow control parameter on an Ethernet device */ - -typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev, - struct rte_eth_pfc_conf *pfc_conf); -/**< @internal Setup priority flow control parameter on an Ethernet device */ - -typedef int (*reta_update_t)(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size); -/**< @internal Update RSS redirection table on an Ethernet device */ - -typedef int (*reta_query_t)(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size); -/**< @internal Query RSS redirection table on an Ethernet device */ - -typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev, - struct rte_eth_rss_conf *rss_conf); -/**< @internal Update RSS hash configuration of an Ethernet device */ - -typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev, - struct rte_eth_rss_conf *rss_conf); -/**< @internal Get current RSS hash configuration of an Ethernet device */ - -typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev); -/**< @internal Turn on SW controllable LED on an Ethernet device */ - -typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev); -/**< @internal Turn off SW controllable LED on an Ethernet device */ - -typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); -/**< @internal Remove MAC address from receive address register */ - -typedef int (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, - struct ether_addr *mac_addr, - uint32_t index, - uint32_t vmdq); -/**< @internal Set a MAC address into Receive Address Address Register */ - -typedef void (*eth_mac_addr_set_t)(struct rte_eth_dev *dev, - struct ether_addr *mac_addr); -/**< @internal Set a MAC address into Receive Address Address Register */ - -typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev, - struct ether_addr *mac_addr, - uint8_t on); -/**< @internal Set a Unicast Hash bitmap */ - -typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, - uint8_t on); -/**< @internal Set all Unicast Hash bitmap */ - -typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, - uint16_t queue_idx, - uint16_t tx_rate); -/**< @internal Set queue TX rate */ - -typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, - struct rte_eth_mirror_conf *mirror_conf, - uint8_t rule_id, - uint8_t on); -/**< @internal Add a traffic mirroring rule on an Ethernet device */ - -typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev, - uint8_t rule_id); -/**< @internal Remove a traffic mirroring rule on an Ethernet device */ - -typedef int (*eth_udp_tunnel_port_add_t)(struct rte_eth_dev *dev, - struct rte_eth_udp_tunnel *tunnel_udp); -/**< @internal Add tunneling UDP port */ - -typedef int (*eth_udp_tunnel_port_del_t)(struct rte_eth_dev *dev, - struct rte_eth_udp_tunnel *tunnel_udp); -/**< @internal Delete tunneling UDP port */ - -typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev, - struct ether_addr *mc_addr_set, - uint32_t nb_mc_addr); -/**< @internal set the list of multicast addresses on an Ethernet device */ 
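/*
 * Illustrative sketch (not part of the original header): how a driver might
 * provide one of the callbacks typedef'd above and hook it into its ops
 * table (struct eth_dev_ops, defined below). The "dummy" names are
 * hypothetical; a real PMD would program its hardware filter table here.
 */
static int
dummy_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused,
		struct ether_addr *mc_addr_set __rte_unused,
		uint32_t nb_mc_addr)
{
	if (nb_mc_addr == 0) {
		/* nb_mc_addr == 0 means "flush the multicast filter list". */
		return 0;
	}
	/* Otherwise program mc_addr_set[0..nb_mc_addr-1] into the NIC. */
	return 0;
}

/*
 * The pointer is then placed in the driver's constant ops instance, e.g.:
 *
 *     static const struct eth_dev_ops dummy_ops = {
 *         .set_mc_addr_list = dummy_set_mc_addr_list,
 *     };
 */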
-
-typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev);
-/**< @internal Function used to enable IEEE1588/802.1AS timestamping. */
-
-typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev);
-/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */
-
-typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev,
- struct timespec *timestamp,
- uint32_t flags);
-/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */
-
-typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev,
- struct timespec *timestamp);
-/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */
-
-typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t);
-/**< @internal Function used to adjust the device clock. */
-
-typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev,
- struct timespec *timestamp);
-/**< @internal Function used to get time from the device clock. */
-
-typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev,
- const struct timespec *timestamp);
-/**< @internal Function used to set the time of the device clock. */
-
-typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev,
- struct rte_dev_reg_info *info);
-/**< @internal Retrieve registers */
-
-typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev);
-/**< @internal Retrieve eeprom size */
-
-typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev,
- struct rte_dev_eeprom_info *info);
-/**< @internal Retrieve eeprom data */
-
-typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev,
- struct rte_dev_eeprom_info *info);
-/**< @internal Program eeprom data */
-
-typedef int (*eth_l2_tunnel_eth_type_conf_t)
- (struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel);
-/**< @internal config l2 tunnel ether type */
-
-typedef int (*eth_l2_tunnel_offload_set_t)
- (struct rte_eth_dev *dev,
- struct rte_eth_l2_tunnel_conf *l2_tunnel,
- uint32_t mask,
- uint8_t en);
-/**< @internal enable/disable the l2 tunnel offload functions */
-
-
-typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op,
- void *arg);
-/**< @internal Take operations to assigned filter type on an Ethernet device */
-
-typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops);
-/**< @internal Get Traffic Management (TM) operations on an Ethernet device */
-
-typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops);
-/**< @internal Get Traffic Metering and Policing (MTR) operations */
-
-typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev,
- struct rte_eth_dcb_info *dcb_info);
-/**< @internal Get dcb information on an Ethernet device */
-
-typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
- const char *pool);
-/**< @internal Test if a port supports specific mempool ops */
-
-/**
- * @internal A structure containing the functions exported by an Ethernet driver.
- */
-struct eth_dev_ops {
- eth_dev_configure_t dev_configure; /**< Configure device. */
- eth_dev_start_t dev_start; /**< Start device. */
- eth_dev_stop_t dev_stop; /**< Stop device. */
- eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */
- eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */
- eth_dev_close_t dev_close; /**< Close device. */
- eth_dev_reset_t dev_reset; /**< Reset device. */
- eth_link_update_t link_update; /**< Get device link state. */
- eth_is_removed_t is_removed;
- /**< Check if the device was physically removed.
*/ - - eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ - eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ - eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ - eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */ - eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ - eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ - eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ - eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ - mtu_set_t mtu_set; /**< Set MTU. */ - - eth_stats_get_t stats_get; /**< Get generic device statistics. */ - eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ - eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ - eth_xstats_reset_t xstats_reset; /**< Reset extended device statistics. */ - eth_xstats_get_names_t xstats_get_names; - /**< Get names of extended statistics. */ - eth_queue_stats_mapping_set_t queue_stats_mapping_set; - /**< Configure per queue stat counter mapping. */ - - eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ - eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ - eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ - eth_fw_version_get_t fw_version_get; /**< Get firmware version. */ - eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; - /**< Get packet types supported and identified by device. */ - - vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ - vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ - vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ - vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ - vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ - - eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ - eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ - eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ - eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ - eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ - eth_queue_release_t rx_queue_release; /**< Release RX queue. */ - eth_rx_queue_count_t rx_queue_count; - /**< Get the number of used RX descriptors. */ - eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ - eth_rx_descriptor_status_t rx_descriptor_status; - /**< Check the status of a Rx descriptor. */ - eth_tx_descriptor_status_t tx_descriptor_status; - /**< Check the status of a Tx descriptor. */ - eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ - eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ - eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ - eth_queue_release_t tx_queue_release; /**< Release TX queue. */ - eth_tx_done_cleanup_t tx_done_cleanup;/**< Free tx ring mbufs */ - - eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ - eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ - - flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ - flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ - priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */ - - eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ - eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ - - eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. 
*/ - eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ - - eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ - eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ - eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; - /** Config ether type of l2 tunnel. */ - eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; - /** Enable/disable l2 tunnel offload functions. */ - - eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ - - rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ - rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ - reta_update_t reta_update; /** Update redirection table. */ - reta_query_t reta_query; /** Query redirection table. */ - - eth_get_reg_t get_reg; /**< Get registers. */ - eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ - eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ - eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ - - - eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ - - eth_get_dcb_info get_dcb_info; /** Get DCB information. */ - - eth_timesync_enable_t timesync_enable; - /** Turn IEEE1588/802.1AS timestamping on. */ - eth_timesync_disable_t timesync_disable; - /** Turn IEEE1588/802.1AS timestamping off. */ - eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; - /** Read the IEEE1588/802.1AS RX timestamp. */ - eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; - /** Read the IEEE1588/802.1AS TX timestamp. */ - eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ - eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ - eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ - - eth_xstats_get_by_id_t xstats_get_by_id; - /**< Get extended device statistic values by ID. */ - eth_xstats_get_names_by_id_t xstats_get_names_by_id; - /**< Get name of extended device statistics by ID. */ - - eth_tm_ops_get_t tm_ops_get; - /**< Get Traffic Management (TM) operations. */ - - eth_mtr_ops_get_t mtr_ops_get; - /**< Get Traffic Metering and Policing (MTR) operations. */ - - eth_pool_ops_supported_t pool_ops_supported; - /**< Test if a port supports specific mempool ops */ -}; - -/** - * @internal - * Structure used to hold information about the callbacks to be called for a - * queue on RX and TX. - */ -struct rte_eth_rxtx_callback { - struct rte_eth_rxtx_callback *next; - union{ - rte_rx_callback_fn rx; - rte_tx_callback_fn tx; - } fn; - void *param; -}; - -/** - * @internal - * The generic data structure associated with each ethernet device. - * - * Pointers to burst-oriented packet receive and transmit functions are - * located at the beginning of the structure, along with the pointer to - * where all the data elements for the particular device are stored in shared - * memory. This split allows the function pointer and driver data to be per- - * process, while the actual configuration data for the device is shared. - */ -struct rte_eth_dev { - eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ - eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ - eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. 
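The rx_pkt_burst/tx_pkt_burst/tx_pkt_prepare pointers above back the public rte_eth_rx_burst()/rte_eth_tx_burst()/rte_eth_tx_prepare() inlines. A minimal transmit sketch (send_burst() is a hypothetical helper; unsent mbufs remain owned by the caller):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
send_burst(uint16_t port_id, uint16_t queue_id,
           struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t nb_prep, nb_tx;

        /* Calls dev->tx_pkt_prepare when the PMD provides it. */
        nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
        /* Calls dev->tx_pkt_burst. */
        nb_tx = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
        /* pkts[nb_tx..nb_pkts-1] are still owned by the caller and must be
         * freed or retried. */
        return nb_tx;
}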
*/ - struct rte_eth_dev_data *data; /**< Pointer to device data */ - const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ - struct rte_device *device; /**< Backing device */ - struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ - /** User application callbacks for NIC interrupts */ - struct rte_eth_dev_cb_list link_intr_cbs; - /** - * User-supplied functions called from rx_burst to post-process - * received packets before passing them to the user - */ - struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; - /** - * User-supplied functions called from tx_burst to pre-process - * received packets before passing them to the driver for transmission. - */ - struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; - enum rte_eth_dev_state state; /**< Flag indicating the port state */ - void *security_ctx; /**< Context for security ops */ -} __rte_cache_aligned; - -struct rte_eth_dev_sriov; -struct rte_eth_dev_owner; - -/** - * @internal - * The data part, with no function pointers, associated with each ethernet device. - * - * This structure is safe to place in shared memory to be common among different - * processes in a multi-process configuration. - */ -struct rte_eth_dev_data { - char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */ - - void **rx_queues; /**< Array of pointers to RX queues. */ - void **tx_queues; /**< Array of pointers to TX queues. */ - uint16_t nb_rx_queues; /**< Number of RX queues. */ - uint16_t nb_tx_queues; /**< Number of TX queues. */ - - struct rte_eth_dev_sriov sriov; /**< SRIOV data */ - - void *dev_private; /**< PMD-specific private data */ - - struct rte_eth_link dev_link; - /**< Link-level information & status */ - - struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ - uint16_t mtu; /**< Maximum Transmission Unit. */ - - uint32_t min_rx_buf_size; - /**< Common rx buffer size handled by all queues */ - - uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ - struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ - uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; - /** bitmap array of associating Ethernet MAC addresses to pools */ - struct ether_addr* hash_mac_addrs; - /** Device Ethernet MAC addresses of hash filtering. */ - uint16_t port_id; /**< Device [external] port identifier. */ - __extension__ - uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ - scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ - all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ - dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ - lro : 1; /**< RX LRO is ON(1) / OFF(0) */ - uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; - /** Queues state: STARTED(1) / STOPPED(0) */ - uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; - /** Queues state: STARTED(1) / STOPPED(0) */ - uint32_t dev_flags; /**< Capabilities */ - enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - int numa_node; /**< NUMA node connection */ - struct rte_vlan_filter_conf vlan_filter_conf; - /**< VLAN filter configuration. */ - struct rte_eth_dev_owner owner; /**< The port owner. */ -} __rte_cache_aligned; - -/** - * @internal - * The pool of *rte_eth_dev* structures. The size of the pool - * is configured at compile-time in the file. 
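The post_rx_burst_cbs[] slots above are filled by rte_eth_add_rx_callback(). A sketch of a per-queue counting callback, assuming RX/TX callbacks are enabled in the build and that port 0/queue 0 exist (count_rx_cb and install_rx_counter are illustrative names):

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint64_t rx_seen;        /* illustrative counter */

static uint16_t
count_rx_cb(uint16_t port_id __rte_unused, uint16_t queue __rte_unused,
            struct rte_mbuf *pkts[] __rte_unused, uint16_t nb_pkts,
            uint16_t max_pkts __rte_unused, void *user_param __rte_unused)
{
        rx_seen += nb_pkts;
        return nb_pkts;         /* packets handed on to the application */
}

/* Hook the callback on port 0, queue 0; it then runs inside rte_eth_rx_burst(). */
static int
install_rx_counter(void)
{
        return rte_eth_add_rx_callback(0, 0, count_rx_cb, NULL) == NULL ? -1 : 0;
}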
- */ -extern struct rte_eth_dev rte_eth_devices[]; - -#endif /* _RTE_ETHDEV_CORE_H_ */ diff --git a/lib/librte_ether/rte_ethdev_driver.h b/lib/librte_ether/rte_ethdev_driver.h deleted file mode 100644 index 45f08c65..00000000 --- a/lib/librte_ether/rte_ethdev_driver.h +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#ifndef _RTE_ETHDEV_DRIVER_H_ -#define _RTE_ETHDEV_DRIVER_H_ - -/** - * @file - * - * RTE Ethernet Device PMD API - * - * These APIs for the use from Ethernet drivers, user applications shouldn't - * use them. - * - */ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @internal - * Returns a ethdev slot specified by the unique identifier name. - * - * @param name - * The pointer to the Unique identifier name for each Ethernet device - * @return - * - The pointer to the ethdev slot, on success. NULL on error - */ -struct rte_eth_dev *rte_eth_dev_allocated(const char *name); - -/** - * @internal - * Allocates a new ethdev slot for an ethernet device and returns the pointer - * to that slot for the driver to use. - * - * @param name Unique identifier name for each Ethernet device - * @param type Device type of this Ethernet device - * @return - * - Slot in the rte_dev_devices array for a new device; - */ -struct rte_eth_dev *rte_eth_dev_allocate(const char *name); - -/** - * @internal - * Attach to the ethdev already initialized by the primary - * process. - * - * @param name Ethernet device's name. - * @return - * - Success: Slot in the rte_dev_devices array for attached - * device. - * - Error: Null pointer. - */ -struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name); - -/** - * @internal - * Release the specified ethdev port. - * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. - * @return - * - 0 on success, negative on error - */ -int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); - -/** - * @internal - * Release device queues and clear its configuration to force the user - * application to reconfigure it. It is for internal use only. - * - * @param dev - * Pointer to struct rte_eth_dev. - * - * @return - * void - */ -void _rte_eth_dev_reset(struct rte_eth_dev *dev); - -/** - * @internal Executes all the user application registered callbacks for - * the specific device. It is for DPDK internal user only. User - * application should not call it directly. - * - * @param dev - * Pointer to struct rte_eth_dev. - * @param event - * Eth device interrupt event type. - * @param ret_param - * To pass data back to user application. - * This allows the user application to decide if a particular function - * is permitted or not. - * - * @return - * int - */ -int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event, void *ret_param); - -/** - * Create memzone for HW rings. - * malloc can't be used as the physical address is needed. - * If the memzone is already created, then this function returns a ptr - * to the old one. - * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure - * @param name - * The name of the memory zone - * @param queue_id - * The index of the queue to add to name - * @param size - * The sizeof of the memory area - * @param align - * Alignment for resulting memzone. Must be a power of 2. - * @param socket_id - * The *socket_id* argument is the socket identifier in case of NUMA. 
- */ -const struct rte_memzone * -rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, - uint16_t queue_id, size_t size, - unsigned align, int socket_id); - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ETHDEV_DRIVER_H_ */ diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h deleted file mode 100644 index 897ce5b4..00000000 --- a/lib/librte_ether/rte_ethdev_pci.h +++ /dev/null @@ -1,196 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Brocade Communications Systems, Inc. - * Author: Jan Blunck - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETHDEV_PCI_H_ -#define _RTE_ETHDEV_PCI_H_ - -#include -#include -#include -#include -#include - -/** - * Copy pci device info to the Ethernet device data. - * - * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. - * @param pci_dev - * The *pci_dev* pointer is the address of the *rte_pci_device* structure. - */ -static inline void -rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, - struct rte_pci_device *pci_dev) -{ - if ((eth_dev == NULL) || (pci_dev == NULL)) { - RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n", - eth_dev, pci_dev); - return; - } - - eth_dev->intr_handle = &pci_dev->intr_handle; - - eth_dev->data->dev_flags = 0; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) - eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV) - eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; - - eth_dev->data->kdrv = pci_dev->kdrv; - eth_dev->data->numa_node = pci_dev->device.numa_node; -} - -/** - * @internal - * Allocates a new ethdev slot for an ethernet device and returns the pointer - * to that slot for the driver to use. - * - * @param dev - * Pointer to the PCI device - * - * @param private_data_size - * Size of private data structure - * - * @return - * A pointer to a rte_eth_dev or NULL if allocation failed. 
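rte_eth_dma_zone_reserve(), declared above, is normally called from a PMD's queue-setup path so the ring memory has a physical address the hardware can use. A hedged sketch, where struct my_desc, the descriptor count and the "rx_ring" zone name are placeholders:

#include <rte_ethdev_driver.h>
#include <rte_memzone.h>

struct my_desc { uint64_t fields[2]; };         /* placeholder descriptor layout */

static const struct rte_memzone *
reserve_ring(struct rte_eth_dev *dev, uint16_t queue_id,
             uint16_t nb_desc, int socket_id)
{
        /* Returns the existing zone if it was already reserved. */
        return rte_eth_dma_zone_reserve(dev, "rx_ring", queue_id,
                        sizeof(struct my_desc) * nb_desc,
                        RTE_CACHE_LINE_SIZE, socket_id);
}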
- */ -static inline struct rte_eth_dev * -rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) -{ - struct rte_eth_dev *eth_dev; - const char *name; - - if (!dev) - return NULL; - - name = dev->device.name; - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - eth_dev = rte_eth_dev_allocate(name); - if (!eth_dev) - return NULL; - - if (private_data_size) { - eth_dev->data->dev_private = rte_zmalloc_socket(name, - private_data_size, RTE_CACHE_LINE_SIZE, - dev->device.numa_node); - if (!eth_dev->data->dev_private) { - rte_eth_dev_release_port(eth_dev); - return NULL; - } - } - } else { - eth_dev = rte_eth_dev_attach_secondary(name); - if (!eth_dev) - return NULL; - } - - eth_dev->device = &dev->device; - rte_eth_copy_pci_info(eth_dev, dev); - return eth_dev; -} - -static inline void -rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) -{ - /* free ether device */ - rte_eth_dev_release_port(eth_dev); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(eth_dev->data->dev_private); - - eth_dev->data->dev_private = NULL; - - /* - * Secondary process will check the name to attach. - * Clear this field to avoid attaching a released ports. - */ - eth_dev->data->name[0] = '\0'; - - eth_dev->device = NULL; - eth_dev->intr_handle = NULL; -} - -typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev); - -/** - * @internal - * Wrapper for use by pci drivers in a .probe function to attach to a ethdev - * interface. - */ -static inline int -rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev, - size_t private_data_size, eth_dev_pci_callback_t dev_init) -{ - struct rte_eth_dev *eth_dev; - int ret; - - eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size); - if (!eth_dev) - return -ENOMEM; - - RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL); - ret = dev_init(eth_dev); - if (ret) - rte_eth_dev_pci_release(eth_dev); - - return ret; -} - -/** - * @internal - * Wrapper for use by pci drivers in a .remove function to detach a ethdev - * interface. - */ -static inline int -rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev, - eth_dev_pci_callback_t dev_uninit) -{ - struct rte_eth_dev *eth_dev; - int ret; - - eth_dev = rte_eth_dev_allocated(pci_dev->device.name); - if (!eth_dev) - return -ENODEV; - - if (dev_uninit) { - ret = dev_uninit(eth_dev); - if (ret) - return ret; - } - - rte_eth_dev_pci_release(eth_dev); - return 0; -} - -#endif /* _RTE_ETHDEV_PCI_H_ */ diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h deleted file mode 100644 index 259feda3..00000000 --- a/lib/librte_ether/rte_ethdev_vdev.h +++ /dev/null @@ -1,84 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Brocade Communications Systems, Inc. - * Author: Jan Blunck - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETHDEV_VDEV_H_ -#define _RTE_ETHDEV_VDEV_H_ - -#include -#include -#include -#include - -/** - * @internal - * Allocates a new ethdev slot for an ethernet device and returns the pointer - * to that slot for the driver to use. - * - * @param dev - * Pointer to virtual device - * - * @param private_data_size - * Size of private data structure - * - * @return - * A pointer to a rte_eth_dev or NULL if allocation failed. - */ -static inline struct rte_eth_dev * -rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) -{ - struct rte_eth_dev *eth_dev; - const char *name = rte_vdev_device_name(dev); - - eth_dev = rte_eth_dev_allocate(name); - if (!eth_dev) - return NULL; - - if (private_data_size) { - eth_dev->data->dev_private = rte_zmalloc_socket(name, - private_data_size, RTE_CACHE_LINE_SIZE, - dev->device.numa_node); - if (!eth_dev->data->dev_private) { - rte_eth_dev_release_port(eth_dev); - return NULL; - } - } - - eth_dev->device = &dev->device; - eth_dev->intr_handle = NULL; - - eth_dev->data->kdrv = RTE_KDRV_NONE; - eth_dev->data->numa_node = dev->device.numa_node; - return eth_dev; -} - -#endif /* _RTE_ETHDEV_VDEV_H_ */ diff --git a/lib/librte_ether/rte_ethdev_version.map b/lib/librte_ether/rte_ethdev_version.map deleted file mode 100644 index 87f02fb7..00000000 --- a/lib/librte_ether/rte_ethdev_version.map +++ /dev/null @@ -1,232 +0,0 @@ -DPDK_2.2 { - global: - - rte_eth_add_rx_callback; - rte_eth_add_tx_callback; - rte_eth_allmulticast_disable; - rte_eth_allmulticast_enable; - rte_eth_allmulticast_get; - rte_eth_dev_allocate; - rte_eth_dev_allocated; - rte_eth_dev_attach; - rte_eth_dev_callback_register; - rte_eth_dev_callback_unregister; - rte_eth_dev_close; - rte_eth_dev_configure; - rte_eth_dev_count; - rte_eth_dev_default_mac_addr_set; - rte_eth_dev_detach; - rte_eth_dev_filter_ctrl; - rte_eth_dev_filter_supported; - rte_eth_dev_flow_ctrl_get; - rte_eth_dev_flow_ctrl_set; - rte_eth_dev_get_dcb_info; - rte_eth_dev_get_eeprom; - rte_eth_dev_get_eeprom_length; - rte_eth_dev_get_mtu; - rte_eth_dev_get_reg_info; - rte_eth_dev_get_vlan_offload; - rte_eth_devices; - rte_eth_dev_info_get; - rte_eth_dev_is_valid_port; - rte_eth_dev_mac_addr_add; - rte_eth_dev_mac_addr_remove; - rte_eth_dev_priority_flow_ctrl_set; - rte_eth_dev_release_port; - rte_eth_dev_rss_hash_conf_get; - rte_eth_dev_rss_hash_update; - rte_eth_dev_rss_reta_query; - rte_eth_dev_rss_reta_update; - rte_eth_dev_rx_intr_ctl; - rte_eth_dev_rx_intr_ctl_q; - rte_eth_dev_rx_intr_disable; - rte_eth_dev_rx_intr_enable; - rte_eth_dev_rx_queue_start; - rte_eth_dev_rx_queue_stop; - rte_eth_dev_set_eeprom; - rte_eth_dev_set_link_down; - rte_eth_dev_set_link_up; - rte_eth_dev_set_mc_addr_list; - 
rte_eth_dev_set_mtu; - rte_eth_dev_set_rx_queue_stats_mapping; - rte_eth_dev_set_tx_queue_stats_mapping; - rte_eth_dev_set_vlan_offload; - rte_eth_dev_set_vlan_pvid; - rte_eth_dev_set_vlan_strip_on_queue; - rte_eth_dev_socket_id; - rte_eth_dev_start; - rte_eth_dev_stop; - rte_eth_dev_tx_queue_start; - rte_eth_dev_tx_queue_stop; - rte_eth_dev_uc_all_hash_table_set; - rte_eth_dev_uc_hash_table_set; - rte_eth_dev_vlan_filter; - rte_eth_dma_zone_reserve; - rte_eth_led_off; - rte_eth_led_on; - rte_eth_link; - rte_eth_link_get; - rte_eth_link_get_nowait; - rte_eth_macaddr_get; - rte_eth_mirror_rule_reset; - rte_eth_mirror_rule_set; - rte_eth_promiscuous_disable; - rte_eth_promiscuous_enable; - rte_eth_promiscuous_get; - rte_eth_remove_rx_callback; - rte_eth_remove_tx_callback; - rte_eth_rx_queue_info_get; - rte_eth_rx_queue_setup; - rte_eth_set_queue_rate_limit; - rte_eth_stats; - rte_eth_stats_get; - rte_eth_stats_reset; - rte_eth_timesync_adjust_time; - rte_eth_timesync_disable; - rte_eth_timesync_enable; - rte_eth_timesync_read_rx_timestamp; - rte_eth_timesync_read_time; - rte_eth_timesync_read_tx_timestamp; - rte_eth_timesync_write_time; - rte_eth_tx_queue_info_get; - rte_eth_tx_queue_setup; - rte_eth_xstats_get; - rte_eth_xstats_reset; - - local: *; -}; - -DPDK_16.04 { - global: - - rte_eth_dev_get_supported_ptypes; - rte_eth_dev_l2_tunnel_eth_type_conf; - rte_eth_dev_l2_tunnel_offload_set; - rte_eth_dev_set_vlan_ether_type; - rte_eth_dev_udp_tunnel_port_add; - rte_eth_dev_udp_tunnel_port_delete; - rte_eth_speed_bitflag; - rte_eth_tx_buffer_count_callback; - rte_eth_tx_buffer_drop_callback; - rte_eth_tx_buffer_init; - rte_eth_tx_buffer_set_err_callback; - -} DPDK_2.2; - -DPDK_16.07 { - global: - - rte_eth_add_first_rx_callback; - rte_eth_dev_get_name_by_port; - rte_eth_dev_get_port_by_name; - rte_eth_xstats_get_names; - -} DPDK_16.04; - -DPDK_17.02 { - global: - - _rte_eth_dev_reset; - rte_eth_dev_fw_version_get; - rte_flow_create; - rte_flow_destroy; - rte_flow_flush; - rte_flow_query; - rte_flow_validate; - -} DPDK_16.07; - -DPDK_17.05 { - global: - - rte_eth_dev_attach_secondary; - rte_eth_find_next; - rte_eth_tx_done_cleanup; - rte_eth_xstats_get_by_id; - rte_eth_xstats_get_id_by_name; - rte_eth_xstats_get_names_by_id; - -} DPDK_17.02; - -DPDK_17.08 { - global: - - _rte_eth_dev_callback_process; - rte_eth_dev_adjust_nb_rx_tx_desc; - rte_flow_copy; - rte_flow_isolate; - rte_tm_capabilities_get; - rte_tm_hierarchy_commit; - rte_tm_level_capabilities_get; - rte_tm_mark_ip_dscp; - rte_tm_mark_ip_ecn; - rte_tm_mark_vlan_dei; - rte_tm_node_add; - rte_tm_node_capabilities_get; - rte_tm_node_cman_update; - rte_tm_node_delete; - rte_tm_node_parent_update; - rte_tm_node_resume; - rte_tm_node_shaper_update; - rte_tm_node_shared_shaper_update; - rte_tm_node_shared_wred_context_update; - rte_tm_node_stats_read; - rte_tm_node_stats_update; - rte_tm_node_suspend; - rte_tm_node_type_get; - rte_tm_node_wfq_weight_mode_update; - rte_tm_node_wred_context_update; - rte_tm_shaper_profile_add; - rte_tm_shaper_profile_delete; - rte_tm_shared_shaper_add_update; - rte_tm_shared_shaper_delete; - rte_tm_shared_wred_context_add_update; - rte_tm_shared_wred_context_delete; - rte_tm_wred_profile_add; - rte_tm_wred_profile_delete; - -} DPDK_17.05; - -DPDK_17.11 { - global: - - rte_eth_dev_get_sec_ctx; - rte_eth_dev_pool_ops_supported; - rte_eth_dev_reset; - rte_flow_error_set; - -} DPDK_17.08; - -DPDK_18.02 { - global: - - rte_eth_dev_filter_ctrl; - -} DPDK_17.11; - -EXPERIMENTAL { - global: - - 
rte_eth_dev_is_removed; - rte_eth_dev_owner_delete; - rte_eth_dev_owner_get; - rte_eth_dev_owner_new; - rte_eth_dev_owner_set; - rte_eth_dev_owner_unset; - rte_eth_dev_rx_offload_name; - rte_eth_dev_tx_offload_name; - rte_eth_find_next_owned_by; - rte_mtr_capabilities_get; - rte_mtr_create; - rte_mtr_destroy; - rte_mtr_meter_disable; - rte_mtr_meter_dscp_table_update; - rte_mtr_meter_enable; - rte_mtr_meter_profile_add; - rte_mtr_meter_profile_delete; - rte_mtr_meter_profile_update; - rte_mtr_policer_actions_update; - rte_mtr_stats_read; - rte_mtr_stats_update; - -} DPDK_17.11; diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c deleted file mode 100644 index 38f2d27b..00000000 --- a/lib/librte_ether/rte_flow.c +++ /dev/null @@ -1,420 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include "rte_ethdev.h" -#include "rte_flow_driver.h" -#include "rte_flow.h" - -/** - * Flow elements description tables. - */ -struct rte_flow_desc_data { - const char *name; - size_t size; -}; - -/** Generate flow_item[] entry. */ -#define MK_FLOW_ITEM(t, s) \ - [RTE_FLOW_ITEM_TYPE_ ## t] = { \ - .name = # t, \ - .size = s, \ - } - -/** Information about known flow pattern items. */ -static const struct rte_flow_desc_data rte_flow_desc_item[] = { - MK_FLOW_ITEM(END, 0), - MK_FLOW_ITEM(VOID, 0), - MK_FLOW_ITEM(INVERT, 0), - MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)), - MK_FLOW_ITEM(PF, 0), - MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)), - MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)), - MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */ - MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)), - MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)), - MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)), - MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)), - MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)), - MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)), - MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)), - MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)), - MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)), - MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), - MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), - MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)), - MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)), - MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)), -}; - -/** Generate flow_action[] entry. */ -#define MK_FLOW_ACTION(t, s) \ - [RTE_FLOW_ACTION_TYPE_ ## t] = { \ - .name = # t, \ - .size = s, \ - } - -/** Information about known flow actions. 
*/ -static const struct rte_flow_desc_data rte_flow_desc_action[] = { - MK_FLOW_ACTION(END, 0), - MK_FLOW_ACTION(VOID, 0), - MK_FLOW_ACTION(PASSTHRU, 0), - MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)), - MK_FLOW_ACTION(FLAG, 0), - MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)), - MK_FLOW_ACTION(DROP, 0), - MK_FLOW_ACTION(COUNT, 0), - MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)), - MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */ - MK_FLOW_ACTION(PF, 0), - MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)), -}; - -static int -flow_err(uint16_t port_id, int ret, struct rte_flow_error *error) -{ - if (ret == 0) - return 0; - if (rte_eth_dev_is_removed(port_id)) - return rte_flow_error_set(error, EIO, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(EIO)); - return ret; -} - -/* Get generic flow operations structure from a port. */ -const struct rte_flow_ops * -rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_flow_ops *ops; - int code; - - if (unlikely(!rte_eth_dev_is_valid_port(port_id))) - code = ENODEV; - else if (unlikely(!dev->dev_ops->filter_ctrl || - dev->dev_ops->filter_ctrl(dev, - RTE_ETH_FILTER_GENERIC, - RTE_ETH_FILTER_GET, - &ops) || - !ops)) - code = ENOSYS; - else - return ops; - rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(code)); - return NULL; -} - -/* Check whether a flow rule can be created on a given port. */ -int -rte_flow_validate(uint16_t port_id, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], - struct rte_flow_error *error) -{ - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - - if (unlikely(!ops)) - return -rte_errno; - if (likely(!!ops->validate)) - return flow_err(port_id, ops->validate(dev, attr, pattern, - actions, error), error); - return rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); -} - -/* Create a flow rule on a given port. */ -struct rte_flow * -rte_flow_create(uint16_t port_id, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], - struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - struct rte_flow *flow; - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - - if (unlikely(!ops)) - return NULL; - if (likely(!!ops->create)) { - flow = ops->create(dev, attr, pattern, actions, error); - if (flow == NULL) - flow_err(port_id, -rte_errno, error); - return flow; - } - rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); - return NULL; -} - -/* Destroy a flow rule on a given port. */ -int -rte_flow_destroy(uint16_t port_id, - struct rte_flow *flow, - struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - - if (unlikely(!ops)) - return -rte_errno; - if (likely(!!ops->destroy)) - return flow_err(port_id, ops->destroy(dev, flow, error), - error); - return rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); -} - -/* Destroy all flow rules associated with a port. 
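From the application side, the validate/create/destroy entry points above are typically used together. A minimal sketch that steers IPv4 packets destined to 192.0.2.1 to RX queue 1 (port and queue numbers are arbitrary; whether the rule is accepted depends on the PMD):

#include <rte_flow.h>
#include <rte_ip.h>

static struct rte_flow *
steer_to_queue1(uint16_t port_id, struct rte_flow_error *err)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item_ipv4 ip_spec = {
                .hdr.dst_addr = RTE_BE32(0xc0000201),   /* 192.0.2.1 */
        };
        struct rte_flow_item_ipv4 ip_mask = {
                .hdr.dst_addr = RTE_BE32(0xffffffff),
        };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4,
                  .spec = &ip_spec, .mask = &ip_mask },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = 1 };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        if (rte_flow_validate(port_id, &attr, pattern, actions, err))
                return NULL;
        return rte_flow_create(port_id, &attr, pattern, actions, err);
}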
*/ -int -rte_flow_flush(uint16_t port_id, - struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - - if (unlikely(!ops)) - return -rte_errno; - if (likely(!!ops->flush)) - return flow_err(port_id, ops->flush(dev, error), error); - return rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); -} - -/* Query an existing flow rule. */ -int -rte_flow_query(uint16_t port_id, - struct rte_flow *flow, - enum rte_flow_action_type action, - void *data, - struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - - if (!ops) - return -rte_errno; - if (likely(!!ops->query)) - return flow_err(port_id, ops->query(dev, flow, action, data, - error), error); - return rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); -} - -/* Restrict ingress traffic to the defined flow rules. */ -int -rte_flow_isolate(uint16_t port_id, - int set, - struct rte_flow_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); - - if (!ops) - return -rte_errno; - if (likely(!!ops->isolate)) - return flow_err(port_id, ops->isolate(dev, set, error), error); - return rte_flow_error_set(error, ENOSYS, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, rte_strerror(ENOSYS)); -} - -/* Initialize flow error structure. */ -int -rte_flow_error_set(struct rte_flow_error *error, - int code, - enum rte_flow_error_type type, - const void *cause, - const char *message) -{ - if (error) { - *error = (struct rte_flow_error){ - .type = type, - .cause = cause, - .message = message, - }; - } - rte_errno = code; - return -code; -} - -/** Compute storage space needed by item specification. */ -static void -flow_item_spec_size(const struct rte_flow_item *item, - size_t *size, size_t *pad) -{ - if (!item->spec) { - *size = 0; - goto empty; - } - switch (item->type) { - union { - const struct rte_flow_item_raw *raw; - } spec; - - /* Not a fall-through */ - case RTE_FLOW_ITEM_TYPE_RAW: - spec.raw = item->spec; - *size = offsetof(struct rte_flow_item_raw, pattern) + - spec.raw->length * sizeof(*spec.raw->pattern); - break; - default: - *size = rte_flow_desc_item[item->type].size; - break; - } -empty: - *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size; -} - -/** Compute storage space needed by action configuration. */ -static void -flow_action_conf_size(const struct rte_flow_action *action, - size_t *size, size_t *pad) -{ - if (!action->conf) { - *size = 0; - goto empty; - } - switch (action->type) { - union { - const struct rte_flow_action_rss *rss; - } conf; - - /* Not a fall-through. */ - case RTE_FLOW_ACTION_TYPE_RSS: - conf.rss = action->conf; - *size = offsetof(struct rte_flow_action_rss, queue) + - conf.rss->num * sizeof(*conf.rss->queue); - break; - default: - *size = rte_flow_desc_action[action->type].size; - break; - } -empty: - *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size; -} - -/** Store a full rte_flow description. 
*/ -size_t -rte_flow_copy(struct rte_flow_desc *desc, size_t len, - const struct rte_flow_attr *attr, - const struct rte_flow_item *items, - const struct rte_flow_action *actions) -{ - struct rte_flow_desc *fd = NULL; - size_t tmp; - size_t pad; - size_t off1 = 0; - size_t off2 = 0; - size_t size = 0; - -store: - if (items) { - const struct rte_flow_item *item; - - item = items; - if (fd) - fd->items = (void *)&fd->data[off1]; - do { - struct rte_flow_item *dst = NULL; - - if ((size_t)item->type >= - RTE_DIM(rte_flow_desc_item) || - !rte_flow_desc_item[item->type].name) { - rte_errno = ENOTSUP; - return 0; - } - if (fd) - dst = memcpy(fd->data + off1, item, - sizeof(*item)); - off1 += sizeof(*item); - flow_item_spec_size(item, &tmp, &pad); - if (item->spec) { - if (fd) - dst->spec = memcpy(fd->data + off2, - item->spec, tmp); - off2 += tmp + pad; - } - if (item->last) { - if (fd) - dst->last = memcpy(fd->data + off2, - item->last, tmp); - off2 += tmp + pad; - } - if (item->mask) { - if (fd) - dst->mask = memcpy(fd->data + off2, - item->mask, tmp); - off2 += tmp + pad; - } - off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); - } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END); - off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); - } - if (actions) { - const struct rte_flow_action *action; - - action = actions; - if (fd) - fd->actions = (void *)&fd->data[off1]; - do { - struct rte_flow_action *dst = NULL; - - if ((size_t)action->type >= - RTE_DIM(rte_flow_desc_action) || - !rte_flow_desc_action[action->type].name) { - rte_errno = ENOTSUP; - return 0; - } - if (fd) - dst = memcpy(fd->data + off1, action, - sizeof(*action)); - off1 += sizeof(*action); - flow_action_conf_size(action, &tmp, &pad); - if (action->conf) { - if (fd) - dst->conf = memcpy(fd->data + off2, - action->conf, tmp); - off2 += tmp + pad; - } - off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); - } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END); - } - if (fd != NULL) - return size; - off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); - tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data), - sizeof(double)); - size = tmp + off1 + off2; - if (size > len) - return size; - fd = desc; - if (fd != NULL) { - *fd = (const struct rte_flow_desc) { - .size = size, - .attr = *attr, - }; - tmp -= offsetof(struct rte_flow_desc, data); - off2 = tmp + off1; - off1 = tmp; - goto store; - } - return 0; -} diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h deleted file mode 100644 index 13e42021..00000000 --- a/lib/librte_ether/rte_flow.h +++ /dev/null @@ -1,1483 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - */ - -#ifndef RTE_FLOW_H_ -#define RTE_FLOW_H_ - -/** - * @file - * RTE generic flow API - * - * This interface provides the ability to program packet matching and - * associated actions in hardware through flow rules. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Flow rule attributes. - * - * Priorities are set on two levels: per group and per rule within groups. - * - * Lower values denote higher priority, the highest priority for both levels - * is 0, so that a rule with priority 0 in group 8 is always matched after a - * rule with priority 8 in group 0. - * - * Although optional, applications are encouraged to group similar rules as - * much as possible to fully take advantage of hardware capabilities - * (e.g. 
optimized matching) and work around limitations (e.g. a single - * pattern type possibly allowed in a given group). - * - * Group and priority levels are arbitrary and up to the application, they - * do not need to be contiguous nor start from 0, however the maximum number - * varies between devices and may be affected by existing flow rules. - * - * If a packet is matched by several rules of a given group for a given - * priority level, the outcome is undefined. It can take any path, may be - * duplicated or even cause unrecoverable errors. - * - * Note that support for more than a single group and priority level is not - * guaranteed. - * - * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). - * - * Several pattern items and actions are valid and can be used in both - * directions. Those valid for only one direction are described as such. - * - * At least one direction must be specified. - * - * Specifying both directions at once for a given rule is not recommended - * but may be valid in a few cases (e.g. shared counter). - */ -struct rte_flow_attr { - uint32_t group; /**< Priority group. */ - uint32_t priority; /**< Priority level within group. */ - uint32_t ingress:1; /**< Rule applies to ingress traffic. */ - uint32_t egress:1; /**< Rule applies to egress traffic. */ - uint32_t reserved:30; /**< Reserved, must be zero. */ -}; - -/** - * Matching pattern item types. - * - * Pattern items fall in two categories: - * - * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4, - * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a - * specification structure. These must be stacked in the same order as the - * protocol layers to match, starting from the lowest. - * - * - Matching meta-data or affecting pattern processing (END, VOID, INVERT, - * PF, VF, PORT and so on), often without a specification structure. Since - * they do not match packet contents, these can be specified anywhere - * within item lists without affecting others. - * - * See the description of individual types for more information. Those - * marked with [META] fall into the second category. - */ -enum rte_flow_item_type { - /** - * [META] - * - * End marker for item lists. Prevents further processing of items, - * thereby ending the pattern. - * - * No associated specification structure. - */ - RTE_FLOW_ITEM_TYPE_END, - - /** - * [META] - * - * Used as a placeholder for convenience. It is ignored and simply - * discarded by PMDs. - * - * No associated specification structure. - */ - RTE_FLOW_ITEM_TYPE_VOID, - - /** - * [META] - * - * Inverted matching, i.e. process packets that do not match the - * pattern. - * - * No associated specification structure. - */ - RTE_FLOW_ITEM_TYPE_INVERT, - - /** - * Matches any protocol in place of the current layer, a single ANY - * may also stand for several protocol layers. - * - * See struct rte_flow_item_any. - */ - RTE_FLOW_ITEM_TYPE_ANY, - - /** - * [META] - * - * Matches packets addressed to the physical function of the device. - * - * If the underlying device function differs from the one that would - * normally receive the matched traffic, specifying this item - * prevents it from reaching that device unless the flow rule - * contains a PF action. Packets are not duplicated between device - * instances by default. - * - * No associated specification structure. - */ - RTE_FLOW_ITEM_TYPE_PF, - - /** - * [META] - * - * Matches packets addressed to a virtual function ID of the device. 
- * - * If the underlying device function differs from the one that would - * normally receive the matched traffic, specifying this item - * prevents it from reaching that device unless the flow rule - * contains a VF action. Packets are not duplicated between device - * instances by default. - * - * See struct rte_flow_item_vf. - */ - RTE_FLOW_ITEM_TYPE_VF, - - /** - * [META] - * - * Matches packets coming from the specified physical port of the - * underlying device. - * - * The first PORT item overrides the physical port normally - * associated with the specified DPDK input port (port_id). This - * item can be provided several times to match additional physical - * ports. - * - * See struct rte_flow_item_port. - */ - RTE_FLOW_ITEM_TYPE_PORT, - - /** - * Matches a byte string of a given length at a given offset. - * - * See struct rte_flow_item_raw. - */ - RTE_FLOW_ITEM_TYPE_RAW, - - /** - * Matches an Ethernet header. - * - * See struct rte_flow_item_eth. - */ - RTE_FLOW_ITEM_TYPE_ETH, - - /** - * Matches an 802.1Q/ad VLAN tag. - * - * See struct rte_flow_item_vlan. - */ - RTE_FLOW_ITEM_TYPE_VLAN, - - /** - * Matches an IPv4 header. - * - * See struct rte_flow_item_ipv4. - */ - RTE_FLOW_ITEM_TYPE_IPV4, - - /** - * Matches an IPv6 header. - * - * See struct rte_flow_item_ipv6. - */ - RTE_FLOW_ITEM_TYPE_IPV6, - - /** - * Matches an ICMP header. - * - * See struct rte_flow_item_icmp. - */ - RTE_FLOW_ITEM_TYPE_ICMP, - - /** - * Matches a UDP header. - * - * See struct rte_flow_item_udp. - */ - RTE_FLOW_ITEM_TYPE_UDP, - - /** - * Matches a TCP header. - * - * See struct rte_flow_item_tcp. - */ - RTE_FLOW_ITEM_TYPE_TCP, - - /** - * Matches a SCTP header. - * - * See struct rte_flow_item_sctp. - */ - RTE_FLOW_ITEM_TYPE_SCTP, - - /** - * Matches a VXLAN header. - * - * See struct rte_flow_item_vxlan. - */ - RTE_FLOW_ITEM_TYPE_VXLAN, - - /** - * Matches a E_TAG header. - * - * See struct rte_flow_item_e_tag. - */ - RTE_FLOW_ITEM_TYPE_E_TAG, - - /** - * Matches a NVGRE header. - * - * See struct rte_flow_item_nvgre. - */ - RTE_FLOW_ITEM_TYPE_NVGRE, - - /** - * Matches a MPLS header. - * - * See struct rte_flow_item_mpls. - */ - RTE_FLOW_ITEM_TYPE_MPLS, - - /** - * Matches a GRE header. - * - * See struct rte_flow_item_gre. - */ - RTE_FLOW_ITEM_TYPE_GRE, - - /** - * [META] - * - * Fuzzy pattern match, expect faster than default. - * - * This is for device that support fuzzy matching option. - * Usually a fuzzy matching is fast but the cost is accuracy. - * - * See struct rte_flow_item_fuzzy. - */ - RTE_FLOW_ITEM_TYPE_FUZZY, - - /** - * Matches a GTP header. - * - * Configure flow for GTP packets. - * - * See struct rte_flow_item_gtp. - */ - RTE_FLOW_ITEM_TYPE_GTP, - - /** - * Matches a GTP header. - * - * Configure flow for GTP-C packets. - * - * See struct rte_flow_item_gtp. - */ - RTE_FLOW_ITEM_TYPE_GTPC, - - /** - * Matches a GTP header. - * - * Configure flow for GTP-U packets. - * - * See struct rte_flow_item_gtp. - */ - RTE_FLOW_ITEM_TYPE_GTPU, - - /** - * Matches a ESP header. - * - * See struct rte_flow_item_esp. - */ - RTE_FLOW_ITEM_TYPE_ESP, - - /** - * Matches a GENEVE header. - * - * See struct rte_flow_item_geneve. - */ - RTE_FLOW_ITEM_TYPE_GENEVE, -}; - -/** - * RTE_FLOW_ITEM_TYPE_ANY - * - * Matches any protocol in place of the current layer, a single ANY may also - * stand for several protocol layers. - * - * This is usually specified as the first pattern item when looking for a - * protocol anywhere in a packet. - * - * A zeroed mask stands for any number of layers. 
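As the item descriptions above note, protocol items are stacked outermost layer first. A sketch of a pattern reaching the inner Ethernet header of a VXLAN-encapsulated packet (specs and masks are omitted, which requests broad matching at every level; PMD support for this exact stack varies):

#include <rte_flow.h>

static const struct rte_flow_item vxlan_inner_eth_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },     /* outer L2 */
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },    /* outer L3 */
        { .type = RTE_FLOW_ITEM_TYPE_UDP },     /* outer L4 */
        { .type = RTE_FLOW_ITEM_TYPE_VXLAN },   /* tunnel   */
        { .type = RTE_FLOW_ITEM_TYPE_ETH },     /* inner L2 */
        { .type = RTE_FLOW_ITEM_TYPE_END },
};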
- */ -struct rte_flow_item_any { - uint32_t num; /**< Number of layers covered. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ -#ifndef __cplusplus -static const struct rte_flow_item_any rte_flow_item_any_mask = { - .num = 0x00000000, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_VF - * - * Matches packets addressed to a virtual function ID of the device. - * - * If the underlying device function differs from the one that would - * normally receive the matched traffic, specifying this item prevents it - * from reaching that device unless the flow rule contains a VF - * action. Packets are not duplicated between device instances by default. - * - * - Likely to return an error or never match any traffic if this causes a - * VF device to match traffic addressed to a different VF. - * - Can be specified multiple times to match traffic addressed to several - * VF IDs. - * - Can be combined with a PF item to match both PF and VF traffic. - * - * A zeroed mask can be used to match any VF ID. - */ -struct rte_flow_item_vf { - uint32_t id; /**< Destination VF ID. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ -#ifndef __cplusplus -static const struct rte_flow_item_vf rte_flow_item_vf_mask = { - .id = 0x00000000, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_PORT - * - * Matches packets coming from the specified physical port of the underlying - * device. - * - * The first PORT item overrides the physical port normally associated with - * the specified DPDK input port (port_id). This item can be provided - * several times to match additional physical ports. - * - * Note that physical ports are not necessarily tied to DPDK input ports - * (port_id) when those are not under DPDK control. Possible values are - * specific to each device, they are not necessarily indexed from zero and - * may not be contiguous. - * - * As a device property, the list of allowed values as well as the value - * associated with a port_id should be retrieved by other means. - * - * A zeroed mask can be used to match any port index. - */ -struct rte_flow_item_port { - uint32_t index; /**< Physical port index. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */ -#ifndef __cplusplus -static const struct rte_flow_item_port rte_flow_item_port_mask = { - .index = 0x00000000, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_RAW - * - * Matches a byte string of a given length at a given offset. - * - * Offset is either absolute (using the start of the packet) or relative to - * the end of the previous matched item in the stack, in which case negative - * values are allowed. - * - * If search is enabled, offset is used as the starting point. The search - * area can be delimited by setting limit to a nonzero value, which is the - * maximum number of bytes after offset where the pattern may start. - * - * Matching a zero-length pattern is allowed, doing so resets the relative - * offset for subsequent items. - * - * This type does not support ranges (struct rte_flow_item.last). - */ -struct rte_flow_item_raw { - uint32_t relative:1; /**< Look for pattern after the previous item. */ - uint32_t search:1; /**< Search pattern from offset (see also limit). */ - uint32_t reserved:30; /**< Reserved, must be set to zero. */ - int32_t offset; /**< Absolute or relative offset for pattern. */ - uint16_t limit; /**< Search area limit for start of pattern. */ - uint16_t length; /**< Pattern length. */ - uint8_t pattern[]; /**< Byte string to look for. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. 
*/ -#ifndef __cplusplus -static const struct rte_flow_item_raw rte_flow_item_raw_mask = { - .relative = 1, - .search = 1, - .reserved = 0x3fffffff, - .offset = 0xffffffff, - .limit = 0xffff, - .length = 0xffff, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_ETH - * - * Matches an Ethernet header. - */ -struct rte_flow_item_eth { - struct ether_addr dst; /**< Destination MAC. */ - struct ether_addr src; /**< Source MAC. */ - rte_be16_t type; /**< EtherType. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ -#ifndef __cplusplus -static const struct rte_flow_item_eth rte_flow_item_eth_mask = { - .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", - .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", - .type = RTE_BE16(0x0000), -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_VLAN - * - * Matches an 802.1Q/ad VLAN tag. - * - * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or - * RTE_FLOW_ITEM_TYPE_VLAN. - */ -struct rte_flow_item_vlan { - rte_be16_t tpid; /**< Tag protocol identifier. */ - rte_be16_t tci; /**< Tag control information. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ -#ifndef __cplusplus -static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { - .tpid = RTE_BE16(0x0000), - .tci = RTE_BE16(0xffff), -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_IPV4 - * - * Matches an IPv4 header. - * - * Note: IPv4 options are handled by dedicated pattern items. - */ -struct rte_flow_item_ipv4 { - struct ipv4_hdr hdr; /**< IPv4 header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ -#ifndef __cplusplus -static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { - .hdr = { - .src_addr = RTE_BE32(0xffffffff), - .dst_addr = RTE_BE32(0xffffffff), - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_IPV6. - * - * Matches an IPv6 header. - * - * Note: IPv6 options are handled by dedicated pattern items. - */ -struct rte_flow_item_ipv6 { - struct ipv6_hdr hdr; /**< IPv6 header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ -#ifndef __cplusplus -static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { - .hdr = { - .src_addr = - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - .dst_addr = - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_ICMP. - * - * Matches an ICMP header. - */ -struct rte_flow_item_icmp { - struct icmp_hdr hdr; /**< ICMP header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ -#ifndef __cplusplus -static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { - .hdr = { - .icmp_type = 0xff, - .icmp_code = 0xff, - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_UDP. - * - * Matches a UDP header. - */ -struct rte_flow_item_udp { - struct udp_hdr hdr; /**< UDP header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */ -#ifndef __cplusplus -static const struct rte_flow_item_udp rte_flow_item_udp_mask = { - .hdr = { - .src_port = RTE_BE16(0xffff), - .dst_port = RTE_BE16(0xffff), - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_TCP. - * - * Matches a TCP header. - */ -struct rte_flow_item_tcp { - struct tcp_hdr hdr; /**< TCP header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ -#ifndef __cplusplus -static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { - .hdr = { - .src_port = RTE_BE16(0xffff), - .dst_port = RTE_BE16(0xffff), - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_SCTP. - * - * Matches a SCTP header. 
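The default masks above request exact matches on the listed fields; supplying an explicit mask in the item overrides that. A sketch matching any IPv4 destination inside 192.0.2.0/24 (the dst24_* names are illustrative):

#include <rte_flow.h>

/* Spec plus a custom /24 mask instead of the full-address default mask. */
static const struct rte_flow_item_ipv4 dst24_spec = {
        .hdr.dst_addr = RTE_BE32(0xc0000200),   /* 192.0.2.0 */
};
static const struct rte_flow_item_ipv4 dst24_mask = {
        .hdr.dst_addr = RTE_BE32(0xffffff00),   /* /24 prefix */
};
static const struct rte_flow_item dst24_item = {
        .type = RTE_FLOW_ITEM_TYPE_IPV4,
        .spec = &dst24_spec,
        .mask = &dst24_mask,
};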
- */ -struct rte_flow_item_sctp { - struct sctp_hdr hdr; /**< SCTP header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ -#ifndef __cplusplus -static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { - .hdr = { - .src_port = RTE_BE16(0xffff), - .dst_port = RTE_BE16(0xffff), - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_VXLAN. - * - * Matches a VXLAN header (RFC 7348). - */ -struct rte_flow_item_vxlan { - uint8_t flags; /**< Normally 0x08 (I flag). */ - uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ - uint8_t vni[3]; /**< VXLAN identifier. */ - uint8_t rsvd1; /**< Reserved, normally 0x00. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ -#ifndef __cplusplus -static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { - .vni = "\xff\xff\xff", -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_E_TAG. - * - * Matches a E-tag header. - */ -struct rte_flow_item_e_tag { - rte_be16_t tpid; /**< Tag protocol identifier (0x893F). */ - /** - * E-Tag control information (E-TCI). - * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). - */ - rte_be16_t epcp_edei_in_ecid_b; - /** Reserved (2b), GRP (2b), E-CID base (12b). */ - rte_be16_t rsvd_grp_ecid_b; - uint8_t in_ecid_e; /**< Ingress E-CID ext. */ - uint8_t ecid_e; /**< E-CID ext. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */ -#ifndef __cplusplus -static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = { - .rsvd_grp_ecid_b = RTE_BE16(0x3fff), -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_NVGRE. - * - * Matches a NVGRE header. - */ -struct rte_flow_item_nvgre { - /** - * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), - * reserved 0 (9b), version (3b). - * - * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. - */ - rte_be16_t c_k_s_rsvd0_ver; - rte_be16_t protocol; /**< Protocol type (0x6558). */ - uint8_t tni[3]; /**< Virtual subnet ID. */ - uint8_t flow_id; /**< Flow ID. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_NVGRE. */ -#ifndef __cplusplus -static const struct rte_flow_item_nvgre rte_flow_item_nvgre_mask = { - .tni = "\xff\xff\xff", -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_MPLS. - * - * Matches a MPLS header. - */ -struct rte_flow_item_mpls { - /** - * Label (20b), TC (3b), Bottom of Stack (1b). - */ - uint8_t label_tc_s[3]; - uint8_t ttl; /** Time-to-Live. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_MPLS. */ -#ifndef __cplusplus -static const struct rte_flow_item_mpls rte_flow_item_mpls_mask = { - .label_tc_s = "\xff\xff\xf0", -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_GRE. - * - * Matches a GRE header. - */ -struct rte_flow_item_gre { - /** - * Checksum (1b), reserved 0 (12b), version (3b). - * Refer to RFC 2784. - */ - rte_be16_t c_rsvd0_ver; - rte_be16_t protocol; /**< Protocol type. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */ -#ifndef __cplusplus -static const struct rte_flow_item_gre rte_flow_item_gre_mask = { - .protocol = RTE_BE16(0xffff), -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_FUZZY - * - * Fuzzy pattern match, expect faster than default. - * - * This is for device that support fuzzy match option. - * Usually a fuzzy match is fast but the cost is accuracy. - * i.e. Signature Match only match pattern's hash value, but it is - * possible two different patterns have the same hash value. - * - * Matching accuracy level can be configure by threshold. - * Driver can divide the range of threshold and map to different - * accuracy levels that device support. 
- * - * Threshold 0 means perfect match (no fuzziness), while threshold - * 0xffffffff means fuzziest match. - */ -struct rte_flow_item_fuzzy { - uint32_t thresh; /**< Accuracy threshold. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */ -#ifndef __cplusplus -static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = { - .thresh = 0xffffffff, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_GTP. - * - * Matches a GTPv1 header. - */ -struct rte_flow_item_gtp { - /** - * Version (3b), protocol type (1b), reserved (1b), - * Extension header flag (1b), - * Sequence number flag (1b), - * N-PDU number flag (1b). - */ - uint8_t v_pt_rsv_flags; - uint8_t msg_type; /**< Message type. */ - rte_be16_t msg_len; /**< Message length. */ - rte_be32_t teid; /**< Tunnel endpoint identifier. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */ -#ifndef __cplusplus -static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = { - .teid = RTE_BE32(0xffffffff), -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_ESP - * - * Matches an ESP header. - */ -struct rte_flow_item_esp { - struct esp_hdr hdr; /**< ESP header definition. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */ -#ifndef __cplusplus -static const struct rte_flow_item_esp rte_flow_item_esp_mask = { - .hdr = { - .spi = 0xffffffff, - }, -}; -#endif - -/** - * RTE_FLOW_ITEM_TYPE_GENEVE. - * - * Matches a GENEVE header. - */ -struct rte_flow_item_geneve { - /** - * Version (2b), length of the options fields (6b), OAM packet (1b), - * critical options present (1b), reserved 0 (6b). - */ - rte_be16_t ver_opt_len_o_c_rsvd0; - rte_be16_t protocol; /**< Protocol type. */ - uint8_t vni[3]; /**< Virtual Network Identifier. */ - uint8_t rsvd1; /**< Reserved, normally 0x00. */ -}; - -/** Default mask for RTE_FLOW_ITEM_TYPE_GENEVE. */ -#ifndef __cplusplus -static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = { - .vni = "\xff\xff\xff", -}; -#endif - -/** - * Matching pattern item definition. - * - * A pattern is formed by stacking items starting from the lowest protocol - * layer to match. This stacking restriction does not apply to meta items - * which can be placed anywhere in the stack without affecting the meaning - * of the resulting pattern. - * - * Patterns are terminated by END items. - * - * The spec field should be a valid pointer to a structure of the related - * item type. It may remain unspecified (NULL) in many cases to request - * broad (nonspecific) matching. In such cases, last and mask must also be - * set to NULL. - * - * Optionally, last can point to a structure of the same type to define an - * inclusive range. This is mostly supported by integer and address fields, - * may cause errors otherwise. Fields that do not support ranges must be set - * to 0 or to the same value as the corresponding fields in spec. - * - * Only the fields defined to nonzero values in the default masks (see - * rte_flow_item_{name}_mask constants) are considered relevant by - * default. This can be overridden by providing a mask structure of the - * same type with applicable bits set to one. It can also be used to - * partially filter out specific fields (e.g. as an alternate mean to match - * ranges of IP addresses). - * - * Mask is a simple bit-mask applied before interpreting the contents of - * spec and last, which may yield unexpected results if not used - * carefully. 
For example, if for an IPv4 address field, spec provides - * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the - * effective range becomes 10.1.0.0 to 10.3.255.255. - */ -struct rte_flow_item { - enum rte_flow_item_type type; /**< Item type. */ - const void *spec; /**< Pointer to item specification structure. */ - const void *last; /**< Defines an inclusive range (spec to last). */ - const void *mask; /**< Bit-mask applied to spec and last. */ -}; - -/** - * Action types. - * - * Each possible action is represented by a type. Some have associated - * configuration structures. Several actions combined in a list can be - * affected to a flow rule. That list is not ordered. - * - * They fall in three categories: - * - * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent - * processing matched packets by subsequent flow rules, unless overridden - * with PASSTHRU. - * - * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up - * for additional processing by subsequent flow rules. - * - * - Other non terminating meta actions that do not affect the fate of - * packets (END, VOID, MARK, FLAG, COUNT). - * - * When several actions are combined in a flow rule, they should all have - * different types (e.g. dropping a packet twice is not possible). - * - * Only the last action of a given type is taken into account. PMDs still - * perform error checking on the entire list. - * - * Note that PASSTHRU is the only action able to override a terminating - * rule. - */ -enum rte_flow_action_type { - /** - * [META] - * - * End marker for action lists. Prevents further processing of - * actions, thereby ending the list. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_END, - - /** - * [META] - * - * Used as a placeholder for convenience. It is ignored and simply - * discarded by PMDs. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_VOID, - - /** - * Leaves packets up for additional processing by subsequent flow - * rules. This is the default when a rule does not contain a - * terminating action, but can be specified to force a rule to - * become non-terminating. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_PASSTHRU, - - /** - * [META] - * - * Attaches an integer value to packets and sets PKT_RX_FDIR and - * PKT_RX_FDIR_ID mbuf flags. - * - * See struct rte_flow_action_mark. - */ - RTE_FLOW_ACTION_TYPE_MARK, - - /** - * [META] - * - * Flags packets. Similar to MARK without a specific value; only - * sets the PKT_RX_FDIR mbuf flag. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_FLAG, - - /** - * Assigns packets to a given queue index. - * - * See struct rte_flow_action_queue. - */ - RTE_FLOW_ACTION_TYPE_QUEUE, - - /** - * Drops packets. - * - * PASSTHRU overrides this action if both are specified. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_DROP, - - /** - * [META] - * - * Enables counters for this rule. - * - * These counters can be retrieved and reset through rte_flow_query(), - * see struct rte_flow_query_count. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_COUNT, - - /** - * Duplicates packets to a given queue index. - * - * This is normally combined with QUEUE, however when used alone, it - * is actually similar to QUEUE + PASSTHRU. - * - * See struct rte_flow_action_dup. 
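To make the spec/last/mask interaction described earlier for struct rte_flow_item concrete, here is a minimal sketch (not part of this header; the destination-address choice and initializer values are illustrative) of a pattern expressing the 10.1.2.3 / 10.3.4.5 / 255.255.0.0 range example, using RTE_BE32() the same way the default masks above do:

#include <rte_flow.h>

/* Illustrative spec/last/mask triplet for the IPv4 range example. */
static const struct rte_flow_item_ipv4 ipv4_spec = {
	.hdr = { .dst_addr = RTE_BE32(0x0a010203) }, /* 10.1.2.3 */
};
static const struct rte_flow_item_ipv4 ipv4_last = {
	.hdr = { .dst_addr = RTE_BE32(0x0a030405) }, /* 10.3.4.5 */
};
static const struct rte_flow_item_ipv4 ipv4_mask = {
	.hdr = { .dst_addr = RTE_BE32(0xffff0000) }, /* 255.255.0.0 */
};

/* Items stacked lowest layer first, terminated by an END item. */
static const struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH }, /* spec/last/mask NULL: any Ethernet header */
	{
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.spec = &ipv4_spec,
		.last = &ipv4_last,
		.mask = &ipv4_mask,
	},
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};

The effective match is the inclusive range 10.1.0.0 to 10.3.255.255, exactly as described in the comment above.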
- */ - RTE_FLOW_ACTION_TYPE_DUP, - - /** - * Similar to QUEUE, except RSS is additionally performed on packets - * to spread them among several queues according to the provided - * parameters. - * - * See struct rte_flow_action_rss. - */ - RTE_FLOW_ACTION_TYPE_RSS, - - /** - * Redirects packets to the physical function (PF) of the current - * device. - * - * No associated configuration structure. - */ - RTE_FLOW_ACTION_TYPE_PF, - - /** - * Redirects packets to the virtual function (VF) of the current - * device with the specified ID. - * - * See struct rte_flow_action_vf. - */ - RTE_FLOW_ACTION_TYPE_VF, - - /** - * Traffic metering and policing (MTR). - * - * See struct rte_flow_action_meter. - * See file rte_mtr.h for MTR object configuration. - */ - RTE_FLOW_ACTION_TYPE_METER, - - /** - * Redirects packets to security engine of current device for security - * processing as specified by security session. - * - * See struct rte_flow_action_security. - */ - RTE_FLOW_ACTION_TYPE_SECURITY -}; - -/** - * RTE_FLOW_ACTION_TYPE_MARK - * - * Attaches an integer value to packets and sets PKT_RX_FDIR and - * PKT_RX_FDIR_ID mbuf flags. - * - * This value is arbitrary and application-defined. Maximum allowed value - * depends on the underlying implementation. It is returned in the - * hash.fdir.hi mbuf field. - */ -struct rte_flow_action_mark { - uint32_t id; /**< Integer value to return with packets. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_QUEUE - * - * Assign packets to a given queue index. - * - * Terminating by default. - */ -struct rte_flow_action_queue { - uint16_t index; /**< Queue index to use. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_COUNT (query) - * - * Query structure to retrieve and reset flow rule counters. - */ -struct rte_flow_query_count { - uint32_t reset:1; /**< Reset counters after query [in]. */ - uint32_t hits_set:1; /**< hits field is set [out]. */ - uint32_t bytes_set:1; /**< bytes field is set [out]. */ - uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ - uint64_t hits; /**< Number of hits for this rule [out]. */ - uint64_t bytes; /**< Number of bytes through this rule [out]. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_DUP - * - * Duplicates packets to a given queue index. - * - * This is normally combined with QUEUE, however when used alone, it is - * actually similar to QUEUE + PASSTHRU. - * - * Non-terminating by default. - */ -struct rte_flow_action_dup { - uint16_t index; /**< Queue index to duplicate packets to. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_RSS - * - * Similar to QUEUE, except RSS is additionally performed on packets to - * spread them among several queues according to the provided parameters. - * - * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps - * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only, - * both can be requested simultaneously. - * - * Terminating by default. - */ -struct rte_flow_action_rss { - const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */ - uint16_t num; /**< Number of entries in queue[]. */ - uint16_t queue[]; /**< Queues indices to use. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_VF - * - * Redirects packets to a virtual function (VF) of the current device. - * - * Packets matched by a VF pattern item can be redirected to their original - * VF ID instead of the specified one. This parameter may not be available - * and is not guaranteed to work properly if the VF part is matched by a - * prior flow rule or if packets are not addressed to a VF in the first - * place. 
- * - * Terminating by default. - */ -struct rte_flow_action_vf { - uint32_t original:1; /**< Use original VF ID if possible. */ - uint32_t reserved:31; /**< Reserved, must be zero. */ - uint32_t id; /**< VF ID to redirect packets to. */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_METER - * - * Traffic metering and policing (MTR). - * - * Packets matched by items of this type can be either dropped or passed to the - * next item with their color set by the MTR object. - * - * Non-terminating by default. - */ -struct rte_flow_action_meter { - uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */ -}; - -/** - * RTE_FLOW_ACTION_TYPE_SECURITY - * - * Perform the security action on flows matched by the pattern items - * according to the configuration of the security session. - * - * This action modifies the payload of matched flows. For INLINE_CRYPTO, the - * security protocol headers and IV are fully provided by the application as - * specified in the flow pattern. The payload of matching packets is - * encrypted on egress, and decrypted and authenticated on ingress. - * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW, - * providing full encapsulation and decapsulation of packets in security - * protocols. The flow pattern specifies both the outer security header fields - * and the inner packet fields. The security session specified in the action - * must match the pattern parameters. - * - * The security session specified in the action must be created on the same - * port as the flow action that is being specified. - * - * The ingress/egress flow attribute should match that specified in the - * security session if the security session supports the definition of the - * direction. - * - * Multiple flows can be configured to use the same security session. - * - * Non-terminating by default. - */ -struct rte_flow_action_security { - void *security_session; /**< Pointer to security session structure. */ -}; - -/** - * Definition of a single action. - * - * A list of actions is terminated by a END action. - * - * For simple actions without a configuration structure, conf remains NULL. - */ -struct rte_flow_action { - enum rte_flow_action_type type; /**< Action type. */ - const void *conf; /**< Pointer to action configuration structure. */ -}; - -/** - * Opaque type returned after successfully creating a flow. - * - * This handle can be used to manage and query the related flow (e.g. to - * destroy it or retrieve counters). - */ -struct rte_flow; - -/** - * Verbose error types. - * - * Most of them provide the type of the object referenced by struct - * rte_flow_error.cause. - */ -enum rte_flow_error_type { - RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ - RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ - RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ - RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */ - RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */ - RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ - RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ - RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ - RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ - RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */ - RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ -}; - -/** - * Verbose error structure definition. 
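As a hedged sketch of the action-list convention documented above (one entry per action type, END-terminated; the mark value and queue index are arbitrary assumptions), an application could tag matching packets and steer them to queue 3 like this:

#include <rte_flow.h>

static const struct rte_flow_action_mark mark = { .id = 0x2a }; /* returned in hash.fdir.hi */
static const struct rte_flow_action_queue queue = { .index = 3 };

/* MARK is a meta action; QUEUE terminates the rule unless PASSTHRU is added. */
static const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};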
- * - * This object is normally allocated by applications and set by PMDs, the - * message points to a constant string which does not need to be freed by - * the application, however its pointer can be considered valid only as long - * as its associated DPDK port remains configured. Closing the underlying - * device or unloading the PMD invalidates it. - * - * Both cause and message may be NULL regardless of the error type. - */ -struct rte_flow_error { - enum rte_flow_error_type type; /**< Cause field and error types. */ - const void *cause; /**< Object responsible for the error. */ - const char *message; /**< Human-readable error message. */ -}; - -/** - * Check whether a flow rule can be created on a given port. - * - * The flow rule is validated for correctness and whether it could be accepted - * by the device given sufficient resources. The rule is checked against the - * current device mode and queue configuration. The flow rule may also - * optionally be validated against existing flow rules and device resources. - * This function has no effect on the target device. - * - * The returned value is guaranteed to remain valid only as long as no - * successful calls to rte_flow_create() or rte_flow_destroy() are made in - * the meantime and no device parameter affecting flow rules in any way are - * modified, due to possible collisions or resource limitations (although in - * such cases EINVAL should not be returned). - * - * @param port_id - * Port identifier of Ethernet device. - * @param[in] attr - * Flow rule attributes. - * @param[in] pattern - * Pattern specification (list terminated by the END pattern item). - * @param[in] actions - * Associated actions (list terminated by the END action). - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 if flow rule is valid and can be created. A negative errno value - * otherwise (rte_errno is also set), the following errors are defined: - * - * -ENOSYS: underlying device does not support this functionality. - * - * -EIO: underlying device is removed. - * - * -EINVAL: unknown or invalid rule specification. - * - * -ENOTSUP: valid but unsupported rule specification (e.g. partial - * bit-masks are unsupported). - * - * -EEXIST: collision with an existing rule. Only returned if device - * supports flow rule collision checking and there was a flow rule - * collision. Not receiving this return code is no guarantee that creating - * the rule will not fail due to a collision. - * - * -ENOMEM: not enough memory to execute the function, or if the device - * supports resource validation, resource limitation on the device. - * - * -EBUSY: action cannot be performed due to busy device resources, may - * succeed if the affected queues or even the entire port are in a stopped - * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). - */ -int -rte_flow_validate(uint16_t port_id, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], - struct rte_flow_error *error); - -/** - * Create a flow rule on a given port. - * - * @param port_id - * Port identifier of Ethernet device. - * @param[in] attr - * Flow rule attributes. - * @param[in] pattern - * Pattern specification (list terminated by the END pattern item). - * @param[in] actions - * Associated actions (list terminated by the END action). - * @param[out] error - * Perform verbose error reporting if not NULL. 
PMDs initialize this - * structure in case of error only. - * - * @return - * A valid handle in case of success, NULL otherwise and rte_errno is set - * to the positive version of one of the error codes defined for - * rte_flow_validate(). - */ -struct rte_flow * -rte_flow_create(uint16_t port_id, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], - struct rte_flow_error *error); - -/** - * Destroy a flow rule on a given port. - * - * Failure to destroy a flow rule handle may occur when other flow rules - * depend on it, and destroying it would result in an inconsistent state. - * - * This function is only guaranteed to succeed if handles are destroyed in - * reverse order of their creation. - * - * @param port_id - * Port identifier of Ethernet device. - * @param flow - * Flow rule handle to destroy. - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -rte_flow_destroy(uint16_t port_id, - struct rte_flow *flow, - struct rte_flow_error *error); - -/** - * Destroy all flow rules associated with a port. - * - * In the unlikely event of failure, handles are still considered destroyed - * and no longer valid but the port must be assumed to be in an inconsistent - * state. - * - * @param port_id - * Port identifier of Ethernet device. - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -rte_flow_flush(uint16_t port_id, - struct rte_flow_error *error); - -/** - * Query an existing flow rule. - * - * This function allows retrieving flow-specific data such as counters. - * Data is gathered by special actions which must be present in the flow - * rule definition. - * - * \see RTE_FLOW_ACTION_TYPE_COUNT - * - * @param port_id - * Port identifier of Ethernet device. - * @param flow - * Flow rule handle to query. - * @param action - * Action type to query. - * @param[in, out] data - * Pointer to storage for the associated query data type. - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -rte_flow_query(uint16_t port_id, - struct rte_flow *flow, - enum rte_flow_action_type action, - void *data, - struct rte_flow_error *error); - -/** - * Restrict ingress traffic to the defined flow rules. - * - * Isolated mode guarantees that all ingress traffic comes from defined flow - * rules only (current and future). - * - * Besides making ingress more deterministic, it allows PMDs to safely reuse - * resources otherwise assigned to handle the remaining traffic, such as - * global RSS configuration settings, VLAN filters, MAC address entries, - * legacy filter API rules and so on in order to expand the set of possible - * flow rule types. - * - * Calling this function as soon as possible after device initialization, - * ideally before the first call to rte_eth_dev_configure(), is recommended - * to avoid possible failures due to conflicting settings. - * - * Once effective, leaving isolated mode may not be possible depending on - * PMD implementation. 
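Putting the declarations above together, a minimal usage sketch for the validate/create/destroy sequence (install_example_rule is a hypothetical helper; it assumes a configured, started port and pattern/actions arrays such as those sketched earlier):

#include <rte_errno.h>
#include <rte_flow.h>

static int
install_example_rule(uint16_t port_id,
		     const struct rte_flow_item pattern[],
		     const struct rte_flow_action actions[])
{
	struct rte_flow_attr attr = { .ingress = 1 }; /* match received traffic */
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	/* Dry run: checks the rule without modifying device state. */
	ret = rte_flow_validate(port_id, &attr, pattern, actions, &error);
	if (ret)
		return ret; /* negative errno, rte_errno also set */

	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
	if (flow == NULL)
		return -rte_errno;

	/* ... packets now hit the rule; tear it down when done ... */
	return rte_flow_destroy(port_id, flow, &error);
}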
- * - * Additionally, the following functionality has no effect on the underlying - * port and may return errors such as ENOTSUP ("not supported"): - * - * - Toggling promiscuous mode. - * - Toggling allmulticast mode. - * - Configuring MAC addresses. - * - Configuring multicast addresses. - * - Configuring VLAN filters. - * - Configuring Rx filters through the legacy API (e.g. FDIR). - * - Configuring global RSS settings. - * - * @param port_id - * Port identifier of Ethernet device. - * @param set - * Nonzero to enter isolated mode, attempt to leave it otherwise. - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error); - -/** - * Initialize flow error structure. - * - * @param[out] error - * Pointer to flow error structure (may be NULL). - * @param code - * Related error code (rte_errno). - * @param type - * Cause field and error types. - * @param cause - * Object responsible for the error. - * @param message - * Human-readable error message. - * - * @return - * Negative error code (errno value) and rte_errno is set. - */ -int -rte_flow_error_set(struct rte_flow_error *error, - int code, - enum rte_flow_error_type type, - const void *cause, - const char *message); - -/** - * Generic flow representation. - * - * This form is sufficient to describe an rte_flow independently from any - * PMD implementation and allows for replayability and identification. - */ -struct rte_flow_desc { - size_t size; /**< Allocated space including data[]. */ - struct rte_flow_attr attr; /**< Attributes. */ - struct rte_flow_item *items; /**< Items. */ - struct rte_flow_action *actions; /**< Actions. */ - uint8_t data[]; /**< Storage for items/actions. */ -}; - -/** - * Copy an rte_flow rule description. - * - * @param[in] fd - * Flow rule description. - * @param[in] len - * Total size of allocated data for the flow description. - * @param[in] attr - * Flow rule attributes. - * @param[in] items - * Pattern specification (list terminated by the END pattern item). - * @param[in] actions - * Associated actions (list terminated by the END action). - * - * @return - * If len is greater or equal to the size of the flow, the total size of the - * flow description and its data. - * If len is lower than the size of the flow, the number of bytes that would - * have been written to desc had it been sufficient. Nothing is written. - */ -size_t -rte_flow_copy(struct rte_flow_desc *fd, size_t len, - const struct rte_flow_attr *attr, - const struct rte_flow_item *items, - const struct rte_flow_action *actions); - -#ifdef __cplusplus -} -#endif - -#endif /* RTE_FLOW_H_ */ diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h deleted file mode 100644 index 7778c8e0..00000000 --- a/lib/librte_ether/rte_flow_driver.h +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - */ - -#ifndef RTE_FLOW_DRIVER_H_ -#define RTE_FLOW_DRIVER_H_ - -/** - * @file - * RTE generic flow API (driver side) - * - * This file provides implementation helpers for internal use by PMDs, they - * are not intended to be exposed to applications and are not subject to ABI - * versioning. 
- */ - -#include - -#include "rte_ethdev.h" -#include "rte_flow.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Generic flow operations structure implemented and returned by PMDs. - * - * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter - * type in their .filter_ctrl callback function (struct eth_dev_ops) as well - * as the RTE_ETH_FILTER_GET filter operation. - * - * If successful, this operation must result in a pointer to a PMD-specific - * struct rte_flow_ops written to the argument address as described below: - * - * \code - * - * // PMD filter_ctrl callback - * - * static const struct rte_flow_ops pmd_flow_ops = { ... }; - * - * switch (filter_type) { - * case RTE_ETH_FILTER_GENERIC: - * if (filter_op != RTE_ETH_FILTER_GET) - * return -EINVAL; - * *(const void **)arg = &pmd_flow_ops; - * return 0; - * } - * - * \endcode - * - * See also rte_flow_ops_get(). - * - * These callback functions are not supposed to be used by applications - * directly, which must rely on the API defined in rte_flow.h. - * - * Public-facing wrapper functions perform a few consistency checks so that - * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These - * callbacks otherwise only differ by their first argument (with port ID - * already resolved to a pointer to struct rte_eth_dev). - */ -struct rte_flow_ops { - /** See rte_flow_validate(). */ - int (*validate) - (struct rte_eth_dev *, - const struct rte_flow_attr *, - const struct rte_flow_item [], - const struct rte_flow_action [], - struct rte_flow_error *); - /** See rte_flow_create(). */ - struct rte_flow *(*create) - (struct rte_eth_dev *, - const struct rte_flow_attr *, - const struct rte_flow_item [], - const struct rte_flow_action [], - struct rte_flow_error *); - /** See rte_flow_destroy(). */ - int (*destroy) - (struct rte_eth_dev *, - struct rte_flow *, - struct rte_flow_error *); - /** See rte_flow_flush(). */ - int (*flush) - (struct rte_eth_dev *, - struct rte_flow_error *); - /** See rte_flow_query(). */ - int (*query) - (struct rte_eth_dev *, - struct rte_flow *, - enum rte_flow_action_type, - void *, - struct rte_flow_error *); - /** See rte_flow_isolate(). */ - int (*isolate) - (struct rte_eth_dev *, - int, - struct rte_flow_error *); -}; - -/** - * Get generic flow operations structure from a port. - * - * @param port_id - * Port identifier to query. - * @param[out] error - * Pointer to flow error structure. - * - * @return - * The flow operations structure associated with port_id, NULL in case of - * error, in which case rte_errno is set and the error structure contains - * additional details. - */ -const struct rte_flow_ops * -rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error); - -#ifdef __cplusplus -} -#endif - -#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/lib/librte_ether/rte_mtr.c b/lib/librte_ether/rte_mtr.c deleted file mode 100644 index 1046cb5f..00000000 --- a/lib/librte_ether/rte_mtr.c +++ /dev/null @@ -1,201 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#include - -#include -#include "rte_compat.h" -#include "rte_ethdev.h" -#include "rte_mtr_driver.h" -#include "rte_mtr.h" - -/* Get generic traffic metering & policing operations structure from a port. 
*/ -const struct rte_mtr_ops * -rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_mtr_ops *ops; - - if (!rte_eth_dev_is_valid_port(port_id)) { - rte_mtr_error_set(error, - ENODEV, - RTE_MTR_ERROR_TYPE_UNSPECIFIED, - NULL, - rte_strerror(ENODEV)); - return NULL; - } - - if ((dev->dev_ops->mtr_ops_get == NULL) || - (dev->dev_ops->mtr_ops_get(dev, &ops) != 0) || - (ops == NULL)) { - rte_mtr_error_set(error, - ENOSYS, - RTE_MTR_ERROR_TYPE_UNSPECIFIED, - NULL, - rte_strerror(ENOSYS)); - return NULL; - } - - return ops; -} - -#define RTE_MTR_FUNC(port_id, func) \ -({ \ - const struct rte_mtr_ops *ops = \ - rte_mtr_ops_get(port_id, error); \ - if (ops == NULL) \ - return -rte_errno; \ - \ - if (ops->func == NULL) \ - return -rte_mtr_error_set(error, \ - ENOSYS, \ - RTE_MTR_ERROR_TYPE_UNSPECIFIED, \ - NULL, \ - rte_strerror(ENOSYS)); \ - \ - ops->func; \ -}) - -/* MTR capabilities get */ -int __rte_experimental -rte_mtr_capabilities_get(uint16_t port_id, - struct rte_mtr_capabilities *cap, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, capabilities_get)(dev, - cap, error); -} - -/* MTR meter profile add */ -int __rte_experimental -rte_mtr_meter_profile_add(uint16_t port_id, - uint32_t meter_profile_id, - struct rte_mtr_meter_profile *profile, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_profile_add)(dev, - meter_profile_id, profile, error); -} - -/** MTR meter profile delete */ -int __rte_experimental -rte_mtr_meter_profile_delete(uint16_t port_id, - uint32_t meter_profile_id, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_profile_delete)(dev, - meter_profile_id, error); -} - -/** MTR object create */ -int __rte_experimental -rte_mtr_create(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_params *params, - int shared, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, create)(dev, - mtr_id, params, shared, error); -} - -/** MTR object destroy */ -int __rte_experimental -rte_mtr_destroy(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, destroy)(dev, - mtr_id, error); -} - -/** MTR object meter enable */ -int __rte_experimental -rte_mtr_meter_enable(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_enable)(dev, - mtr_id, error); -} - -/** MTR object meter disable */ -int __rte_experimental -rte_mtr_meter_disable(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_disable)(dev, - mtr_id, error); -} - -/** MTR object meter profile update */ -int __rte_experimental -rte_mtr_meter_profile_update(uint16_t port_id, - uint32_t mtr_id, - uint32_t meter_profile_id, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_profile_update)(dev, - mtr_id, meter_profile_id, error); -} - -/** MTR object meter DSCP table update */ -int __rte_experimental -rte_mtr_meter_dscp_table_update(uint16_t port_id, - uint32_t mtr_id, - enum 
rte_mtr_color *dscp_table, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, meter_dscp_table_update)(dev, - mtr_id, dscp_table, error); -} - -/** MTR object policer action update */ -int __rte_experimental -rte_mtr_policer_actions_update(uint16_t port_id, - uint32_t mtr_id, - uint32_t action_mask, - enum rte_mtr_policer_action *actions, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, policer_actions_update)(dev, - mtr_id, action_mask, actions, error); -} - -/** MTR object enabled stats update */ -int __rte_experimental -rte_mtr_stats_update(uint16_t port_id, - uint32_t mtr_id, - uint64_t stats_mask, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, stats_update)(dev, - mtr_id, stats_mask, error); -} - -/** MTR object stats read */ -int __rte_experimental -rte_mtr_stats_read(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_mtr_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_MTR_FUNC(port_id, stats_read)(dev, - mtr_id, stats, stats_mask, clear, error); -} diff --git a/lib/librte_ether/rte_mtr.h b/lib/librte_ether/rte_mtr.h deleted file mode 100644 index c4819b27..00000000 --- a/lib/librte_ether/rte_mtr.h +++ /dev/null @@ -1,730 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 Intel Corporation - * Copyright 2017 NXP - * Copyright 2017 Cavium - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __INCLUDE_RTE_MTR_H__ -#define __INCLUDE_RTE_MTR_H__ - -/** - * @file - * RTE Generic Traffic Metering and Policing API - * - * This interface provides the ability to configure the traffic metering and - * policing (MTR) in a generic way. 
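The RTE_MTR_FUNC() helper above is a GCC statement expression: it looks up the per-port ops table, returns from the calling wrapper when the port or the requested callback is unavailable, and otherwise evaluates to the callback pointer, which the wrapper then invokes. Hand-expanded for illustration only (not literal preprocessor output), a wrapper such as rte_mtr_create() behaves roughly like:

int __rte_experimental
rte_mtr_create(uint16_t port_id, uint32_t mtr_id,
	       struct rte_mtr_params *params, int shared,
	       struct rte_mtr_error *error)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	const struct rte_mtr_ops *ops = rte_mtr_ops_get(port_id, error);

	if (ops == NULL)		/* invalid port or no MTR support */
		return -rte_errno;
	if (ops->create == NULL)	/* callback not provided by the PMD */
		return -rte_mtr_error_set(error, ENOSYS,
					  RTE_MTR_ERROR_TYPE_UNSPECIFIED,
					  NULL, rte_strerror(ENOSYS));

	return ops->create(dev, mtr_id, params, shared, error);
}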
- * - * The processing done for each input packet hitting a MTR object is: - * A) Traffic metering: The packet is assigned a color (the meter output - * color), based on the previous history of the flow reflected in the - * current state of the MTR object, according to the specific traffic - * metering algorithm. The traffic metering algorithm can typically work - * in color aware mode, in which case the input packet already has an - * initial color (the input color), or in color blind mode, which is - * equivalent to considering all input packets initially colored as green. - * B) Policing: There is a separate policer action configured for each meter - * output color, which can: - * a) Drop the packet. - * b) Keep the same packet color: the policer output color matches the - * meter output color (essentially a no-op action). - * c) Recolor the packet: the policer output color is different than - * the meter output color. - * The policer output color is the output color of the packet, which is - * set in the packet meta-data (i.e. struct rte_mbuf::sched::color). - * C) Statistics: The set of counters maintained for each MTR object is - * configurable and subject to the implementation support. This set - * includes the number of packets and bytes dropped or passed for each - * output color. - * - * Once successfully created, an MTR object is linked to one or several flows - * through the meter action of the flow API. - * A) Whether an MTR object is private to a flow or potentially shared by - * several flows has to be specified at creation time. - * B) Several meter actions can be potentially registered for the same flow. - * - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - */ -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Color - */ -enum rte_mtr_color { - RTE_MTR_GREEN = 0, /**< Green */ - RTE_MTR_YELLOW, /**< Yellow */ - RTE_MTR_RED, /**< Red */ - RTE_MTR_COLORS /**< Number of colors. */ -}; - -/** - * Statistics counter type - */ -enum rte_mtr_stats_type { - /** Number of packets passed as green by the policer. */ - RTE_MTR_STATS_N_PKTS_GREEN = 1 << 0, - - /** Number of packets passed as yellow by the policer. */ - RTE_MTR_STATS_N_PKTS_YELLOW = 1 << 1, - - /** Number of packets passed as red by the policer. */ - RTE_MTR_STATS_N_PKTS_RED = 1 << 2, - - /** Number of packets dropped by the policer. */ - RTE_MTR_STATS_N_PKTS_DROPPED = 1 << 3, - - /** Number of bytes passed as green by the policer. */ - RTE_MTR_STATS_N_BYTES_GREEN = 1 << 4, - - /** Number of bytes passed as yellow by the policer. */ - RTE_MTR_STATS_N_BYTES_YELLOW = 1 << 5, - - /** Number of bytes passed as red by the policer. */ - RTE_MTR_STATS_N_BYTES_RED = 1 << 6, - - /** Number of bytes dropped by the policer. */ - RTE_MTR_STATS_N_BYTES_DROPPED = 1 << 7, -}; - -/** - * Statistics counters - */ -struct rte_mtr_stats { - /** Number of packets passed by the policer (per color). */ - uint64_t n_pkts[RTE_MTR_COLORS]; - - /** Number of bytes passed by the policer (per color). */ - uint64_t n_bytes[RTE_MTR_COLORS]; - - /** Number of packets dropped by the policer. */ - uint64_t n_pkts_dropped; - - /** Number of bytes passed by the policer. */ - uint64_t n_bytes_dropped; -}; - -/** - * Traffic metering algorithms - */ -enum rte_mtr_algorithm { - /** No traffic metering performed, the output color is the same as the - * input color for every input packet. The meter of the MTR object is - * working in pass-through mode, having same effect as meter disable. 
- * @see rte_mtr_meter_disable() - */ - RTE_MTR_NONE = 0, - - /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */ - RTE_MTR_SRTCM_RFC2697, - - /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */ - RTE_MTR_TRTCM_RFC2698, - - /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */ - RTE_MTR_TRTCM_RFC4115, -}; - -/** - * Meter profile - */ -struct rte_mtr_meter_profile { - /** Traffic metering algorithm. */ - enum rte_mtr_algorithm alg; - - RTE_STD_C11 - union { - /** Items only valid when *alg* is set to srTCM - RFC 2697. */ - struct { - /** Committed Information Rate (CIR) (bytes/second). */ - uint64_t cir; - - /** Committed Burst Size (CBS) (bytes). */ - uint64_t cbs; - - /** Excess Burst Size (EBS) (bytes). */ - uint64_t ebs; - } srtcm_rfc2697; - - /** Items only valid when *alg* is set to trTCM - RFC 2698. */ - struct { - /** Committed Information Rate (CIR) (bytes/second). */ - uint64_t cir; - - /** Peak Information Rate (PIR) (bytes/second). */ - uint64_t pir; - - /** Committed Burst Size (CBS) (byes). */ - uint64_t cbs; - - /** Peak Burst Size (PBS) (bytes). */ - uint64_t pbs; - } trtcm_rfc2698; - - /** Items only valid when *alg* is set to trTCM - RFC 4115. */ - struct { - /** Committed Information Rate (CIR) (bytes/second). */ - uint64_t cir; - - /** Excess Information Rate (EIR) (bytes/second). */ - uint64_t eir; - - /** Committed Burst Size (CBS) (byes). */ - uint64_t cbs; - - /** Excess Burst Size (EBS) (bytes). */ - uint64_t ebs; - } trtcm_rfc4115; - }; -}; - -/** - * Policer actions - */ -enum rte_mtr_policer_action { - /** Recolor the packet as green. */ - MTR_POLICER_ACTION_COLOR_GREEN = 0, - - /** Recolor the packet as yellow. */ - MTR_POLICER_ACTION_COLOR_YELLOW, - - /** Recolor the packet as red. */ - MTR_POLICER_ACTION_COLOR_RED, - - /** Drop the packet. */ - MTR_POLICER_ACTION_DROP, -}; - -/** - * Parameters for each traffic metering & policing object - * - * @see enum rte_mtr_stats_type - */ -struct rte_mtr_params { - /** Meter profile ID. */ - uint32_t meter_profile_id; - - /** Meter input color in case of MTR object chaining. When non-zero: if - * a previous MTR object is enabled in the same flow, then the color - * determined by the latest MTR object in the same flow is used as the - * input color by the current MTR object, otherwise the current MTR - * object uses the *dscp_table* to determine the input color. When zero: - * the color determined by any previous MTR object in same flow is - * ignored by the current MTR object, which uses the *dscp_table* to - * determine the input color. - */ - int use_prev_mtr_color; - - /** Meter input color. When non-NULL: it points to a pre-allocated and - * pre-populated table with exactly 64 elements providing the input - * color for each value of the IPv4/IPv6 Differentiated Services Code - * Point (DSCP) input packet field. When NULL: it is equivalent to - * setting this parameter to an all-green populated table (i.e. table - * with all the 64 elements set to green color). The color blind mode - * is configured by setting *use_prev_mtr_color* to 0 and *dscp_table* - * to either NULL or to an all-green populated table. When - * *use_prev_mtr_color* is non-zero value or when *dscp_table* contains - * at least one yellow or red color element, then the color aware mode - * is configured. - */ - enum rte_mtr_color *dscp_table; - - /** Non-zero to enable the meter, zero to disable the meter at the time - * of MTR object creation. 
Ignored when the meter profile indicated by - * *meter_profile_id* is set to NONE. - * @see rte_mtr_meter_disable() - */ - int meter_enable; - - /** Policer actions (per meter output color). */ - enum rte_mtr_policer_action action[RTE_MTR_COLORS]; - - /** Set of stats counters to be enabled. - * @see enum rte_mtr_stats_type - */ - uint64_t stats_mask; -}; - -/** - * MTR capabilities - */ -struct rte_mtr_capabilities { - /** Maximum number of MTR objects. */ - uint32_t n_max; - - /** Maximum number of MTR objects that can be shared by multiple flows. - * The value of zero indicates that shared MTR objects are not - * supported. The maximum value is *n_max*. - */ - uint32_t n_shared_max; - - /** When non-zero, this flag indicates that all the MTR objects that - * cannot be shared by multiple flows have identical capability set. - */ - int identical; - - /** When non-zero, this flag indicates that all the MTR objects that - * can be shared by multiple flows have identical capability set. - */ - int shared_identical; - - /** Maximum number of flows that can share the same MTR object. The - * value of zero is invalid. The value of 1 means that shared MTR - * objects not supported. - */ - uint32_t shared_n_flows_per_mtr_max; - - /** Maximum number of MTR objects that can be part of the same flow. The - * value of zero is invalid. The value of 1 indicates that MTR object - * chaining is not supported. The maximum value is *n_max*. - */ - uint32_t chaining_n_mtrs_per_flow_max; - - /** - * When non-zero, it indicates that the packet color identified by one - * MTR object can be used as the packet input color by any subsequent - * MTR object from the same flow. When zero, it indicates that the color - * determined by one MTR object is always ignored by any subsequent MTR - * object from the same flow. Only valid when MTR chaining is supported, - * i.e. *chaining_n_mtrs_per_flow_max* is greater than 1. When non-zero, - * it also means that the color aware mode is supported by at least one - * metering algorithm. - */ - int chaining_use_prev_mtr_color_supported; - - /** - * When non-zero, it indicates that the packet color identified by one - * MTR object is always used as the packet input color by any subsequent - * MTR object that is part of the same flow. When zero, it indicates - * that whether the color determined by one MTR object is either ignored - * or used as the packet input color by any subsequent MTR object from - * the same flow is individually configurable for each MTR object. Only - * valid when *chaining_use_prev_mtr_color_supported* is non-zero. - */ - int chaining_use_prev_mtr_color_enforced; - - /** Maximum number of MTR objects that can have their meter configured - * to run the srTCM RFC 2697 algorithm. The value of 0 indicates this - * metering algorithm is not supported. The maximum value is *n_max*. - */ - uint32_t meter_srtcm_rfc2697_n_max; - - /** Maximum number of MTR objects that can have their meter configured - * to run the trTCM RFC 2698 algorithm. The value of 0 indicates this - * metering algorithm is not supported. The maximum value is *n_max*. - */ - uint32_t meter_trtcm_rfc2698_n_max; - - /** Maximum number of MTR objects that can have their meter configured - * to run the trTCM RFC 4115 algorithm. The value of 0 indicates this - * metering algorithm is not supported. The maximum value is *n_max*. - */ - uint32_t meter_trtcm_rfc4115_n_max; - - /** Maximum traffic rate that can be metered by a single MTR object. For - * srTCM RFC 2697, this is the maximum CIR rate. 
For trTCM RFC 2698, - * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum - * value for the sum of PIR and EIR rates. - */ - uint64_t meter_rate_max; - - /** - * When non-zero, it indicates that color aware mode is supported for - * the srTCM RFC 2697 metering algorithm. - */ - int color_aware_srtcm_rfc2697_supported; - - /** - * When non-zero, it indicates that color aware mode is supported for - * the trTCM RFC 2698 metering algorithm. - */ - int color_aware_trtcm_rfc2698_supported; - - /** - * When non-zero, it indicates that color aware mode is supported for - * the trTCM RFC 4115 metering algorithm. - */ - int color_aware_trtcm_rfc4115_supported; - - /** When non-zero, it indicates that the policer packet recolor actions - * are supported. - * @see enum rte_mtr_policer_action - */ - int policer_action_recolor_supported; - - /** When non-zero, it indicates that the policer packet drop action is - * supported. - * @see enum rte_mtr_policer_action - */ - int policer_action_drop_supported; - - /** Set of supported statistics counter types. - * @see enum rte_mtr_stats_type - */ - uint64_t stats_mask; -}; - -/** - * Verbose error types. - * - * Most of them provide the type of the object referenced by struct - * rte_mtr_error::cause. - */ -enum rte_mtr_error_type { - RTE_MTR_ERROR_TYPE_NONE, /**< No error. */ - RTE_MTR_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ - RTE_MTR_ERROR_TYPE_METER_PROFILE_ID, - RTE_MTR_ERROR_TYPE_METER_PROFILE, - RTE_MTR_ERROR_TYPE_MTR_ID, - RTE_MTR_ERROR_TYPE_MTR_PARAMS, - RTE_MTR_ERROR_TYPE_POLICER_ACTION_GREEN, - RTE_MTR_ERROR_TYPE_POLICER_ACTION_YELLOW, - RTE_MTR_ERROR_TYPE_POLICER_ACTION_RED, - RTE_MTR_ERROR_TYPE_STATS_MASK, - RTE_MTR_ERROR_TYPE_STATS, - RTE_MTR_ERROR_TYPE_SHARED, -}; - -/** - * Verbose error structure definition. - * - * This object is normally allocated by applications and set by PMDs, the - * message points to a constant string which does not need to be freed by - * the application, however its pointer can be considered valid only as long - * as its associated DPDK port remains configured. Closing the underlying - * device or unloading the PMD invalidates it. - * - * Both cause and message may be NULL regardless of the error type. - */ -struct rte_mtr_error { - enum rte_mtr_error_type type; /**< Cause field and error type. */ - const void *cause; /**< Object responsible for the error. */ - const char *message; /**< Human-readable error message. */ -}; - -/** - * MTR capabilities get - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[out] cap - * MTR capabilities. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_capabilities_get(uint16_t port_id, - struct rte_mtr_capabilities *cap, - struct rte_mtr_error *error); - -/** - * Meter profile add - * - * Create a new meter profile with ID set to *meter_profile_id*. The new profile - * is used to create one or several MTR objects. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] meter_profile_id - * ID for the new meter profile. Needs to be unused by any of the existing - * meter profiles added for the current port. - * @param[in] profile - * Meter profile parameters. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. 
- * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_meter_profile_add(uint16_t port_id, - uint32_t meter_profile_id, - struct rte_mtr_meter_profile *profile, - struct rte_mtr_error *error); - -/** - * Meter profile delete - * - * Delete an existing meter profile. This operation fails when there is - * currently at least one user (i.e. MTR object) of this profile. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] meter_profile_id - * Meter profile ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_meter_profile_delete(uint16_t port_id, - uint32_t meter_profile_id, - struct rte_mtr_error *error); - -/** - * MTR object create - * - * Create a new MTR object for the current port. This object is run as part of - * associated flow action for traffic metering and policing. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be unused by any of the existing MTR objects. - * created for the current port. - * @param[in] params - * MTR object params. Needs to be pre-allocated and valid. - * @param[in] shared - * Non-zero when this MTR object can be shared by multiple flows, zero when - * this MTR object can be used by a single flow. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see enum rte_flow_action_type::RTE_FLOW_ACTION_TYPE_METER - */ -int __rte_experimental -rte_mtr_create(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_params *params, - int shared, - struct rte_mtr_error *error); - -/** - * MTR object destroy - * - * Delete an existing MTR object. This operation fails when there is currently - * at least one user (i.e. flow) of this MTR object. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * created for the current port. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_destroy(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error); - -/** - * MTR object meter disable - * - * Disable the meter of an existing MTR object. In disabled state, the meter of - * the current MTR object works in pass-through mode, meaning that for each - * input packet the meter output color is always the same as the input color. In - * particular, when the meter of the current MTR object is configured in color - * blind mode, the input color is always green, so the meter output color is - * also always green. Note that the policer and the statistics of the current - * MTR object are working as usual while the meter is disabled. No action is - * taken and this function returns successfully when the meter of the current - * MTR object is already disabled. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. 
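A brief usage sketch for the profile-add and create calls above (setup_example_meter is a hypothetical helper; profile ID, MTR ID, rate and burst sizes are illustrative assumptions, not defaults of this API): configure a 10 Mbit/s srTCM RFC 2697 profile, then instantiate a color-blind, non-shared MTR object from it.

#include <string.h>
#include <rte_mtr.h>

static int
setup_example_meter(uint16_t port_id, struct rte_mtr_error *error)
{
	struct rte_mtr_meter_profile profile;
	struct rte_mtr_params params;
	int ret;

	memset(&profile, 0, sizeof(profile));
	profile.alg = RTE_MTR_SRTCM_RFC2697;
	profile.srtcm_rfc2697.cir = 10000000 / 8;	/* 10 Mbit/s in bytes/second */
	profile.srtcm_rfc2697.cbs = 2048;		/* committed burst (bytes) */
	profile.srtcm_rfc2697.ebs = 2048;		/* excess burst (bytes) */

	ret = rte_mtr_meter_profile_add(port_id, 0 /* meter_profile_id */,
					&profile, error);
	if (ret)
		return ret;

	memset(&params, 0, sizeof(params));
	params.meter_profile_id = 0;		/* the profile added above */
	params.use_prev_mtr_color = 0;		/* color blind mode */
	params.dscp_table = NULL;		/* equivalent to an all-green table */
	params.meter_enable = 1;
	params.action[RTE_MTR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN;
	params.action[RTE_MTR_YELLOW] = MTR_POLICER_ACTION_COLOR_YELLOW;
	params.action[RTE_MTR_RED] = MTR_POLICER_ACTION_COLOR_RED;
	params.stats_mask = RTE_MTR_STATS_N_PKTS_DROPPED;

	return rte_mtr_create(port_id, 0 /* mtr_id */, &params,
			      0 /* not shared */, error);
}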
- */ -int __rte_experimental -rte_mtr_meter_disable(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error); - -/** - * MTR object meter enable - * - * Enable the meter of an existing MTR object. If the MTR object has its meter - * already enabled, then no action is taken and this function returns - * successfully. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_meter_enable(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_error *error); - -/** - * MTR object meter profile update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * @param[in] meter_profile_id - * Meter profile ID for the current MTR object. Needs to be valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_meter_profile_update(uint16_t port_id, - uint32_t mtr_id, - uint32_t meter_profile_id, - struct rte_mtr_error *error); - -/** - * MTR object DSCP table update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * @param[in] dscp_table - * When non-NULL: it points to a pre-allocated and pre-populated table with - * exactly 64 elements providing the input color for each value of the - * IPv4/IPv6 Differentiated Services Code Point (DSCP) input packet field. - * When NULL: it is equivalent to setting this parameter to an “all-green” - * populated table (i.e. table with all the 64 elements set to green color). - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_meter_dscp_table_update(uint16_t port_id, - uint32_t mtr_id, - enum rte_mtr_color *dscp_table, - struct rte_mtr_error *error); - -/** - * MTR object policer actions update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * @param[in] action_mask - * Bit mask indicating which policer actions need to be updated. One or more - * policer actions can be updated in a single function invocation. To update - * the policer action associated with color C, bit (1 << C) needs to be set in - * *action_mask* and element at position C in the *actions* array needs to be - * valid. - * @param[in] actions - * Pre-allocated and pre-populated array of policer actions. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int __rte_experimental -rte_mtr_policer_actions_update(uint16_t port_id, - uint32_t mtr_id, - uint32_t action_mask, - enum rte_mtr_policer_action *actions, - struct rte_mtr_error *error); - -/** - * MTR object enabled statistics counters update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * @param[in] stats_mask - * Mask of statistics counter types to be enabled for the current MTR object. - * Any statistics counter type not included in this set is to be disabled for - * the current MTR object. 
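For instance, a sketch of the action_mask convention documented above for rte_mtr_policer_actions_update() (drop_red_packets is a hypothetical helper): switch only the RED policer action to packet drop, leaving GREEN and YELLOW untouched because their bits are not set in the mask.

static int
drop_red_packets(uint16_t port_id, uint32_t mtr_id,
		 struct rte_mtr_error *error)
{
	enum rte_mtr_policer_action actions[RTE_MTR_COLORS] = {
		/* Only the RED entry must be valid, per the mask below. */
		[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP,
	};
	uint32_t action_mask = 1 << RTE_MTR_RED; /* update color RED only */

	return rte_mtr_policer_actions_update(port_id, mtr_id,
					      action_mask, actions, error);
}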
- * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see enum rte_mtr_stats_type - */ -int __rte_experimental -rte_mtr_stats_update(uint16_t port_id, - uint32_t mtr_id, - uint64_t stats_mask, - struct rte_mtr_error *error); - -/** - * MTR object statistics counters read - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mtr_id - * MTR object ID. Needs to be valid. - * @param[out] stats - * When non-NULL, it contains the current value for the statistics counters - * enabled for the current MTR object. - * @param[out] stats_mask - * When non-NULL, it contains the mask of statistics counter types that are - * currently enabled for this MTR object, indicating which of the counters - * retrieved with the *stats* structure are valid. - * @param[in] clear - * When this parameter has a non-zero value, the statistics counters are - * cleared (i.e. set to zero) immediately after they have been read, - * otherwise the statistics counters are left untouched. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see enum rte_mtr_stats_type - */ -int __rte_experimental -rte_mtr_stats_read(uint16_t port_id, - uint32_t mtr_id, - struct rte_mtr_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_mtr_error *error); - -#ifdef __cplusplus -} -#endif - -#endif /* __INCLUDE_RTE_MTR_H__ */ diff --git a/lib/librte_ether/rte_mtr_driver.h b/lib/librte_ether/rte_mtr_driver.h deleted file mode 100644 index c9a6d7c3..00000000 --- a/lib/librte_ether/rte_mtr_driver.h +++ /dev/null @@ -1,192 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#ifndef __INCLUDE_RTE_MTR_DRIVER_H__ -#define __INCLUDE_RTE_MTR_DRIVER_H__ - -/** - * @file - * RTE Generic Traffic Metering and Policing API (Driver Side) - * - * This file provides implementation helpers for internal use by PMDs, they - * are not intended to be exposed to applications and are not subject to ABI - * versioning. 
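And a short sketch of reading back the counters enabled through stats_mask with the rte_mtr_stats_read() call documented above (report_mtr_drops is a hypothetical helper; it assumes the dropped-packet counter was enabled, as in the earlier setup sketch; clear = 1 resets the counters after the read):

#include <inttypes.h>
#include <stdio.h>
#include <rte_mtr.h>

static int
report_mtr_drops(uint16_t port_id, uint32_t mtr_id,
		 struct rte_mtr_error *error)
{
	struct rte_mtr_stats stats;
	uint64_t stats_mask = 0;
	int ret;

	ret = rte_mtr_stats_read(port_id, mtr_id, &stats, &stats_mask,
				 1 /* clear after read */, error);
	if (ret)
		return ret;

	if (stats_mask & RTE_MTR_STATS_N_PKTS_DROPPED)
		printf("mtr %u: %" PRIu64 " packets dropped\n",
		       mtr_id, stats.n_pkts_dropped);
	return 0;
}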
- */ - -#include - -#include -#include "rte_ethdev.h" -#include "rte_mtr.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef int (*rte_mtr_capabilities_get_t)(struct rte_eth_dev *dev, - struct rte_mtr_capabilities *cap, - struct rte_mtr_error *error); -/**< @internal MTR capabilities get */ - -typedef int (*rte_mtr_meter_profile_add_t)(struct rte_eth_dev *dev, - uint32_t meter_profile_id, - struct rte_mtr_meter_profile *profile, - struct rte_mtr_error *error); -/**< @internal MTR meter profile add */ - -typedef int (*rte_mtr_meter_profile_delete_t)(struct rte_eth_dev *dev, - uint32_t meter_profile_id, - struct rte_mtr_error *error); -/**< @internal MTR meter profile delete */ - -typedef int (*rte_mtr_create_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - struct rte_mtr_params *params, - int shared, - struct rte_mtr_error *error); -/**< @internal MTR object create */ - -typedef int (*rte_mtr_destroy_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - struct rte_mtr_error *error); -/**< @internal MTR object destroy */ - -typedef int (*rte_mtr_meter_enable_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - struct rte_mtr_error *error); -/**< @internal MTR object meter enable */ - -typedef int (*rte_mtr_meter_disable_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - struct rte_mtr_error *error); -/**< @internal MTR object meter disable */ - -typedef int (*rte_mtr_meter_profile_update_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - uint32_t meter_profile_id, - struct rte_mtr_error *error); -/**< @internal MTR object meter profile update */ - -typedef int (*rte_mtr_meter_dscp_table_update_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - enum rte_mtr_color *dscp_table, - struct rte_mtr_error *error); -/**< @internal MTR object meter DSCP table update */ - -typedef int (*rte_mtr_policer_actions_update_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - uint32_t action_mask, - enum rte_mtr_policer_action *actions, - struct rte_mtr_error *error); -/**< @internal MTR object policer action update*/ - -typedef int (*rte_mtr_stats_update_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - uint64_t stats_mask, - struct rte_mtr_error *error); -/**< @internal MTR object enabled stats update */ - -typedef int (*rte_mtr_stats_read_t)(struct rte_eth_dev *dev, - uint32_t mtr_id, - struct rte_mtr_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_mtr_error *error); -/**< @internal MTR object stats read */ - -struct rte_mtr_ops { - /** MTR capabilities get */ - rte_mtr_capabilities_get_t capabilities_get; - - /** MTR meter profile add */ - rte_mtr_meter_profile_add_t meter_profile_add; - - /** MTR meter profile delete */ - rte_mtr_meter_profile_delete_t meter_profile_delete; - - /** MTR object create */ - rte_mtr_create_t create; - - /** MTR object destroy */ - rte_mtr_destroy_t destroy; - - /** MTR object meter enable */ - rte_mtr_meter_enable_t meter_enable; - - /** MTR object meter disable */ - rte_mtr_meter_disable_t meter_disable; - - /** MTR object meter profile update */ - rte_mtr_meter_profile_update_t meter_profile_update; - - /** MTR object meter DSCP table update */ - rte_mtr_meter_dscp_table_update_t meter_dscp_table_update; - - /** MTR object policer action update */ - rte_mtr_policer_actions_update_t policer_actions_update; - - /** MTR object enabled stats update */ - rte_mtr_stats_update_t stats_update; - - /** MTR object stats read */ - rte_mtr_stats_read_t stats_read; -}; - -/** - * Initialize generic error structure. - * - * This function also sets rte_errno to a given value. 
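For illustration only (not part of this patch): a hypothetical PMD-side sketch of how a driver might populate the rte_mtr_ops table listed above. The capability field n_max is assumed to exist as in rte_mtr.h, and callbacks left NULL are expected to surface as ENOSYS through the generic layer, mirroring the RTE_TM_FUNC pattern shown further below for rte_tm.

/*
 * Hypothetical PMD-side sketch: a driver exposes its metering callbacks
 * through a static rte_mtr_ops table; unimplemented entries stay NULL.
 */
#include <string.h>
#include <rte_mtr_driver.h>

static int
dummy_mtr_capabilities_get(struct rte_eth_dev *dev,
	struct rte_mtr_capabilities *cap,
	struct rte_mtr_error *error)
{
	(void)dev;
	(void)error;
	memset(cap, 0, sizeof(*cap));
	cap->n_max = 1;	/* a single MTR object, purely illustrative */
	return 0;
}

static const struct rte_mtr_ops dummy_mtr_ops = {
	.capabilities_get = dummy_mtr_capabilities_get,
	/* all other callbacks intentionally left NULL */
};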
- * - * @param[out] error - * Pointer to error structure (may be NULL). - * @param[in] code - * Related error code (rte_errno). - * @param[in] type - * Cause field and error type. - * @param[in] cause - * Object responsible for the error. - * @param[in] message - * Human-readable error message. - * - * @return - * Error code. - */ -static inline int -rte_mtr_error_set(struct rte_mtr_error *error, - int code, - enum rte_mtr_error_type type, - const void *cause, - const char *message) -{ - if (error) { - *error = (struct rte_mtr_error){ - .type = type, - .cause = cause, - .message = message, - }; - } - rte_errno = code; - return code; -} - -/** - * Get generic traffic metering and policing operations structure from a port - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[out] error - * Error details - * - * @return - * The traffic metering and policing operations structure associated with - * port_id on success, NULL otherwise. - */ -const struct rte_mtr_ops * -rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error); - -#ifdef __cplusplus -} -#endif - -#endif /* __INCLUDE_RTE_MTR_DRIVER_H__ */ diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ether/rte_tm.c deleted file mode 100644 index 9709454f..00000000 --- a/lib/librte_ether/rte_tm.c +++ /dev/null @@ -1,409 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#include - -#include -#include "rte_ethdev.h" -#include "rte_tm_driver.h" -#include "rte_tm.h" - -/* Get generic traffic manager operations structure from a port. */ -const struct rte_tm_ops * -rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_tm_ops *ops; - - if (!rte_eth_dev_is_valid_port(port_id)) { - rte_tm_error_set(error, - ENODEV, - RTE_TM_ERROR_TYPE_UNSPECIFIED, - NULL, - rte_strerror(ENODEV)); - return NULL; - } - - if ((dev->dev_ops->tm_ops_get == NULL) || - (dev->dev_ops->tm_ops_get(dev, &ops) != 0) || - (ops == NULL)) { - rte_tm_error_set(error, - ENOSYS, - RTE_TM_ERROR_TYPE_UNSPECIFIED, - NULL, - rte_strerror(ENOSYS)); - return NULL; - } - - return ops; -} - -#define RTE_TM_FUNC(port_id, func) \ -({ \ - const struct rte_tm_ops *ops = \ - rte_tm_ops_get(port_id, error); \ - if (ops == NULL) \ - return -rte_errno; \ - \ - if (ops->func == NULL) \ - return -rte_tm_error_set(error, \ - ENOSYS, \ - RTE_TM_ERROR_TYPE_UNSPECIFIED, \ - NULL, \ - rte_strerror(ENOSYS)); \ - \ - ops->func; \ -}) - -/* Get number of leaf nodes */ -int -rte_tm_get_number_of_leaf_nodes(uint16_t port_id, - uint32_t *n_leaf_nodes, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - const struct rte_tm_ops *ops = - rte_tm_ops_get(port_id, error); - - if (ops == NULL) - return -rte_errno; - - if (n_leaf_nodes == NULL) { - rte_tm_error_set(error, - EINVAL, - RTE_TM_ERROR_TYPE_UNSPECIFIED, - NULL, - rte_strerror(EINVAL)); - return -rte_errno; - } - - *n_leaf_nodes = dev->data->nb_tx_queues; - return 0; -} - -/* Check node type (leaf or non-leaf) */ -int -rte_tm_node_type_get(uint16_t port_id, - uint32_t node_id, - int *is_leaf, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_type_get)(dev, - node_id, is_leaf, error); -} - -/* Get capabilities */ -int rte_tm_capabilities_get(uint16_t port_id, - struct rte_tm_capabilities *cap, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return 
RTE_TM_FUNC(port_id, capabilities_get)(dev, - cap, error); -} - -/* Get level capabilities */ -int rte_tm_level_capabilities_get(uint16_t port_id, - uint32_t level_id, - struct rte_tm_level_capabilities *cap, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, level_capabilities_get)(dev, - level_id, cap, error); -} - -/* Get node capabilities */ -int rte_tm_node_capabilities_get(uint16_t port_id, - uint32_t node_id, - struct rte_tm_node_capabilities *cap, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_capabilities_get)(dev, - node_id, cap, error); -} - -/* Add WRED profile */ -int rte_tm_wred_profile_add(uint16_t port_id, - uint32_t wred_profile_id, - struct rte_tm_wred_params *profile, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, wred_profile_add)(dev, - wred_profile_id, profile, error); -} - -/* Delete WRED profile */ -int rte_tm_wred_profile_delete(uint16_t port_id, - uint32_t wred_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, wred_profile_delete)(dev, - wred_profile_id, error); -} - -/* Add/update shared WRED context */ -int rte_tm_shared_wred_context_add_update(uint16_t port_id, - uint32_t shared_wred_context_id, - uint32_t wred_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shared_wred_context_add_update)(dev, - shared_wred_context_id, wred_profile_id, error); -} - -/* Delete shared WRED context */ -int rte_tm_shared_wred_context_delete(uint16_t port_id, - uint32_t shared_wred_context_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shared_wred_context_delete)(dev, - shared_wred_context_id, error); -} - -/* Add shaper profile */ -int rte_tm_shaper_profile_add(uint16_t port_id, - uint32_t shaper_profile_id, - struct rte_tm_shaper_params *profile, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shaper_profile_add)(dev, - shaper_profile_id, profile, error); -} - -/* Delete WRED profile */ -int rte_tm_shaper_profile_delete(uint16_t port_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shaper_profile_delete)(dev, - shaper_profile_id, error); -} - -/* Add shared shaper */ -int rte_tm_shared_shaper_add_update(uint16_t port_id, - uint32_t shared_shaper_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shared_shaper_add_update)(dev, - shared_shaper_id, shaper_profile_id, error); -} - -/* Delete shared shaper */ -int rte_tm_shared_shaper_delete(uint16_t port_id, - uint32_t shared_shaper_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, shared_shaper_delete)(dev, - shared_shaper_id, error); -} - -/* Add node to port traffic manager hierarchy */ -int rte_tm_node_add(uint16_t port_id, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - uint32_t level_id, - struct rte_tm_node_params *params, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = 
&rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_add)(dev, - node_id, parent_node_id, priority, weight, level_id, - params, error); -} - -/* Delete node from traffic manager hierarchy */ -int rte_tm_node_delete(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_delete)(dev, - node_id, error); -} - -/* Suspend node */ -int rte_tm_node_suspend(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_suspend)(dev, - node_id, error); -} - -/* Resume node */ -int rte_tm_node_resume(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_resume)(dev, - node_id, error); -} - -/* Commit the initial port traffic manager hierarchy */ -int rte_tm_hierarchy_commit(uint16_t port_id, - int clear_on_fail, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, hierarchy_commit)(dev, - clear_on_fail, error); -} - -/* Update node parent */ -int rte_tm_node_parent_update(uint16_t port_id, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_parent_update)(dev, - node_id, parent_node_id, priority, weight, error); -} - -/* Update node private shaper */ -int rte_tm_node_shaper_update(uint16_t port_id, - uint32_t node_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_shaper_update)(dev, - node_id, shaper_profile_id, error); -} - -/* Update node shared shapers */ -int rte_tm_node_shared_shaper_update(uint16_t port_id, - uint32_t node_id, - uint32_t shared_shaper_id, - int add, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_shared_shaper_update)(dev, - node_id, shared_shaper_id, add, error); -} - -/* Update node stats */ -int rte_tm_node_stats_update(uint16_t port_id, - uint32_t node_id, - uint64_t stats_mask, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_stats_update)(dev, - node_id, stats_mask, error); -} - -/* Update WFQ weight mode */ -int rte_tm_node_wfq_weight_mode_update(uint16_t port_id, - uint32_t node_id, - int *wfq_weight_mode, - uint32_t n_sp_priorities, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_wfq_weight_mode_update)(dev, - node_id, wfq_weight_mode, n_sp_priorities, error); -} - -/* Update node congestion management mode */ -int rte_tm_node_cman_update(uint16_t port_id, - uint32_t node_id, - enum rte_tm_cman_mode cman, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_cman_update)(dev, - node_id, cman, error); -} - -/* Update node private WRED context */ -int rte_tm_node_wred_context_update(uint16_t port_id, - uint32_t node_id, - uint32_t wred_profile_id, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_wred_context_update)(dev, - node_id, wred_profile_id, error); -} - -/* Update 
node shared WRED context */ -int rte_tm_node_shared_wred_context_update(uint16_t port_id, - uint32_t node_id, - uint32_t shared_wred_context_id, - int add, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_shared_wred_context_update)(dev, - node_id, shared_wred_context_id, add, error); -} - -/* Read and/or clear stats counters for specific node */ -int rte_tm_node_stats_read(uint16_t port_id, - uint32_t node_id, - struct rte_tm_node_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, node_stats_read)(dev, - node_id, stats, stats_mask, clear, error); -} - -/* Packet marking - VLAN DEI */ -int rte_tm_mark_vlan_dei(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, mark_vlan_dei)(dev, - mark_green, mark_yellow, mark_red, error); -} - -/* Packet marking - IPv4/IPv6 ECN */ -int rte_tm_mark_ip_ecn(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, mark_ip_ecn)(dev, - mark_green, mark_yellow, mark_red, error); -} - -/* Packet marking - IPv4/IPv6 DSCP */ -int rte_tm_mark_ip_dscp(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - return RTE_TM_FUNC(port_id, mark_ip_dscp)(dev, - mark_green, mark_yellow, mark_red, error); -} diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ether/rte_tm.h deleted file mode 100644 index 2b25a871..00000000 --- a/lib/librte_ether/rte_tm.h +++ /dev/null @@ -1,1912 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. - * Copyright(c) 2017 Cavium. - * Copyright(c) 2017 NXP. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __INCLUDE_RTE_TM_H__ -#define __INCLUDE_RTE_TM_H__ - -/** - * @file - * RTE Generic Traffic Manager API - * - * This interface provides the ability to configure the traffic manager in a - * generic way. It includes features such as: hierarchical scheduling, - * traffic shaping, congestion management, packet marking, etc. - * - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - */ - -#include - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Ethernet framing overhead. - * - * Overhead fields per Ethernet frame: - * 1. Preamble: 7 bytes; - * 2. Start of Frame Delimiter (SFD): 1 byte; - * 3. Inter-Frame Gap (IFG): 12 bytes. - * - * One of the typical values for the *pkt_length_adjust* field of the shaper - * profile. - * - * @see struct rte_tm_shaper_params - */ -#define RTE_TM_ETH_FRAMING_OVERHEAD 20 - -/** - * Ethernet framing overhead including the Frame Check Sequence (FCS) field. - * Useful when FCS is generated and added at the end of the Ethernet frame on - * TX side without any SW intervention. - * - * One of the typical values for the pkt_length_adjust field of the shaper - * profile. - * - * @see struct rte_tm_shaper_params - */ -#define RTE_TM_ETH_FRAMING_OVERHEAD_FCS 24 - -/** - * Invalid WRED profile ID. - * - * @see struct rte_tm_node_params - * @see rte_tm_node_add() - * @see rte_tm_node_wred_context_update() - */ -#define RTE_TM_WRED_PROFILE_ID_NONE UINT32_MAX - -/** - *Invalid shaper profile ID. - * - * @see struct rte_tm_node_params - * @see rte_tm_node_add() - * @see rte_tm_node_shaper_update() - */ -#define RTE_TM_SHAPER_PROFILE_ID_NONE UINT32_MAX - -/** - * Node ID for the parent of the root node. - * - * @see rte_tm_node_add() - */ -#define RTE_TM_NODE_ID_NULL UINT32_MAX - -/** - * Node level ID used to disable level ID checking. - * - * @see rte_tm_node_add() - */ -#define RTE_TM_NODE_LEVEL_ID_ANY UINT32_MAX - -/** - * Color - */ -enum rte_tm_color { - RTE_TM_GREEN = 0, /**< Green */ - RTE_TM_YELLOW, /**< Yellow */ - RTE_TM_RED, /**< Red */ - RTE_TM_COLORS /**< Number of colors */ -}; - -/** - * Node statistics counter type - */ -enum rte_tm_stats_type { - /** Number of packets scheduled from current node. */ - RTE_TM_STATS_N_PKTS = 1 << 0, - - /** Number of bytes scheduled from current node. */ - RTE_TM_STATS_N_BYTES = 1 << 1, - - /** Number of green packets dropped by current leaf node. */ - RTE_TM_STATS_N_PKTS_GREEN_DROPPED = 1 << 2, - - /** Number of yellow packets dropped by current leaf node. */ - RTE_TM_STATS_N_PKTS_YELLOW_DROPPED = 1 << 3, - - /** Number of red packets dropped by current leaf node. */ - RTE_TM_STATS_N_PKTS_RED_DROPPED = 1 << 4, - - /** Number of green bytes dropped by current leaf node. */ - RTE_TM_STATS_N_BYTES_GREEN_DROPPED = 1 << 5, - - /** Number of yellow bytes dropped by current leaf node. */ - RTE_TM_STATS_N_BYTES_YELLOW_DROPPED = 1 << 6, - - /** Number of red bytes dropped by current leaf node. 
*/ - RTE_TM_STATS_N_BYTES_RED_DROPPED = 1 << 7, - - /** Number of packets currently waiting in the packet queue of current - * leaf node. - */ - RTE_TM_STATS_N_PKTS_QUEUED = 1 << 8, - - /** Number of bytes currently waiting in the packet queue of current - * leaf node. - */ - RTE_TM_STATS_N_BYTES_QUEUED = 1 << 9, -}; - -/** - * Node statistics counters - */ -struct rte_tm_node_stats { - /** Number of packets scheduled from current node. */ - uint64_t n_pkts; - - /** Number of bytes scheduled from current node. */ - uint64_t n_bytes; - - /** Statistics counters for leaf nodes only. */ - struct { - /** Number of packets dropped by current leaf node per each - * color. - */ - uint64_t n_pkts_dropped[RTE_TM_COLORS]; - - /** Number of bytes dropped by current leaf node per each - * color. - */ - uint64_t n_bytes_dropped[RTE_TM_COLORS]; - - /** Number of packets currently waiting in the packet queue of - * current leaf node. - */ - uint64_t n_pkts_queued; - - /** Number of bytes currently waiting in the packet queue of - * current leaf node. - */ - uint64_t n_bytes_queued; - } leaf; -}; - -/** - * Traffic manager dynamic updates - */ -enum rte_tm_dynamic_update_type { - /** Dynamic parent node update. The new parent node is located on same - * hierarchy level as the former parent node. Consequently, the node - * whose parent is changed preserves its hierarchy level. - */ - RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL = 1 << 0, - - /** Dynamic parent node update. The new parent node is located on - * different hierarchy level than the former parent node. Consequently, - * the node whose parent is changed also changes its hierarchy level. - */ - RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL = 1 << 1, - - /** Dynamic node add/delete. */ - RTE_TM_UPDATE_NODE_ADD_DELETE = 1 << 2, - - /** Suspend/resume nodes. */ - RTE_TM_UPDATE_NODE_SUSPEND_RESUME = 1 << 3, - - /** Dynamic switch between byte-based and packet-based WFQ weights. */ - RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE = 1 << 4, - - /** Dynamic update on number of SP priorities. */ - RTE_TM_UPDATE_NODE_N_SP_PRIORITIES = 1 << 5, - - /** Dynamic update of congestion management mode for leaf nodes. */ - RTE_TM_UPDATE_NODE_CMAN = 1 << 6, - - /** Dynamic update of the set of enabled stats counter types. */ - RTE_TM_UPDATE_NODE_STATS = 1 << 7, -}; - -/** - * Traffic manager capabilities - */ -struct rte_tm_capabilities { - /** Maximum number of nodes. */ - uint32_t n_nodes_max; - - /** Maximum number of levels (i.e. number of nodes connecting the root - * node with any leaf node, including the root and the leaf). - */ - uint32_t n_levels_max; - - /** When non-zero, this flag indicates that all the non-leaf nodes - * (with the exception of the root node) have identical capability set. - */ - int non_leaf_nodes_identical; - - /** When non-zero, this flag indicates that all the leaf nodes have - * identical capability set. - */ - int leaf_nodes_identical; - - /** Maximum number of shapers, either private or shared. In case the - * implementation does not share any resources between private and - * shared shapers, it is typically equal to the sum of - * *shaper_private_n_max* and *shaper_shared_n_max*. The - * value of zero indicates that traffic shaping is not supported. - */ - uint32_t shaper_n_max; - - /** Maximum number of private shapers. Indicates the maximum number of - * nodes that can concurrently have their private shaper enabled. The - * value of zero indicates that private shapers are not supported. 
- */ - uint32_t shaper_private_n_max; - - /** Maximum number of private shapers that support dual rate shaping. - * Indicates the maximum number of nodes that can concurrently have - * their private shaper enabled with dual rate support. Only valid when - * private shapers are supported. The value of zero indicates that dual - * rate shaping is not available for private shapers. The maximum value - * is *shaper_private_n_max*. - */ - int shaper_private_dual_rate_n_max; - - /** Minimum committed/peak rate (bytes per second) for any private - * shaper. Valid only when private shapers are supported. - */ - uint64_t shaper_private_rate_min; - - /** Maximum committed/peak rate (bytes per second) for any private - * shaper. Valid only when private shapers are supported. - */ - uint64_t shaper_private_rate_max; - - /** Maximum number of shared shapers. The value of zero indicates that - * shared shapers are not supported. - */ - uint32_t shaper_shared_n_max; - - /** Maximum number of nodes that can share the same shared shaper. - * Only valid when shared shapers are supported. - */ - uint32_t shaper_shared_n_nodes_per_shaper_max; - - /** Maximum number of shared shapers a node can be part of. This - * parameter indicates that there is at least one node that can be - * configured with this many shared shapers, which might not be true for - * all the nodes. Only valid when shared shapers are supported, in which - * case it ranges from 1 to *shaper_shared_n_max*. - */ - uint32_t shaper_shared_n_shapers_per_node_max; - - /** Maximum number of shared shapers that can be configured with dual - * rate shaping. The value of zero indicates that dual rate shaping - * support is not available for shared shapers. - */ - uint32_t shaper_shared_dual_rate_n_max; - - /** Minimum committed/peak rate (bytes per second) for any shared - * shaper. Only valid when shared shapers are supported. - */ - uint64_t shaper_shared_rate_min; - - /** Maximum committed/peak rate (bytes per second) for any shared - * shaper. Only valid when shared shapers are supported. - */ - uint64_t shaper_shared_rate_max; - - /** Minimum value allowed for packet length adjustment for any private - * or shared shaper. - */ - int shaper_pkt_length_adjust_min; - - /** Maximum value allowed for packet length adjustment for any private - * or shared shaper. - */ - int shaper_pkt_length_adjust_max; - - /** Maximum number of children nodes. This parameter indicates that - * there is at least one non-leaf node that can be configured with this - * many children nodes, which might not be true for all the non-leaf - * nodes. - */ - uint32_t sched_n_children_max; - - /** Maximum number of supported priority levels. This parameter - * indicates that there is at least one non-leaf node that can be - * configured with this many priority levels for managing its children - * nodes, which might not be true for all the non-leaf nodes. The value - * of zero is invalid. The value of 1 indicates that only priority 0 is - * supported, which essentially means that Strict Priority (SP) - * algorithm is not supported. - */ - uint32_t sched_sp_n_priorities_max; - - /** Maximum number of sibling nodes that can have the same priority at - * any given time, i.e. maximum size of the WFQ sibling node group. This - * parameter indicates there is at least one non-leaf node that meets - * this condition, which might not be true for all the non-leaf nodes. - * The value of zero is invalid. The value of 1 indicates that WFQ - * algorithm is not supported. 
The maximum value is - * *sched_n_children_max*. - */ - uint32_t sched_wfq_n_children_per_group_max; - - /** Maximum number of priority levels that can have more than one child - * node at any given time, i.e. maximum number of WFQ sibling node - * groups that have two or more members. This parameter indicates there - * is at least one non-leaf node that meets this condition, which might - * not be true for all the non-leaf nodes. The value of zero states that - * WFQ algorithm is not supported. The value of 1 indicates that - * (*sched_sp_n_priorities_max* - 1) priority levels have at most one - * child node, so there can be only one priority level with two or - * more sibling nodes making up a WFQ group. The maximum value is: - * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*). - */ - uint32_t sched_wfq_n_groups_max; - - /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes - * with same priority have the same WFQ weight, so WFQ is reduced to FQ. - */ - uint32_t sched_wfq_weight_max; - - /** Head drop algorithm support. When non-zero, this parameter - * indicates that there is at least one leaf node that supports the head - * drop algorithm, which might not be true for all the leaf nodes. - */ - int cman_head_drop_supported; - - /** Maximum number of WRED contexts, either private or shared. In case - * the implementation does not share any resources between private and - * shared WRED contexts, it is typically equal to the sum of - * *cman_wred_context_private_n_max* and - * *cman_wred_context_shared_n_max*. The value of zero indicates that - * WRED is not supported. - */ - uint32_t cman_wred_context_n_max; - - /** Maximum number of private WRED contexts. Indicates the maximum - * number of leaf nodes that can concurrently have their private WRED - * context enabled. The value of zero indicates that private WRED - * contexts are not supported. - */ - uint32_t cman_wred_context_private_n_max; - - /** Maximum number of shared WRED contexts. The value of zero - * indicates that shared WRED contexts are not supported. - */ - uint32_t cman_wred_context_shared_n_max; - - /** Maximum number of leaf nodes that can share the same WRED context. - * Only valid when shared WRED contexts are supported. - */ - uint32_t cman_wred_context_shared_n_nodes_per_context_max; - - /** Maximum number of shared WRED contexts a leaf node can be part of. - * This parameter indicates that there is at least one leaf node that - * can be configured with this many shared WRED contexts, which might - * not be true for all the leaf nodes. Only valid when shared WRED - * contexts are supported, in which case it ranges from 1 to - * *cman_wred_context_shared_n_max*. - */ - uint32_t cman_wred_context_shared_n_contexts_per_node_max; - - /** Support for VLAN DEI packet marking (per color). */ - int mark_vlan_dei_supported[RTE_TM_COLORS]; - - /** Support for IPv4/IPv6 ECN marking of TCP packets (per color). */ - int mark_ip_ecn_tcp_supported[RTE_TM_COLORS]; - - /** Support for IPv4/IPv6 ECN marking of SCTP packets (per color). */ - int mark_ip_ecn_sctp_supported[RTE_TM_COLORS]; - - /** Support for IPv4/IPv6 DSCP packet marking (per color). */ - int mark_ip_dscp_supported[RTE_TM_COLORS]; - - /** Set of supported dynamic update operations. - * @see enum rte_tm_dynamic_update_type - */ - uint64_t dynamic_update_mask; - - /** Set of supported statistics counter types. 
- * @see enum rte_tm_stats_type - */ - uint64_t stats_mask; -}; - -/** - * Traffic manager level capabilities - */ -struct rte_tm_level_capabilities { - /** Maximum number of nodes for the current hierarchy level. */ - uint32_t n_nodes_max; - - /** Maximum number of non-leaf nodes for the current hierarchy level. - * The value of 0 indicates that current level only supports leaf - * nodes. The maximum value is *n_nodes_max*. - */ - uint32_t n_nodes_nonleaf_max; - - /** Maximum number of leaf nodes for the current hierarchy level. The - * value of 0 indicates that current level only supports non-leaf - * nodes. The maximum value is *n_nodes_max*. - */ - uint32_t n_nodes_leaf_max; - - /** When non-zero, this flag indicates that all the non-leaf nodes on - * this level have identical capability set. Valid only when - * *n_nodes_nonleaf_max* is non-zero. - */ - int non_leaf_nodes_identical; - - /** When non-zero, this flag indicates that all the leaf nodes on this - * level have identical capability set. Valid only when - * *n_nodes_leaf_max* is non-zero. - */ - int leaf_nodes_identical; - - RTE_STD_C11 - union { - /** Items valid only for the non-leaf nodes on this level. */ - struct { - /** Private shaper support. When non-zero, it indicates - * there is at least one non-leaf node on this level - * with private shaper support, which may not be the - * case for all the non-leaf nodes on this level. - */ - int shaper_private_supported; - - /** Dual rate support for private shaper. Valid only - * when private shaper is supported for the non-leaf - * nodes on the current level. When non-zero, it - * indicates there is at least one non-leaf node on this - * level with dual rate private shaper support, which - * may not be the case for all the non-leaf nodes on - * this level. - */ - int shaper_private_dual_rate_supported; - - /** Minimum committed/peak rate (bytes per second) for - * private shapers of the non-leaf nodes of this level. - * Valid only when private shaper is supported on this - * level. - */ - uint64_t shaper_private_rate_min; - - /** Maximum committed/peak rate (bytes per second) for - * private shapers of the non-leaf nodes on this level. - * Valid only when private shaper is supported on this - * level. - */ - uint64_t shaper_private_rate_max; - - /** Maximum number of shared shapers that any non-leaf - * node on this level can be part of. The value of zero - * indicates that shared shapers are not supported by - * the non-leaf nodes on this level. When non-zero, it - * indicates there is at least one non-leaf node on this - * level that meets this condition, which may not be the - * case for all the non-leaf nodes on this level. - */ - uint32_t shaper_shared_n_max; - - /** Maximum number of children nodes. This parameter - * indicates that there is at least one non-leaf node on - * this level that can be configured with this many - * children nodes, which might not be true for all the - * non-leaf nodes on this level. - */ - uint32_t sched_n_children_max; - - /** Maximum number of supported priority levels. This - * parameter indicates that there is at least one - * non-leaf node on this level that can be configured - * with this many priority levels for managing its - * children nodes, which might not be true for all the - * non-leaf nodes on this level. The value of zero is - * invalid. The value of 1 indicates that only priority - * 0 is supported, which essentially means that Strict - * Priority (SP) algorithm is not supported on this - * level. 
- */ - uint32_t sched_sp_n_priorities_max; - - /** Maximum number of sibling nodes that can have the - * same priority at any given time, i.e. maximum size of - * the WFQ sibling node group. This parameter indicates - * there is at least one non-leaf node on this level - * that meets this condition, which may not be true for - * all the non-leaf nodes on this level. The value of - * zero is invalid. The value of 1 indicates that WFQ - * algorithm is not supported on this level. The maximum - * value is *sched_n_children_max*. - */ - uint32_t sched_wfq_n_children_per_group_max; - - /** Maximum number of priority levels that can have - * more than one child node at any given time, i.e. - * maximum number of WFQ sibling node groups that - * have two or more members. This parameter indicates - * there is at least one non-leaf node on this level - * that meets this condition, which might not be true - * for all the non-leaf nodes. The value of zero states - * that WFQ algorithm is not supported on this level. - * The value of 1 indicates that - * (*sched_sp_n_priorities_max* - 1) priority levels on - * this level have at most one child node, so there can - * be only one priority level with two or more sibling - * nodes making up a WFQ group on this level. The - * maximum value is: - * min(floor(*sched_n_children_max* / 2), - * *sched_sp_n_priorities_max*). - */ - uint32_t sched_wfq_n_groups_max; - - /** Maximum WFQ weight. The value of 1 indicates that - * all sibling nodes on this level with same priority - * have the same WFQ weight, so on this level WFQ is - * reduced to FQ. - */ - uint32_t sched_wfq_weight_max; - - /** Mask of statistics counter types supported by the - * non-leaf nodes on this level. Every supported - * statistics counter type is supported by at least one - * non-leaf node on this level, which may not be true - * for all the non-leaf nodes on this level. - * @see enum rte_tm_stats_type - */ - uint64_t stats_mask; - } nonleaf; - - /** Items valid only for the leaf nodes on this level. */ - struct { - /** Private shaper support. When non-zero, it indicates - * there is at least one leaf node on this level with - * private shaper support, which may not be the case for - * all the leaf nodes on this level. - */ - int shaper_private_supported; - - /** Dual rate support for private shaper. Valid only - * when private shaper is supported for the leaf nodes - * on this level. When non-zero, it indicates there is - * at least one leaf node on this level with dual rate - * private shaper support, which may not be the case for - * all the leaf nodes on this level. - */ - int shaper_private_dual_rate_supported; - - /** Minimum committed/peak rate (bytes per second) for - * private shapers of the leaf nodes of this level. - * Valid only when private shaper is supported for the - * leaf nodes on this level. - */ - uint64_t shaper_private_rate_min; - - /** Maximum committed/peak rate (bytes per second) for - * private shapers of the leaf nodes on this level. - * Valid only when private shaper is supported for the - * leaf nodes on this level. - */ - uint64_t shaper_private_rate_max; - - /** Maximum number of shared shapers that any leaf node - * on this level can be part of. The value of zero - * indicates that shared shapers are not supported by - * the leaf nodes on this level. When non-zero, it - * indicates there is at least one leaf node on this - * level that meets this condition, which may not be the - * case for all the leaf nodes on this level. 
- */ - uint32_t shaper_shared_n_max; - - /** Head drop algorithm support. When non-zero, this - * parameter indicates that there is at least one leaf - * node on this level that supports the head drop - * algorithm, which might not be true for all the leaf - * nodes on this level. - */ - int cman_head_drop_supported; - - /** Private WRED context support. When non-zero, it - * indicates there is at least one node on this level - * with private WRED context support, which may not be - * true for all the leaf nodes on this level. - */ - int cman_wred_context_private_supported; - - /** Maximum number of shared WRED contexts that any - * leaf node on this level can be part of. The value of - * zero indicates that shared WRED contexts are not - * supported by the leaf nodes on this level. When - * non-zero, it indicates there is at least one leaf - * node on this level that meets this condition, which - * may not be the case for all the leaf nodes on this - * level. - */ - uint32_t cman_wred_context_shared_n_max; - - /** Mask of statistics counter types supported by the - * leaf nodes on this level. Every supported statistics - * counter type is supported by at least one leaf node - * on this level, which may not be true for all the leaf - * nodes on this level. - * @see enum rte_tm_stats_type - */ - uint64_t stats_mask; - } leaf; - }; -}; - -/** - * Traffic manager node capabilities - */ -struct rte_tm_node_capabilities { - /** Private shaper support for the current node. */ - int shaper_private_supported; - - /** Dual rate shaping support for private shaper of current node. - * Valid only when private shaper is supported by the current node. - */ - int shaper_private_dual_rate_supported; - - /** Minimum committed/peak rate (bytes per second) for private - * shaper of current node. Valid only when private shaper is supported - * by the current node. - */ - uint64_t shaper_private_rate_min; - - /** Maximum committed/peak rate (bytes per second) for private - * shaper of current node. Valid only when private shaper is supported - * by the current node. - */ - uint64_t shaper_private_rate_max; - - /** Maximum number of shared shapers the current node can be part of. - * The value of zero indicates that shared shapers are not supported by - * the current node. - */ - uint32_t shaper_shared_n_max; - - RTE_STD_C11 - union { - /** Items valid only for non-leaf nodes. */ - struct { - /** Maximum number of children nodes. */ - uint32_t sched_n_children_max; - - /** Maximum number of supported priority levels. The - * value of zero is invalid. The value of 1 indicates - * that only priority 0 is supported, which essentially - * means that Strict Priority (SP) algorithm is not - * supported. - */ - uint32_t sched_sp_n_priorities_max; - - /** Maximum number of sibling nodes that can have the - * same priority at any given time, i.e. maximum size - * of the WFQ sibling node group. The value of zero - * is invalid. The value of 1 indicates that WFQ - * algorithm is not supported. The maximum value is - * *sched_n_children_max*. - */ - uint32_t sched_wfq_n_children_per_group_max; - - /** Maximum number of priority levels that can have - * more than one child node at any given time, i.e. - * maximum number of WFQ sibling node groups that have - * two or more members. The value of zero states that - * WFQ algorithm is not supported. 
The value of 1 - * indicates that (*sched_sp_n_priorities_max* - 1) - * priority levels have at most one child node, so there - * can be only one priority level with two or more - * sibling nodes making up a WFQ group. The maximum - * value is: min(floor(*sched_n_children_max* / 2), - * *sched_sp_n_priorities_max*). - */ - uint32_t sched_wfq_n_groups_max; - - /** Maximum WFQ weight. The value of 1 indicates that - * all sibling nodes with same priority have the same - * WFQ weight, so WFQ is reduced to FQ. - */ - uint32_t sched_wfq_weight_max; - } nonleaf; - - /** Items valid only for leaf nodes. */ - struct { - /** Head drop algorithm support for current node. */ - int cman_head_drop_supported; - - /** Private WRED context support for current node. */ - int cman_wred_context_private_supported; - - /** Maximum number of shared WRED contexts the current - * node can be part of. The value of zero indicates that - * shared WRED contexts are not supported by the current - * node. - */ - uint32_t cman_wred_context_shared_n_max; - } leaf; - }; - - /** Mask of statistics counter types supported by the current node. - * @see enum rte_tm_stats_type - */ - uint64_t stats_mask; -}; - -/** - * Congestion management (CMAN) mode - * - * This is used for controlling the admission of packets into a packet queue or - * group of packet queues on congestion. On request of writing a new packet - * into the current queue while the queue is full, the *tail drop* algorithm - * drops the new packet while leaving the queue unmodified, as opposed to *head - * drop* algorithm, which drops the packet at the head of the queue (the oldest - * packet waiting in the queue) and admits the new packet at the tail of the - * queue. - * - * The *Random Early Detection (RED)* algorithm works by proactively dropping - * more and more input packets as the queue occupancy builds up. When the queue - * is full or almost full, RED effectively works as *tail drop*. The *Weighted - * RED* algorithm uses a separate set of RED thresholds for each packet color. - */ -enum rte_tm_cman_mode { - RTE_TM_CMAN_TAIL_DROP = 0, /**< Tail drop */ - RTE_TM_CMAN_HEAD_DROP, /**< Head drop */ - RTE_TM_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */ -}; - -/** - * Random Early Detection (RED) profile - */ -struct rte_tm_red_params { - /** Minimum queue threshold */ - uint16_t min_th; - - /** Maximum queue threshold */ - uint16_t max_th; - - /** Inverse of packet marking probability maximum value (maxp), i.e. - * maxp_inv = 1 / maxp - */ - uint16_t maxp_inv; - - /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */ - uint16_t wq_log2; -}; - -/** - * Weighted RED (WRED) profile - * - * Multiple WRED contexts can share the same WRED profile. Each leaf node with - * WRED enabled as its congestion management mode has zero or one private WRED - * context (only one leaf node using it) and/or zero, one or several shared - * WRED contexts (multiple leaf nodes use the same WRED context). A private - * WRED context is used to perform congestion management for a single leaf - * node, while a shared WRED context is used to perform congestion management - * for a group of leaf nodes. - */ -struct rte_tm_wred_params { - /** One set of RED parameters per packet color */ - struct rte_tm_red_params red_params[RTE_TM_COLORS]; -}; - -/** - * Token bucket - */ -struct rte_tm_token_bucket { - /** Token bucket rate (bytes per second) */ - uint64_t rate; - - /** Token bucket size (bytes), a.k.a. 
max burst size */ - uint64_t size; -}; - -/** - * Shaper (rate limiter) profile - * - * Multiple shaper instances can share the same shaper profile. Each node has - * zero or one private shaper (only one node using it) and/or zero, one or - * several shared shapers (multiple nodes use the same shaper instance). - * A private shaper is used to perform traffic shaping for a single node, while - * a shared shaper is used to perform traffic shaping for a group of nodes. - * - * Single rate shapers use a single token bucket. A single rate shaper can be - * configured by setting the rate of the committed bucket to zero, which - * effectively disables this bucket. The peak bucket is used to limit the rate - * and the burst size for the current shaper. - * - * Dual rate shapers use both the committed and the peak token buckets. The - * rate of the peak bucket has to be bigger than zero, as well as greater than - * or equal to the rate of the committed bucket. - */ -struct rte_tm_shaper_params { - /** Committed token bucket */ - struct rte_tm_token_bucket committed; - - /** Peak token bucket */ - struct rte_tm_token_bucket peak; - - /** Signed value to be added to the length of each packet for the - * purpose of shaping. Can be used to correct the packet length with - * the framing overhead bytes that are also consumed on the wire (e.g. - * RTE_TM_ETH_FRAMING_OVERHEAD_FCS). - */ - int32_t pkt_length_adjust; -}; - -/** - * Node parameters - * - * Each non-leaf node has multiple inputs (its children nodes) and single output - * (which is input to its parent node). It arbitrates its inputs using Strict - * Priority (SP) and Weighted Fair Queuing (WFQ) algorithms to schedule input - * packets to its output while observing its shaping (rate limiting) - * constraints. - * - * Algorithms such as Weighted Round Robin (WRR), Byte-level WRR, Deficit WRR - * (DWRR), etc. are considered approximations of the WFQ ideal and are - * assimilated to WFQ, although an associated implementation-dependent trade-off - * on accuracy, performance and resource usage might exist. - * - * Children nodes with different priorities are scheduled using the SP algorithm - * based on their priority, with zero (0) as the highest priority. Children with - * the same priority are scheduled using the WFQ algorithm according to their - * weights. The WFQ weight of a given child node is relative to the sum of the - * weights of all its sibling nodes that have the same priority, with one (1) as - * the lowest weight. For each SP priority, the WFQ weight mode can be set as - * either byte-based or packet-based. - * - * Each leaf node sits on top of a TX queue of the current Ethernet port. Hence, - * the leaf nodes are predefined, with their node IDs set to 0 .. (N-1), where N - * is the number of TX queues configured for the current Ethernet port. The - * non-leaf nodes have their IDs generated by the application. - */ -struct rte_tm_node_params { - /** Shaper profile for the private shaper. The absence of the private - * shaper for the current node is indicated by setting this parameter - * to RTE_TM_SHAPER_PROFILE_ID_NONE. - */ - uint32_t shaper_profile_id; - - /** User allocated array of valid shared shaper IDs. */ - uint32_t *shared_shaper_id; - - /** Number of shared shaper IDs in the *shared_shaper_id* array. */ - uint32_t n_shared_shapers; - - RTE_STD_C11 - union { - /** Parameters only valid for non-leaf nodes. */ - struct { - /** WFQ weight mode for each SP priority. 
When NULL, it - * indicates that WFQ is to be used for all priorities. - * When non-NULL, it points to a pre-allocated array of - * *n_sp_priorities* values, with non-zero value for - * byte-mode and zero for packet-mode. - */ - int *wfq_weight_mode; - - /** Number of SP priorities. */ - uint32_t n_sp_priorities; - } nonleaf; - - /** Parameters only valid for leaf nodes. */ - struct { - /** Congestion management mode */ - enum rte_tm_cman_mode cman; - - /** WRED parameters (only valid when *cman* is set to - * WRED). - */ - struct { - /** WRED profile for private WRED context. The - * absence of a private WRED context for the - * current leaf node is indicated by value - * RTE_TM_WRED_PROFILE_ID_NONE. - */ - uint32_t wred_profile_id; - - /** User allocated array of shared WRED context - * IDs. When set to NULL, it indicates that the - * current leaf node should not currently be - * part of any shared WRED contexts. - */ - uint32_t *shared_wred_context_id; - - /** Number of elements in the - * *shared_wred_context_id* array. Only valid - * when *shared_wred_context_id* is non-NULL, - * in which case it should be non-zero. - */ - uint32_t n_shared_wred_contexts; - } wred; - } leaf; - }; - - /** Mask of statistics counter types to be enabled for this node. This - * needs to be a subset of the statistics counter types available for - * the current node. Any statistics counter type not included in this - * set is to be disabled for the current node. - * @see enum rte_tm_stats_type - */ - uint64_t stats_mask; -}; - -/** - * Verbose error types. - * - * Most of them provide the type of the object referenced by struct - * rte_tm_error::cause. - */ -enum rte_tm_error_type { - RTE_TM_ERROR_TYPE_NONE, /**< No error. */ - RTE_TM_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ - RTE_TM_ERROR_TYPE_CAPABILITIES, - RTE_TM_ERROR_TYPE_LEVEL_ID, - RTE_TM_ERROR_TYPE_WRED_PROFILE, - RTE_TM_ERROR_TYPE_WRED_PROFILE_GREEN, - RTE_TM_ERROR_TYPE_WRED_PROFILE_YELLOW, - RTE_TM_ERROR_TYPE_WRED_PROFILE_RED, - RTE_TM_ERROR_TYPE_WRED_PROFILE_ID, - RTE_TM_ERROR_TYPE_SHARED_WRED_CONTEXT_ID, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN, - RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID, - RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID, - RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID, - RTE_TM_ERROR_TYPE_NODE_PRIORITY, - RTE_TM_ERROR_TYPE_NODE_WEIGHT, - RTE_TM_ERROR_TYPE_NODE_PARAMS, - RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID, - RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID, - RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS, - RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE, - RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES, - RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN, - RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID, - RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID, - RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS, - RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS, - RTE_TM_ERROR_TYPE_NODE_ID, -}; - -/** - * Verbose error structure definition. - * - * This object is normally allocated by applications and set by PMDs, the - * message points to a constant string which does not need to be freed by - * the application, however its pointer can be considered valid only as long - * as its associated DPDK port remains configured. Closing the underlying - * device or unloading the PMD invalidates it. 
- * - * Both cause and message may be NULL regardless of the error type. - */ -struct rte_tm_error { - enum rte_tm_error_type type; /**< Cause field and error type. */ - const void *cause; /**< Object responsible for the error. */ - const char *message; /**< Human-readable error message. */ -}; - -/** - * Traffic manager get number of leaf nodes - * - * Each leaf node sits on on top of a TX queue of the current Ethernet port. - * Therefore, the set of leaf nodes is predefined, their number is always equal - * to N (where N is the number of TX queues configured for the current port) - * and their IDs are 0 .. (N-1). - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[out] n_leaf_nodes - * Number of leaf nodes for the current port. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int -rte_tm_get_number_of_leaf_nodes(uint16_t port_id, - uint32_t *n_leaf_nodes, - struct rte_tm_error *error); - -/** - * Traffic manager node ID validate and type (i.e. leaf or non-leaf) get - * - * The leaf nodes have predefined IDs in the range of 0 .. (N-1), where N is - * the number of TX queues of the current Ethernet port. The non-leaf nodes - * have their IDs generated by the application outside of the above range, - * which is reserved for leaf nodes. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID value. Needs to be valid. - * @param[out] is_leaf - * Set to non-zero value when node is leaf and to zero otherwise (non-leaf). - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int -rte_tm_node_type_get(uint16_t port_id, - uint32_t node_id, - int *is_leaf, - struct rte_tm_error *error); - -/** - * Traffic manager capabilities get - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[out] cap - * Traffic manager capabilities. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int -rte_tm_capabilities_get(uint16_t port_id, - struct rte_tm_capabilities *cap, - struct rte_tm_error *error); - -/** - * Traffic manager level capabilities get - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] level_id - * The hierarchy level identifier. The value of 0 identifies the level of the - * root node. - * @param[out] cap - * Traffic manager level capabilities. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - */ -int -rte_tm_level_capabilities_get(uint16_t port_id, - uint32_t level_id, - struct rte_tm_level_capabilities *cap, - struct rte_tm_error *error); - -/** - * Traffic manager node capabilities get - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[out] cap - * Traffic manager node capabilities. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. 
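For illustration only (not part of this patch): a sketch that probes the traffic manager using the query functions documented above and prints the PMD-supplied error message on failure; port_id is assumed to refer to a configured, TM-capable Ethernet port.

/*
 * Hypothetical usage sketch: discover how many leaf nodes (TX queues) the
 * port exposes and read the port-wide traffic manager capabilities.
 */
#include <inttypes.h>
#include <stdio.h>
#include <rte_tm.h>

static int
example_tm_probe(uint16_t port_id)
{
	struct rte_tm_capabilities cap;
	struct rte_tm_error error;
	uint32_t n_leaf = 0;

	if (rte_tm_get_number_of_leaf_nodes(port_id, &n_leaf, &error) != 0 ||
	    rte_tm_capabilities_get(port_id, &cap, &error) != 0) {
		printf("TM probe failed: %s\n",
			error.message ? error.message : "unknown");
		return -1;
	}
	printf("%" PRIu32 " leaf nodes, up to %" PRIu32 " nodes on %" PRIu32
		" levels\n", n_leaf, cap.n_nodes_max, cap.n_levels_max);
	return 0;
}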
- */ -int -rte_tm_node_capabilities_get(uint16_t port_id, - uint32_t node_id, - struct rte_tm_node_capabilities *cap, - struct rte_tm_error *error); - -/** - * Traffic manager WRED profile add - * - * Create a new WRED profile with ID set to *wred_profile_id*. The new profile - * is used to create one or several WRED contexts. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] wred_profile_id - * WRED profile ID for the new profile. Needs to be unused. - * @param[in] profile - * WRED profile parameters. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::cman_wred_context_n_max - */ -int -rte_tm_wred_profile_add(uint16_t port_id, - uint32_t wred_profile_id, - struct rte_tm_wred_params *profile, - struct rte_tm_error *error); - -/** - * Traffic manager WRED profile delete - * - * Delete an existing WRED profile. This operation fails when there is - * currently at least one user (i.e. WRED context) of this WRED profile. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] wred_profile_id - * WRED profile ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::cman_wred_context_n_max - */ -int -rte_tm_wred_profile_delete(uint16_t port_id, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager shared WRED context add or update - * - * When *shared_wred_context_id* is invalid, a new WRED context with this ID is - * created by using the WRED profile identified by *wred_profile_id*. - * - * When *shared_wred_context_id* is valid, this WRED context is no longer using - * the profile previously assigned to it and is updated to use the profile - * identified by *wred_profile_id*. - * - * A valid shared WRED context can be assigned to several hierarchy leaf nodes - * configured to use WRED as the congestion management mode. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shared_wred_context_id - * Shared WRED context ID - * @param[in] wred_profile_id - * WRED profile ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max - */ -int -rte_tm_shared_wred_context_add_update(uint16_t port_id, - uint32_t shared_wred_context_id, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager shared WRED context delete - * - * Delete an existing shared WRED context. This operation fails when there is - * currently at least one user (i.e. hierarchy leaf node) of this shared WRED - * context. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shared_wred_context_id - * Shared WRED context ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. 
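For illustration only (not part of this patch): a sketch that builds one WRED profile from the rte_tm_red_params fields described earlier and attaches it to a shared WRED context that leaf nodes can later reference; the profile/context IDs and threshold values are arbitrary, illustrative choices.

/*
 * Hypothetical usage sketch: one WRED profile with identical RED thresholds
 * for all three colors, exposed through a shared WRED context.
 */
#include <rte_tm.h>

#define EX_WRED_PROFILE_ID	1
#define EX_SHARED_WRED_CTX_ID	1

static int
example_wred_setup(uint16_t port_id, struct rte_tm_error *error)
{
	struct rte_tm_wred_params wred = { 0 };
	int color;

	for (color = RTE_TM_GREEN; color < RTE_TM_COLORS; color++) {
		wred.red_params[color].min_th = 32;
		wred.red_params[color].max_th = 128;
		wred.red_params[color].maxp_inv = 10;	/* maxp = 1/10 */
		wred.red_params[color].wq_log2 = 9;	/* wq = 1/512 */
	}

	if (rte_tm_wred_profile_add(port_id, EX_WRED_PROFILE_ID, &wred,
			error) != 0)
		return -1;

	return rte_tm_shared_wred_context_add_update(port_id,
			EX_SHARED_WRED_CTX_ID, EX_WRED_PROFILE_ID, error);
}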
- * - * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max - */ -int -rte_tm_shared_wred_context_delete(uint16_t port_id, - uint32_t shared_wred_context_id, - struct rte_tm_error *error); - -/** - * Traffic manager shaper profile add - * - * Create a new shaper profile with ID set to *shaper_profile_id*. The new - * shaper profile is used to create one or several shapers. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shaper_profile_id - * Shaper profile ID for the new profile. Needs to be unused. - * @param[in] profile - * Shaper profile parameters. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::shaper_n_max - */ -int -rte_tm_shaper_profile_add(uint16_t port_id, - uint32_t shaper_profile_id, - struct rte_tm_shaper_params *profile, - struct rte_tm_error *error); - -/** - * Traffic manager shaper profile delete - * - * Delete an existing shaper profile. This operation fails when there is - * currently at least one user (i.e. shaper) of this shaper profile. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shaper_profile_id - * Shaper profile ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::shaper_n_max - */ -int -rte_tm_shaper_profile_delete(uint16_t port_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager shared shaper add or update - * - * When *shared_shaper_id* is not a valid shared shaper ID, a new shared shaper - * with this ID is created using the shaper profile identified by - * *shaper_profile_id*. - * - * When *shared_shaper_id* is a valid shared shaper ID, this shared shaper is - * no longer using the shaper profile previously assigned to it and is updated - * to use the shaper profile identified by *shaper_profile_id*. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shared_shaper_id - * Shared shaper ID - * @param[in] shaper_profile_id - * Shaper profile ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::shaper_shared_n_max - */ -int -rte_tm_shared_shaper_add_update(uint16_t port_id, - uint32_t shared_shaper_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager shared shaper delete - * - * Delete an existing shared shaper. This operation fails when there is - * currently at least one user (i.e. hierarchy node) of this shared shaper. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] shared_shaper_id - * Shared shaper ID. Needs to be the valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::shaper_shared_n_max - */ -int -rte_tm_shared_shaper_delete(uint16_t port_id, - uint32_t shared_shaper_id, - struct rte_tm_error *error); - -/** - * Traffic manager node add - * - * Create new node and connect it as child of an existing node. 
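A shaper profile defines the token-bucket rates, and a shared shaper instantiates that profile so several hierarchy nodes can be throttled by the same bucket. A minimal sketch of the two calls above; the committed/peak rate/size and pkt_length_adjust field names of struct rte_tm_shaper_params are assumed from this release's rte_tm.h, and the numeric values are placeholders:

#include <string.h>
#include <rte_tm.h>

static int
setup_port_shapers(uint16_t port_id)
{
	struct rte_tm_error err;
	struct rte_tm_shaper_params sp;

	memset(&sp, 0, sizeof(sp));
	sp.committed.rate = 100 * 1000 * 1000 / 8; /* 100 Mbps, in bytes/s */
	sp.committed.size = 4096;                  /* committed bucket size (bytes) */
	sp.peak.rate = 200 * 1000 * 1000 / 8;      /* 200 Mbps, in bytes/s */
	sp.peak.size = 4096;
	sp.pkt_length_adjust = 24;                 /* account for framing overhead */

	/* Shaper profile 0 must be currently unused on this port. */
	if (rte_tm_shaper_profile_add(port_id, 0, &sp, &err) != 0)
		return -1;

	/* Shared shaper 0 uses profile 0 and can be attached to many nodes. */
	return rte_tm_shared_shaper_add_update(port_id, 0, 0, &err);
}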
The new node is - * further identified by *node_id*, which needs to be unused by any of the - * existing nodes. The parent node is identified by *parent_node_id*, which - * needs to be the valid ID of an existing non-leaf node. The parent node is - * going to use the provided SP *priority* and WFQ *weight* to schedule its new - * child node. - * - * This function has to be called for both leaf and non-leaf nodes. In the case - * of leaf nodes (i.e. *node_id* is within the range of 0 .. (N-1), with N as - * the number of configured TX queues of the current port), the leaf node is - * configured rather than created (as the set of leaf nodes is predefined) and - * it is also connected as child of an existing node. - * - * The first node that is added becomes the root node and all the nodes that - * are subsequently added have to be added as descendants of the root node. The - * parent of the root node has to be specified as RTE_TM_NODE_ID_NULL and there - * can only be one node with this parent ID (i.e. the root node). Further - * restrictions for root node: needs to be non-leaf, its private shaper profile - * needs to be valid and single rate, cannot use any shared shapers. - * - * When called before rte_tm_hierarchy_commit() invocation, this function is - * typically used to define the initial start-up hierarchy for the port. - * Provided that dynamic hierarchy updates are supported by the current port (as - * advertised in the port capability set), this function can be also called - * after the rte_tm_hierarchy_commit() invocation. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be unused by any of the existing nodes. - * @param[in] parent_node_id - * Parent node ID. Needs to be the valid. - * @param[in] priority - * Node priority. The highest node priority is zero. Used by the SP algorithm - * running on the parent of the current node for scheduling this child node. - * @param[in] weight - * Node weight. The node weight is relative to the weight sum of all siblings - * that have the same priority. The lowest weight is one. Used by the WFQ - * algorithm running on the parent of the current node for scheduling this - * child node. - * @param[in] level_id - * Level ID that should be met by this node. The hierarchy level of the - * current node is already fully specified through its parent node (i.e. the - * level of this node is equal to the level of its parent node plus one), - * therefore the reason for providing this parameter is to enable the - * application to perform step-by-step checking of the node level during - * successive invocations of this function. When not desired, this check can - * be disabled by assigning value RTE_TM_NODE_LEVEL_ID_ANY to this parameter. - * @param[in] params - * Node parameters. Needs to be pre-allocated and valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see rte_tm_hierarchy_commit() - * @see RTE_TM_UPDATE_NODE_ADD_DELETE - * @see RTE_TM_NODE_LEVEL_ID_ANY - * @see struct rte_tm_capabilities - */ -int -rte_tm_node_add(uint16_t port_id, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - uint32_t level_id, - struct rte_tm_node_params *params, - struct rte_tm_error *error); - -/** - * Traffic manager node delete - * - * Delete an existing node. This operation fails when this node currently has - * at least one user (i.e. child node). 
- * - * When called before rte_tm_hierarchy_commit() invocation, this function is - * typically used to define the initial start-up hierarchy for the port. - * Provided that dynamic hierarchy updates are supported by the current port (as - * advertised in the port capability set), this function can be also called - * after the rte_tm_hierarchy_commit() invocation. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see RTE_TM_UPDATE_NODE_ADD_DELETE - */ -int -rte_tm_node_delete(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error); - -/** - * Traffic manager node suspend - * - * Suspend an existing node. While the node is in suspended state, no packet is - * scheduled from this node and its descendants. The node exits the suspended - * state through the node resume operation. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see rte_tm_node_resume() - * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME - */ -int -rte_tm_node_suspend(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error); - -/** - * Traffic manager node resume - * - * Resume an existing node that is currently in suspended state. The node - * entered the suspended state as result of a previous node suspend operation. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see rte_tm_node_suspend() - * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME - */ -int -rte_tm_node_resume(uint16_t port_id, - uint32_t node_id, - struct rte_tm_error *error); - -/** - * Traffic manager hierarchy commit - * - * This function is called during the port initialization phase (before the - * Ethernet port is started) to freeze the start-up hierarchy. - * - * This function typically performs the following steps: - * a) It validates the start-up hierarchy that was previously defined for the - * current port through successive rte_tm_node_add() invocations; - * b) Assuming successful validation, it performs all the necessary port - * specific configuration operations to install the specified hierarchy on - * the current port, with immediate effect once the port is started. - * - * This function fails when the currently configured hierarchy is not supported - * by the Ethernet port, in which case the user can abort or try out another - * hierarchy configuration (e.g. a hierarchy with less leaf nodes), which can be - * build from scratch (when *clear_on_fail* is enabled) or by modifying the - * existing hierarchy configuration (when *clear_on_fail* is disabled). - * - * Note that this function can still fail due to other causes (e.g. not enough - * memory available in the system, etc), even though the specified hierarchy is - * supported in principle by the current port. - * - * @param[in] port_id - * The port identifier of the Ethernet device. 
- * @param[in] clear_on_fail - * On function call failure, hierarchy is cleared when this parameter is - * non-zero and preserved when this parameter is equal to zero. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see rte_tm_node_add() - * @see rte_tm_node_delete() - */ -int -rte_tm_hierarchy_commit(uint16_t port_id, - int clear_on_fail, - struct rte_tm_error *error); - -/** - * Traffic manager node parent update - * - * Restriction for root node: its parent cannot be changed. - * - * This function can only be called after the rte_tm_hierarchy_commit() - * invocation. Its success depends on the port support for this operation, as - * advertised through the port capability set. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[in] parent_node_id - * Node ID for the new parent. Needs to be valid. - * @param[in] priority - * Node priority. The highest node priority is zero. Used by the SP algorithm - * running on the parent of the current node for scheduling this child node. - * @param[in] weight - * Node weight. The node weight is relative to the weight sum of all siblings - * that have the same priority. The lowest weight is zero. Used by the WFQ - * algorithm running on the parent of the current node for scheduling this - * child node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL - * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL - */ -int -rte_tm_node_parent_update(uint16_t port_id, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - struct rte_tm_error *error); - -/** - * Traffic manager node private shaper update - * - * Restriction for the root node: its private shaper profile needs to be valid - * and single rate. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[in] shaper_profile_id - * Shaper profile ID for the private shaper of the current node. Needs to be - * either valid shaper profile ID or RTE_TM_SHAPER_PROFILE_ID_NONE, with - * the latter disabling the private shaper of the current node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::shaper_private_n_max - */ -int -rte_tm_node_shaper_update(uint16_t port_id, - uint32_t node_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager node shared shapers update - * - * Restriction for root node: cannot use any shared rate shapers. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[in] shared_shaper_id - * Shared shaper ID. Needs to be valid. - * @param[in] add - * Set to non-zero value to add this shared shaper to current node or to zero - * to delete this shared shaper from current node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. 
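Putting rte_tm_node_add() and rte_tm_hierarchy_commit() together, a flat start-up hierarchy (one non-leaf root feeding every leaf/TX queue) can be sketched as below; the rte_tm_node_params field names, the pre-existing single-rate shaper profile 0 and the arbitrary root node ID are assumptions for illustration, not part of the API contract:

#include <string.h>
#include <rte_tm.h>

static int
build_flat_hierarchy(uint16_t port_id)
{
	struct rte_tm_error err;
	struct rte_tm_node_params np;
	uint32_t n_leaf, i;
	const uint32_t root_id = 1000000; /* any ID outside the leaf range 0..N-1 */

	if (rte_tm_get_number_of_leaf_nodes(port_id, &n_leaf, &err) != 0)
		return -1;

	/* Root node: parent RTE_TM_NODE_ID_NULL, single-rate private shaper
	 * profile 0 assumed to have been added beforehand, no shared shapers. */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = 0;
	np.nonleaf.n_sp_priorities = 1; /* field name assumed from rte_tm.h */
	if (rte_tm_node_add(port_id, root_id, RTE_TM_NODE_ID_NULL, 0, 1,
			RTE_TM_NODE_LEVEL_ID_ANY, &np, &err) != 0)
		return -1;

	/* Leaf nodes 0..N-1 are predefined; adding them attaches them to root. */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	for (i = 0; i < n_leaf; i++)
		if (rte_tm_node_add(port_id, i, root_id, 0, 1,
				RTE_TM_NODE_LEVEL_ID_ANY, &np, &err) != 0)
			return -1;

	/* Freeze the hierarchy; clear it on failure so it can be rebuilt. */
	return rte_tm_hierarchy_commit(port_id, 1, &err);
}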
- * - * @see struct rte_tm_capabilities::shaper_shared_n_max - */ -int -rte_tm_node_shared_shaper_update(uint16_t port_id, - uint32_t node_id, - uint32_t shared_shaper_id, - int add, - struct rte_tm_error *error); - -/** - * Traffic manager node enabled statistics counters update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[in] stats_mask - * Mask of statistics counter types to be enabled for the current node. This - * needs to be a subset of the statistics counter types available for the - * current node. Any statistics counter type not included in this set is to - * be disabled for the current node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see enum rte_tm_stats_type - * @see RTE_TM_UPDATE_NODE_STATS - */ -int -rte_tm_node_stats_update(uint16_t port_id, - uint32_t node_id, - uint64_t stats_mask, - struct rte_tm_error *error); - -/** - * Traffic manager node WFQ weight mode update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid leaf node ID. - * @param[in] wfq_weight_mode - * WFQ weight mode for each SP priority. When NULL, it indicates that WFQ is - * to be used for all priorities. When non-NULL, it points to a pre-allocated - * array of *n_sp_priorities* values, with non-zero value for byte-mode and - * zero for packet-mode. - * @param[in] n_sp_priorities - * Number of SP priorities. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE - * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES - */ -int -rte_tm_node_wfq_weight_mode_update(uint16_t port_id, - uint32_t node_id, - int *wfq_weight_mode, - uint32_t n_sp_priorities, - struct rte_tm_error *error); - -/** - * Traffic manager node congestion management mode update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid leaf node ID. - * @param[in] cman - * Congestion management mode. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see RTE_TM_UPDATE_NODE_CMAN - */ -int -rte_tm_node_cman_update(uint16_t port_id, - uint32_t node_id, - enum rte_tm_cman_mode cman, - struct rte_tm_error *error); - -/** - * Traffic manager node private WRED context update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid leaf node ID. - * @param[in] wred_profile_id - * WRED profile ID for the private WRED context of the current node. Needs to - * be either valid WRED profile ID or RTE_TM_WRED_PROFILE_ID_NONE, with the - * latter disabling the private WRED context of the current node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::cman_wred_context_private_n_max -*/ -int -rte_tm_node_wred_context_update(uint16_t port_id, - uint32_t node_id, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** - * Traffic manager node shared WRED context update - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. 
Needs to be valid leaf node ID. - * @param[in] shared_wred_context_id - * Shared WRED context ID. Needs to be valid. - * @param[in] add - * Set to non-zero value to add this shared WRED context to current node or - * to zero to delete this shared WRED context from current node. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max - */ -int -rte_tm_node_shared_wred_context_update(uint16_t port_id, - uint32_t node_id, - uint32_t shared_wred_context_id, - int add, - struct rte_tm_error *error); - -/** - * Traffic manager node statistics counters read - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] node_id - * Node ID. Needs to be valid. - * @param[out] stats - * When non-NULL, it contains the current value for the statistics counters - * enabled for the current node. - * @param[out] stats_mask - * When non-NULL, it contains the mask of statistics counter types that are - * currently enabled for this node, indicating which of the counters - * retrieved with the *stats* structure are valid. - * @param[in] clear - * When this parameter has a non-zero value, the statistics counters are - * cleared (i.e. set to zero) immediately after they have been read, - * otherwise the statistics counters are left untouched. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see enum rte_tm_stats_type - */ -int -rte_tm_node_stats_read(uint16_t port_id, - uint32_t node_id, - struct rte_tm_node_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_tm_error *error); - -/** - * Traffic manager packet marking - VLAN DEI (IEEE 802.1Q) - * - * IEEE 802.1p maps the traffic class to the VLAN Priority Code Point (PCP) - * field (3 bits), while IEEE 802.1q maps the drop priority to the VLAN Drop - * Eligible Indicator (DEI) field (1 bit), which was previously named Canonical - * Format Indicator (CFI). - * - * All VLAN frames of a given color get their DEI bit set if marking is enabled - * for this color; otherwise, their DEI bit is left as is (either set or not). - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mark_green - * Set to non-zero value to enable marking of green packets and to zero to - * disable it. - * @param[in] mark_yellow - * Set to non-zero value to enable marking of yellow packets and to zero to - * disable it. - * @param[in] mark_red - * Set to non-zero value to enable marking of red packets and to zero to - * disable it. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::mark_vlan_dei_supported - */ -int -rte_tm_mark_vlan_dei(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -/** - * Traffic manager packet marking - IPv4 / IPv6 ECN (IETF RFC 3168) - * - * IETF RFCs 2474 and 3168 reorganize the IPv4 Type of Service (TOS) field - * (8 bits) and the IPv6 Traffic Class (TC) field (8 bits) into Differentiated - * Services Codepoint (DSCP) field (6 bits) and Explicit Congestion - * Notification (ECN) field (2 bits). 
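The statistics pair above, rte_tm_node_stats_update() to select counters and rte_tm_node_stats_read() to retrieve and optionally clear them, is typically used as sketched below; the RTE_TM_STATS_N_PKTS/RTE_TM_STATS_N_BYTES mask bits and the n_pkts/n_bytes fields of struct rte_tm_node_stats are assumed from this release's rte_tm.h:

#include <inttypes.h>
#include <stdio.h>
#include <rte_tm.h>

static int
poll_node_counters(uint16_t port_id, uint32_t node_id)
{
	struct rte_tm_error err;
	struct rte_tm_node_stats st;
	uint64_t mask = 0;

	/* Enable packet and byte counters for this node; the mask must be a
	 * subset of what the node advertises in its capabilities. */
	if (rte_tm_node_stats_update(port_id, node_id,
			RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES, &err) != 0)
		return -1;

	/* Read the enabled counters and clear them afterwards (clear = 1). */
	if (rte_tm_node_stats_read(port_id, node_id, &st, &mask, 1, &err) != 0)
		return -1;

	if (mask & RTE_TM_STATS_N_PKTS)
		printf("node %u: %" PRIu64 " packets\n", node_id, st.n_pkts);
	if (mask & RTE_TM_STATS_N_BYTES)
		printf("node %u: %" PRIu64 " bytes\n", node_id, st.n_bytes);
	return 0;
}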
The DSCP field is typically used to - * encode the traffic class and/or drop priority (RFC 2597), while the ECN - * field is used by RFC 3168 to implement a congestion notification mechanism - * to be leveraged by transport layer protocols such as TCP and SCTP that have - * congestion control mechanisms. - * - * When congestion is experienced, as alternative to dropping the packet, - * routers can change the ECN field of input packets from 2'b01 or 2'b10 - * (values indicating that source endpoint is ECN-capable) to 2'b11 (meaning - * that congestion is experienced). The destination endpoint can use the - * ECN-Echo (ECE) TCP flag to relay the congestion indication back to the - * source endpoint, which acknowledges it back to the destination endpoint with - * the Congestion Window Reduced (CWR) TCP flag. - * - * All IPv4/IPv6 packets of a given color with ECN set to 2’b01 or 2’b10 - * carrying TCP or SCTP have their ECN set to 2’b11 if the marking feature is - * enabled for the current color, otherwise the ECN field is left as is. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mark_green - * Set to non-zero value to enable marking of green packets and to zero to - * disable it. - * @param[in] mark_yellow - * Set to non-zero value to enable marking of yellow packets and to zero to - * disable it. - * @param[in] mark_red - * Set to non-zero value to enable marking of red packets and to zero to - * disable it. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::mark_ip_ecn_tcp_supported - * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported - */ -int -rte_tm_mark_ip_ecn(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -/** - * Traffic manager packet marking - IPv4 / IPv6 DSCP (IETF RFC 2597) - * - * IETF RFC 2597 maps the traffic class and the drop priority to the IPv4/IPv6 - * Differentiated Services Codepoint (DSCP) field (6 bits). Here are the DSCP - * values proposed by this RFC: - * - *
- *                       Class 1    Class 2    Class 3    Class 4
- *                     +----------+----------+----------+----------+
- *    Low Drop Prec    |  001010  |  010010  |  011010  |  100010  |
- *    Medium Drop Prec |  001100  |  010100  |  011100  |  100100  |
- *    High Drop Prec   |  001110  |  010110  |  011110  |  100110  |
- *                     +----------+----------+----------+----------+
- * - * There are 4 traffic classes (classes 1 .. 4) encoded by DSCP bits 1 and 2, - * as well as 3 drop priorities (low/medium/high) encoded by DSCP bits 3 and 4. - * - * All IPv4/IPv6 packets have their color marked into DSCP bits 3 and 4 as - * follows: green mapped to Low Drop Precedence (2’b01), yellow to Medium - * (2’b10) and red to High (2’b11). Marking needs to be explicitly enabled - * for each color; when not enabled for a given color, the DSCP field of all - * packets with that color is left as is. - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[in] mark_green - * Set to non-zero value to enable marking of green packets and to zero to - * disable it. - * @param[in] mark_yellow - * Set to non-zero value to enable marking of yellow packets and to zero to - * disable it. - * @param[in] mark_red - * Set to non-zero value to enable marking of red packets and to zero to - * disable it. - * @param[out] error - * Error details. Filled in only on error, when not NULL. - * @return - * 0 on success, non-zero error code otherwise. - * - * @see struct rte_tm_capabilities::mark_ip_dscp_supported - */ -int -rte_tm_mark_ip_dscp(uint16_t port_id, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -#ifdef __cplusplus -} -#endif - -#endif /* __INCLUDE_RTE_TM_H__ */ diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ether/rte_tm_driver.h deleted file mode 100644 index 90114ff5..00000000 --- a/lib/librte_ether/rte_tm_driver.h +++ /dev/null @@ -1,337 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation - */ - -#ifndef __INCLUDE_RTE_TM_DRIVER_H__ -#define __INCLUDE_RTE_TM_DRIVER_H__ - -/** - * @file - * RTE Generic Traffic Manager API (Driver Side) - * - * This file provides implementation helpers for internal use by PMDs, they - * are not intended to be exposed to applications and are not subject to ABI - * versioning. 
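All three marking calls documented above share the same per-color enable pattern. A short sketch that, assuming the port advertises the corresponding mark_*_supported capabilities, enables marking for yellow and red packets only and leaves green traffic untouched:

#include <rte_tm.h>

/* Enable VLAN DEI, IP ECN and IP DSCP marking for yellow and red packets. */
static int
enable_marking(uint16_t port_id)
{
	struct rte_tm_error err;

	if (rte_tm_mark_vlan_dei(port_id, 0, 1, 1, &err) != 0)
		return -1;
	if (rte_tm_mark_ip_ecn(port_id, 0, 1, 1, &err) != 0)
		return -1;
	return rte_tm_mark_ip_dscp(port_id, 0, 1, 1, &err);
}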
- */ - -#include - -#include -#include "rte_ethdev.h" -#include "rte_tm.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @internal Traffic manager node ID validate and type get */ -typedef int (*rte_tm_node_type_get_t)(struct rte_eth_dev *dev, - uint32_t node_id, - int *is_leaf, - struct rte_tm_error *error); - -/** @internal Traffic manager capabilities get */ -typedef int (*rte_tm_capabilities_get_t)(struct rte_eth_dev *dev, - struct rte_tm_capabilities *cap, - struct rte_tm_error *error); - -/** @internal Traffic manager level capabilities get */ -typedef int (*rte_tm_level_capabilities_get_t)(struct rte_eth_dev *dev, - uint32_t level_id, - struct rte_tm_level_capabilities *cap, - struct rte_tm_error *error); - -/** @internal Traffic manager node capabilities get */ -typedef int (*rte_tm_node_capabilities_get_t)(struct rte_eth_dev *dev, - uint32_t node_id, - struct rte_tm_node_capabilities *cap, - struct rte_tm_error *error); - -/** @internal Traffic manager WRED profile add */ -typedef int (*rte_tm_wred_profile_add_t)(struct rte_eth_dev *dev, - uint32_t wred_profile_id, - struct rte_tm_wred_params *profile, - struct rte_tm_error *error); - -/** @internal Traffic manager WRED profile delete */ -typedef int (*rte_tm_wred_profile_delete_t)(struct rte_eth_dev *dev, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager shared WRED context add */ -typedef int (*rte_tm_shared_wred_context_add_update_t)( - struct rte_eth_dev *dev, - uint32_t shared_wred_context_id, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager shared WRED context delete */ -typedef int (*rte_tm_shared_wred_context_delete_t)( - struct rte_eth_dev *dev, - uint32_t shared_wred_context_id, - struct rte_tm_error *error); - -/** @internal Traffic manager shaper profile add */ -typedef int (*rte_tm_shaper_profile_add_t)(struct rte_eth_dev *dev, - uint32_t shaper_profile_id, - struct rte_tm_shaper_params *profile, - struct rte_tm_error *error); - -/** @internal Traffic manager shaper profile delete */ -typedef int (*rte_tm_shaper_profile_delete_t)(struct rte_eth_dev *dev, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager shared shaper add/update */ -typedef int (*rte_tm_shared_shaper_add_update_t)(struct rte_eth_dev *dev, - uint32_t shared_shaper_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager shared shaper delete */ -typedef int (*rte_tm_shared_shaper_delete_t)(struct rte_eth_dev *dev, - uint32_t shared_shaper_id, - struct rte_tm_error *error); - -/** @internal Traffic manager node add */ -typedef int (*rte_tm_node_add_t)(struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - uint32_t level_id, - struct rte_tm_node_params *params, - struct rte_tm_error *error); - -/** @internal Traffic manager node delete */ -typedef int (*rte_tm_node_delete_t)(struct rte_eth_dev *dev, - uint32_t node_id, - struct rte_tm_error *error); - -/** @internal Traffic manager node suspend */ -typedef int (*rte_tm_node_suspend_t)(struct rte_eth_dev *dev, - uint32_t node_id, - struct rte_tm_error *error); - -/** @internal Traffic manager node resume */ -typedef int (*rte_tm_node_resume_t)(struct rte_eth_dev *dev, - uint32_t node_id, - struct rte_tm_error *error); - -/** @internal Traffic manager hierarchy commit */ -typedef int (*rte_tm_hierarchy_commit_t)(struct rte_eth_dev *dev, - int clear_on_fail, - struct rte_tm_error 
*error); - -/** @internal Traffic manager node parent update */ -typedef int (*rte_tm_node_parent_update_t)(struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t parent_node_id, - uint32_t priority, - uint32_t weight, - struct rte_tm_error *error); - -/** @internal Traffic manager node shaper update */ -typedef int (*rte_tm_node_shaper_update_t)(struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t shaper_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager node shaper update */ -typedef int (*rte_tm_node_shared_shaper_update_t)(struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t shared_shaper_id, - int32_t add, - struct rte_tm_error *error); - -/** @internal Traffic manager node stats update */ -typedef int (*rte_tm_node_stats_update_t)(struct rte_eth_dev *dev, - uint32_t node_id, - uint64_t stats_mask, - struct rte_tm_error *error); - -/** @internal Traffic manager node WFQ weight mode update */ -typedef int (*rte_tm_node_wfq_weight_mode_update_t)( - struct rte_eth_dev *dev, - uint32_t node_id, - int *wfq_weight_mode, - uint32_t n_sp_priorities, - struct rte_tm_error *error); - -/** @internal Traffic manager node congestion management mode update */ -typedef int (*rte_tm_node_cman_update_t)(struct rte_eth_dev *dev, - uint32_t node_id, - enum rte_tm_cman_mode cman, - struct rte_tm_error *error); - -/** @internal Traffic manager node WRED context update */ -typedef int (*rte_tm_node_wred_context_update_t)( - struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t wred_profile_id, - struct rte_tm_error *error); - -/** @internal Traffic manager node WRED context update */ -typedef int (*rte_tm_node_shared_wred_context_update_t)( - struct rte_eth_dev *dev, - uint32_t node_id, - uint32_t shared_wred_context_id, - int add, - struct rte_tm_error *error); - -/** @internal Traffic manager read stats counters for specific node */ -typedef int (*rte_tm_node_stats_read_t)(struct rte_eth_dev *dev, - uint32_t node_id, - struct rte_tm_node_stats *stats, - uint64_t *stats_mask, - int clear, - struct rte_tm_error *error); - -/** @internal Traffic manager packet marking - VLAN DEI */ -typedef int (*rte_tm_mark_vlan_dei_t)(struct rte_eth_dev *dev, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -/** @internal Traffic manager packet marking - IPv4/IPv6 ECN */ -typedef int (*rte_tm_mark_ip_ecn_t)(struct rte_eth_dev *dev, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -/** @internal Traffic manager packet marking - IPv4/IPv6 DSCP */ -typedef int (*rte_tm_mark_ip_dscp_t)(struct rte_eth_dev *dev, - int mark_green, - int mark_yellow, - int mark_red, - struct rte_tm_error *error); - -struct rte_tm_ops { - /** Traffic manager node type get */ - rte_tm_node_type_get_t node_type_get; - - /** Traffic manager capabilities_get */ - rte_tm_capabilities_get_t capabilities_get; - /** Traffic manager level capabilities_get */ - rte_tm_level_capabilities_get_t level_capabilities_get; - /** Traffic manager node capabilities get */ - rte_tm_node_capabilities_get_t node_capabilities_get; - - /** Traffic manager WRED profile add */ - rte_tm_wred_profile_add_t wred_profile_add; - /** Traffic manager WRED profile delete */ - rte_tm_wred_profile_delete_t wred_profile_delete; - /** Traffic manager shared WRED context add/update */ - rte_tm_shared_wred_context_add_update_t - shared_wred_context_add_update; - /** Traffic manager shared WRED context delete */ - rte_tm_shared_wred_context_delete_t - shared_wred_context_delete; - - 
/** Traffic manager shaper profile add */ - rte_tm_shaper_profile_add_t shaper_profile_add; - /** Traffic manager shaper profile delete */ - rte_tm_shaper_profile_delete_t shaper_profile_delete; - /** Traffic manager shared shaper add/update */ - rte_tm_shared_shaper_add_update_t shared_shaper_add_update; - /** Traffic manager shared shaper delete */ - rte_tm_shared_shaper_delete_t shared_shaper_delete; - - /** Traffic manager node add */ - rte_tm_node_add_t node_add; - /** Traffic manager node delete */ - rte_tm_node_delete_t node_delete; - /** Traffic manager node suspend */ - rte_tm_node_suspend_t node_suspend; - /** Traffic manager node resume */ - rte_tm_node_resume_t node_resume; - /** Traffic manager hierarchy commit */ - rte_tm_hierarchy_commit_t hierarchy_commit; - - /** Traffic manager node parent update */ - rte_tm_node_parent_update_t node_parent_update; - /** Traffic manager node shaper update */ - rte_tm_node_shaper_update_t node_shaper_update; - /** Traffic manager node shared shaper update */ - rte_tm_node_shared_shaper_update_t node_shared_shaper_update; - /** Traffic manager node stats update */ - rte_tm_node_stats_update_t node_stats_update; - /** Traffic manager node WFQ weight mode update */ - rte_tm_node_wfq_weight_mode_update_t node_wfq_weight_mode_update; - /** Traffic manager node congestion management mode update */ - rte_tm_node_cman_update_t node_cman_update; - /** Traffic manager node WRED context update */ - rte_tm_node_wred_context_update_t node_wred_context_update; - /** Traffic manager node shared WRED context update */ - rte_tm_node_shared_wred_context_update_t - node_shared_wred_context_update; - /** Traffic manager read statistics counters for current node */ - rte_tm_node_stats_read_t node_stats_read; - - /** Traffic manager packet marking - VLAN DEI */ - rte_tm_mark_vlan_dei_t mark_vlan_dei; - /** Traffic manager packet marking - IPv4/IPv6 ECN */ - rte_tm_mark_ip_ecn_t mark_ip_ecn; - /** Traffic manager packet marking - IPv4/IPv6 DSCP */ - rte_tm_mark_ip_dscp_t mark_ip_dscp; -}; - -/** - * Initialize generic error structure. - * - * This function also sets rte_errno to a given value. - * - * @param[out] error - * Pointer to error structure (may be NULL). - * @param[in] code - * Related error code (rte_errno). - * @param[in] type - * Cause field and error type. - * @param[in] cause - * Object responsible for the error. - * @param[in] message - * Human-readable error message. - * - * @return - * Error code. - */ -static inline int -rte_tm_error_set(struct rte_tm_error *error, - int code, - enum rte_tm_error_type type, - const void *cause, - const char *message) -{ - if (error) { - *error = (struct rte_tm_error){ - .type = type, - .cause = cause, - .message = message, - }; - } - rte_errno = code; - return code; -} - -/** - * Get generic traffic manager operations structure from a port - * - * @param[in] port_id - * The port identifier of the Ethernet device. - * @param[out] error - * Error details - * - * @return - * The traffic manager operations structure associated with port_id on - * success, NULL otherwise. 
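On the driver side, a PMD fills a struct rte_tm_ops table and hands it out through its ethdev tm_ops_get callback, reporting failures with rte_tm_error_set(). A minimal sketch; the mydrv_* names are hypothetical, and the eth_dev_ops member name (tm_ops_get) and its callback signature are assumptions:

#include <errno.h>
#include <rte_common.h>
#include "rte_ethdev.h"
#include "rte_tm_driver.h"

/* Hypothetical driver callback: leaf nodes map 1:1 to TX queues. */
static int
mydrv_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
	int *is_leaf, struct rte_tm_error *error)
{
	if (is_leaf == NULL)
		return -rte_tm_error_set(error, EINVAL,
			RTE_TM_ERROR_TYPE_UNSPECIFIED, NULL,
			"is_leaf must not be NULL");

	*is_leaf = node_id < dev->data->nb_tx_queues;
	return 0;
}

static const struct rte_tm_ops mydrv_tm_ops = {
	.node_type_get = mydrv_node_type_get,
	/* remaining callbacks wired up the same way */
};

/* Registered in the PMD's eth_dev_ops (member name tm_ops_get assumed). */
static int
mydrv_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg)
{
	if (arg == NULL)
		return -EINVAL;
	*(const struct rte_tm_ops **)arg = &mydrv_tm_ops;
	return 0;
}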
- */ -const struct rte_tm_ops * -rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error); - -#ifdef __cplusplus -} -#endif - -#endif /* __INCLUDE_RTE_TM_DRIVER_H__ */ diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index d27dd070..47f599a6 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -8,18 +8,26 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eventdev.a # library version -LIBABIVER := 3 +LIBABIVER := 5 # build flags CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) +CFLAGS += -DLINUX +else +CFLAGS += -DBSD +endif +LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash -lrte_mempool -lrte_timer +LDLIBS += -lrte_mbuf -lrte_cryptodev -lpthread # library source files SRCS-y += rte_eventdev.c SRCS-y += rte_event_ring.c SRCS-y += rte_event_eth_rx_adapter.c +SRCS-y += rte_event_timer_adapter.c +SRCS-y += rte_event_crypto_adapter.c # export include files SYMLINK-y-include += rte_eventdev.h @@ -28,6 +36,9 @@ SYMLINK-y-include += rte_eventdev_pmd_pci.h SYMLINK-y-include += rte_eventdev_pmd_vdev.h SYMLINK-y-include += rte_event_ring.h SYMLINK-y-include += rte_event_eth_rx_adapter.h +SYMLINK-y-include += rte_event_timer_adapter.h +SYMLINK-y-include += rte_event_timer_adapter_pmd.h +SYMLINK-y-include += rte_event_crypto_adapter.h # versioning export map EXPORT_MAP := rte_eventdev_version.map diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build index d1a99602..3cbaf298 100644 --- a/lib/librte_eventdev/meson.build +++ b/lib/librte_eventdev/meson.build @@ -1,14 +1,27 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 5 allow_experimental_apis = true + +if host_machine.system() == 'linux' + cflags += '-DLINUX' +else + cflags += '-DBSD' +endif + sources = files('rte_eventdev.c', 'rte_event_ring.c', - 'rte_event_eth_rx_adapter.c') + 'rte_event_eth_rx_adapter.c', + 'rte_event_timer_adapter.c', + 'rte_event_crypto_adapter.c') headers = files('rte_eventdev.h', 'rte_eventdev_pmd.h', 'rte_eventdev_pmd_pci.h', 'rte_eventdev_pmd_vdev.h', 'rte_event_ring.h', - 'rte_event_eth_rx_adapter.h') -deps += ['ring', 'ethdev', 'hash'] + 'rte_event_eth_rx_adapter.h', + 'rte_event_timer_adapter.h', + 'rte_event_timer_adapter_pmd.h', + 'rte_event_crypto_adapter.h') +deps += ['ring', 'ethdev', 'hash', 'mempool', 'mbuf', 'timer', 'cryptodev'] diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.c b/lib/librte_eventdev/rte_event_crypto_adapter.c new file mode 100644 index 00000000..11b28ca9 --- /dev/null +++ b/lib/librte_eventdev/rte_event_crypto_adapter.c @@ -0,0 +1,1128 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. + * All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_eventdev.h" +#include "rte_eventdev_pmd.h" +#include "rte_event_crypto_adapter.h" + +#define BATCH_SIZE 32 +#define DEFAULT_MAX_NB 128 +#define CRYPTO_ADAPTER_NAME_LEN 32 +#define CRYPTO_ADAPTER_MEM_NAME_LEN 32 +#define CRYPTO_ADAPTER_MAX_EV_ENQ_RETRIES 100 + +/* Flush an instance's enqueue buffers every CRYPTO_ENQ_FLUSH_THRESHOLD + * iterations of eca_crypto_adapter_enq_run() + */ +#define CRYPTO_ENQ_FLUSH_THRESHOLD 1024 + +struct rte_event_crypto_adapter { + /* Event device identifier */ + uint8_t eventdev_id; + /* Event port identifier */ + uint8_t event_port_id; + /* Store event device's implicit release capability */ + uint8_t implicit_release_disabled; + /* Max crypto ops processed in any service function invocation */ + uint32_t max_nb; + /* Lock to serialize config updates with service function */ + rte_spinlock_t lock; + /* Next crypto device to be processed */ + uint16_t next_cdev_id; + /* Per crypto device structure */ + struct crypto_device_info *cdevs; + /* Loop counter to flush crypto ops */ + uint16_t transmit_loop_count; + /* Per instance stats structure */ + struct rte_event_crypto_adapter_stats crypto_stats; + /* Configuration callback for rte_service configuration */ + rte_event_crypto_adapter_conf_cb conf_cb; + /* Configuration callback argument */ + void *conf_arg; + /* Set if default_cb is being used */ + int default_cb_arg; + /* Service initialization state */ + uint8_t service_inited; + /* Memory allocation name */ + char mem_name[CRYPTO_ADAPTER_MEM_NAME_LEN]; + /* Socket identifier cached from eventdev */ + int socket_id; + /* Per adapter EAL service */ + uint32_t service_id; + /* No. of queue pairs configured */ + uint16_t nb_qps; + /* Adapter mode */ + enum rte_event_crypto_adapter_mode mode; +} __rte_cache_aligned; + +/* Per crypto device information */ +struct crypto_device_info { + /* Pointer to cryptodev */ + struct rte_cryptodev *dev; + /* Pointer to queue pair info */ + struct crypto_queue_pair_info *qpairs; + /* Next queue pair to be processed */ + uint16_t next_queue_pair_id; + /* Set to indicate cryptodev->eventdev packet + * transfer uses a hardware mechanism + */ + uint8_t internal_event_port; + /* Set to indicate processing has been started */ + uint8_t dev_started; + /* If num_qpairs > 0, the start callback will + * be invoked if not already invoked + */ + uint16_t num_qpairs; +} __rte_cache_aligned; + +/* Per queue pair information */ +struct crypto_queue_pair_info { + /* Set to indicate queue pair is enabled */ + bool qp_enabled; + /* Pointer to hold rte_crypto_ops for batching */ + struct rte_crypto_op **op_buffer; + /* No of crypto ops accumulated */ + uint8_t len; +} __rte_cache_aligned; + +static struct rte_event_crypto_adapter **event_crypto_adapter; + +/* Macros to check for valid adapter */ +#define EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \ + if (!eca_valid_id(id)) { \ + RTE_EDEV_LOG_ERR("Invalid crypto adapter id = %d\n", id); \ + return retval; \ + } \ +} while (0) + +static inline int +eca_valid_id(uint8_t id) +{ + return id < RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE; +} + +static int +eca_init(void) +{ + const char *name = "crypto_adapter_array"; + const struct rte_memzone *mz; + unsigned int sz; + + sz = sizeof(*event_crypto_adapter) * + RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE; + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + + mz = rte_memzone_lookup(name); + if (mz == NULL) { + mz = 
rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0, + RTE_CACHE_LINE_SIZE); + if (mz == NULL) { + RTE_EDEV_LOG_ERR("failed to reserve memzone err = %" + PRId32, rte_errno); + return -rte_errno; + } + } + + event_crypto_adapter = mz->addr; + return 0; +} + +static inline struct rte_event_crypto_adapter * +eca_id_to_adapter(uint8_t id) +{ + return event_crypto_adapter ? + event_crypto_adapter[id] : NULL; +} + +static int +eca_default_config_cb(uint8_t id, uint8_t dev_id, + struct rte_event_crypto_adapter_conf *conf, void *arg) +{ + struct rte_event_dev_config dev_conf; + struct rte_eventdev *dev; + uint8_t port_id; + int started; + int ret; + struct rte_event_port_conf *port_conf = arg; + struct rte_event_crypto_adapter *adapter = eca_id_to_adapter(id); + + dev = &rte_eventdevs[adapter->eventdev_id]; + dev_conf = dev->data->dev_conf; + + started = dev->data->dev_started; + if (started) + rte_event_dev_stop(dev_id); + port_id = dev_conf.nb_event_ports; + dev_conf.nb_event_ports += 1; + ret = rte_event_dev_configure(dev_id, &dev_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to configure event dev %u\n", dev_id); + if (started) { + if (rte_event_dev_start(dev_id)) + return -EIO; + } + return ret; + } + + ret = rte_event_port_setup(dev_id, port_id, port_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to setup event port %u\n", port_id); + return ret; + } + + conf->event_port_id = port_id; + conf->max_nb = DEFAULT_MAX_NB; + if (started) + ret = rte_event_dev_start(dev_id); + + adapter->default_cb_arg = 1; + return ret; +} + +int __rte_experimental +rte_event_crypto_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_crypto_adapter_conf_cb conf_cb, + enum rte_event_crypto_adapter_mode mode, + void *conf_arg) +{ + struct rte_event_crypto_adapter *adapter; + char mem_name[CRYPTO_ADAPTER_NAME_LEN]; + struct rte_event_dev_info dev_info; + int socket_id; + uint8_t i; + int ret; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + if (conf_cb == NULL) + return -EINVAL; + + if (event_crypto_adapter == NULL) { + ret = eca_init(); + if (ret) + return ret; + } + + adapter = eca_id_to_adapter(id); + if (adapter != NULL) { + RTE_EDEV_LOG_ERR("Crypto adapter id %u already exists!", id); + return -EEXIST; + } + + socket_id = rte_event_dev_socket_id(dev_id); + snprintf(mem_name, CRYPTO_ADAPTER_MEM_NAME_LEN, + "rte_event_crypto_adapter_%d", id); + + adapter = rte_zmalloc_socket(mem_name, sizeof(*adapter), + RTE_CACHE_LINE_SIZE, socket_id); + if (adapter == NULL) { + RTE_EDEV_LOG_ERR("Failed to get mem for event crypto adapter!"); + return -ENOMEM; + } + + ret = rte_event_dev_info_get(dev_id, &dev_info); + if (ret < 0) { + RTE_EDEV_LOG_ERR("Failed to get info for eventdev %d: %s!", + dev_id, dev_info.driver_name); + return ret; + } + + adapter->implicit_release_disabled = (dev_info.event_dev_cap & + RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE); + adapter->eventdev_id = dev_id; + adapter->socket_id = socket_id; + adapter->conf_cb = conf_cb; + adapter->conf_arg = conf_arg; + adapter->mode = mode; + strcpy(adapter->mem_name, mem_name); + adapter->cdevs = rte_zmalloc_socket(adapter->mem_name, + rte_cryptodev_count() * + sizeof(struct crypto_device_info), 0, + socket_id); + if (adapter->cdevs == NULL) { + RTE_EDEV_LOG_ERR("Failed to get mem for crypto devices\n"); + rte_free(adapter); + return -ENOMEM; + } + + rte_spinlock_init(&adapter->lock); + for (i = 0; i < rte_cryptodev_count(); i++) + adapter->cdevs[i].dev = rte_cryptodev_pmd_get_dev(i); 
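eca_default_config_cb() above grows the event device by one port at adapter creation time. An application that has already provisioned an event port can instead pass its own callback to rte_event_crypto_adapter_create_ext(); a sketch, where the adapter/eventdev IDs, the event port number and the max_nb value are placeholders:

#include <stdint.h>
#include <rte_common.h>
#include <rte_event_crypto_adapter.h>

/* Hand the adapter a pre-configured event port instead of letting the
 * default callback reconfigure the event device. */
static int
my_crypto_adapter_conf_cb(uint8_t id __rte_unused, uint8_t dev_id __rte_unused,
	struct rte_event_crypto_adapter_conf *conf, void *arg)
{
	uint8_t *port = arg;

	conf->event_port_id = *port; /* event port set up by the application */
	conf->max_nb = 128;          /* crypto ops processed per service run */
	return 0;
}

static uint8_t my_adapter_port = 3;

static int
create_adapter(void)
{
	/* Adapter id 0 on eventdev 0, running in OP_FORWARD mode. */
	return rte_event_crypto_adapter_create_ext(0, 0,
			my_crypto_adapter_conf_cb,
			RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD,
			&my_adapter_port);
}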
+ + event_crypto_adapter[id] = adapter; + + return 0; +} + + +int __rte_experimental +rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_config, + enum rte_event_crypto_adapter_mode mode) +{ + struct rte_event_port_conf *pc; + int ret; + + if (port_config == NULL) + return -EINVAL; + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + pc = rte_malloc(NULL, sizeof(*pc), 0); + if (pc == NULL) + return -ENOMEM; + *pc = *port_config; + ret = rte_event_crypto_adapter_create_ext(id, dev_id, + eca_default_config_cb, + mode, + pc); + if (ret) + rte_free(pc); + + return ret; +} + +int __rte_experimental +rte_event_crypto_adapter_free(uint8_t id) +{ + struct rte_event_crypto_adapter *adapter; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + if (adapter->nb_qps) { + RTE_EDEV_LOG_ERR("%" PRIu16 "Queue pairs not deleted", + adapter->nb_qps); + return -EBUSY; + } + + if (adapter->default_cb_arg) + rte_free(adapter->conf_arg); + rte_free(adapter->cdevs); + rte_free(adapter); + event_crypto_adapter[id] = NULL; + + return 0; +} + +static inline unsigned int +eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter, + struct rte_event *ev, unsigned int cnt) +{ + struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats; + union rte_event_crypto_metadata *m_data = NULL; + struct crypto_queue_pair_info *qp_info = NULL; + struct rte_crypto_op *crypto_op; + unsigned int i, n; + uint16_t qp_id, len, ret; + uint8_t cdev_id; + + len = 0; + ret = 0; + n = 0; + stats->event_deq_count += cnt; + + for (i = 0; i < cnt; i++) { + crypto_op = ev[i].event_ptr; + if (crypto_op == NULL) + continue; + if (crypto_op->sess_type == RTE_CRYPTO_OP_WITH_SESSION) { + m_data = rte_cryptodev_sym_session_get_user_data( + crypto_op->sym->session); + if (m_data == NULL) { + rte_pktmbuf_free(crypto_op->sym->m_src); + rte_crypto_op_free(crypto_op); + continue; + } + + cdev_id = m_data->request_info.cdev_id; + qp_id = m_data->request_info.queue_pair_id; + qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id]; + if (qp_info == NULL) { + rte_pktmbuf_free(crypto_op->sym->m_src); + rte_crypto_op_free(crypto_op); + continue; + } + len = qp_info->len; + qp_info->op_buffer[len] = crypto_op; + len++; + } else if (crypto_op->sess_type == RTE_CRYPTO_OP_SESSIONLESS && + crypto_op->private_data_offset) { + m_data = (union rte_event_crypto_metadata *) + ((uint8_t *)crypto_op + + crypto_op->private_data_offset); + cdev_id = m_data->request_info.cdev_id; + qp_id = m_data->request_info.queue_pair_id; + qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id]; + if (qp_info == NULL) { + rte_pktmbuf_free(crypto_op->sym->m_src); + rte_crypto_op_free(crypto_op); + continue; + } + len = qp_info->len; + qp_info->op_buffer[len] = crypto_op; + len++; + } else { + rte_pktmbuf_free(crypto_op->sym->m_src); + rte_crypto_op_free(crypto_op); + continue; + } + + if (len == BATCH_SIZE) { + struct rte_crypto_op **op_buffer = qp_info->op_buffer; + ret = rte_cryptodev_enqueue_burst(cdev_id, + qp_id, + op_buffer, + BATCH_SIZE); + + stats->crypto_enq_count += ret; + + while (ret < len) { + struct rte_crypto_op *op; + op = op_buffer[ret++]; + stats->crypto_enq_fail++; + rte_pktmbuf_free(op->sym->m_src); + rte_crypto_op_free(op); + } + + len = 0; + } + + if (qp_info) + qp_info->len = len; + n += ret; + } + + return n; +} + +static unsigned int +eca_crypto_enq_flush(struct rte_event_crypto_adapter *adapter) +{ + struct 
rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats; + struct crypto_device_info *curr_dev; + struct crypto_queue_pair_info *curr_queue; + struct rte_crypto_op **op_buffer; + struct rte_cryptodev *dev; + uint8_t cdev_id; + uint16_t qp; + uint16_t ret; + uint16_t num_cdev = rte_cryptodev_count(); + + ret = 0; + for (cdev_id = 0; cdev_id < num_cdev; cdev_id++) { + curr_dev = &adapter->cdevs[cdev_id]; + if (curr_dev == NULL) + continue; + dev = curr_dev->dev; + + for (qp = 0; qp < dev->data->nb_queue_pairs; qp++) { + + curr_queue = &curr_dev->qpairs[qp]; + if (!curr_queue->qp_enabled) + continue; + + op_buffer = curr_queue->op_buffer; + ret = rte_cryptodev_enqueue_burst(cdev_id, + qp, + op_buffer, + curr_queue->len); + stats->crypto_enq_count += ret; + + while (ret < curr_queue->len) { + struct rte_crypto_op *op; + op = op_buffer[ret++]; + stats->crypto_enq_fail++; + rte_pktmbuf_free(op->sym->m_src); + rte_crypto_op_free(op); + } + curr_queue->len = 0; + } + } + + return ret; +} + +static int +eca_crypto_adapter_enq_run(struct rte_event_crypto_adapter *adapter, + unsigned int max_enq) +{ + struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats; + struct rte_event ev[BATCH_SIZE]; + unsigned int nb_enq, nb_enqueued; + uint16_t n; + uint8_t event_dev_id = adapter->eventdev_id; + uint8_t event_port_id = adapter->event_port_id; + + nb_enqueued = 0; + if (adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW) + return 0; + + for (nb_enq = 0; nb_enq < max_enq; nb_enq += n) { + stats->event_poll_count++; + n = rte_event_dequeue_burst(event_dev_id, + event_port_id, ev, BATCH_SIZE, 0); + + if (!n) + break; + + nb_enqueued += eca_enq_to_cryptodev(adapter, ev, n); + } + + if ((++adapter->transmit_loop_count & + (CRYPTO_ENQ_FLUSH_THRESHOLD - 1)) == 0) { + nb_enqueued += eca_crypto_enq_flush(adapter); + } + + return nb_enqueued; +} + +static inline void +eca_ops_enqueue_burst(struct rte_event_crypto_adapter *adapter, + struct rte_crypto_op **ops, uint16_t num) +{ + struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats; + union rte_event_crypto_metadata *m_data = NULL; + uint8_t event_dev_id = adapter->eventdev_id; + uint8_t event_port_id = adapter->event_port_id; + struct rte_event events[BATCH_SIZE]; + uint16_t nb_enqueued, nb_ev; + uint8_t retry; + uint8_t i; + + nb_ev = 0; + retry = 0; + nb_enqueued = 0; + num = RTE_MIN(num, BATCH_SIZE); + for (i = 0; i < num; i++) { + struct rte_event *ev = &events[nb_ev++]; + if (ops[i]->sess_type == RTE_CRYPTO_OP_WITH_SESSION) { + m_data = rte_cryptodev_sym_session_get_user_data( + ops[i]->sym->session); + } else if (ops[i]->sess_type == RTE_CRYPTO_OP_SESSIONLESS && + ops[i]->private_data_offset) { + m_data = (union rte_event_crypto_metadata *) + ((uint8_t *)ops[i] + + ops[i]->private_data_offset); + } + + if (unlikely(m_data == NULL)) { + rte_pktmbuf_free(ops[i]->sym->m_src); + rte_crypto_op_free(ops[i]); + continue; + } + + rte_memcpy(ev, &m_data->response_info, sizeof(*ev)); + ev->event_ptr = ops[i]; + ev->event_type = RTE_EVENT_TYPE_CRYPTODEV; + if (adapter->implicit_release_disabled) + ev->op = RTE_EVENT_OP_FORWARD; + else + ev->op = RTE_EVENT_OP_NEW; + } + + do { + nb_enqueued += rte_event_enqueue_burst(event_dev_id, + event_port_id, + &events[nb_enqueued], + nb_ev - nb_enqueued); + } while (retry++ < CRYPTO_ADAPTER_MAX_EV_ENQ_RETRIES && + nb_enqueued < nb_ev); + + /* Free mbufs and rte_crypto_ops for failed events */ + for (i = nb_enqueued; i < nb_ev; i++) { + struct rte_crypto_op *op = events[i].event_ptr; + 
rte_pktmbuf_free(op->sym->m_src); + rte_crypto_op_free(op); + } + + stats->event_enq_fail_count += nb_ev - nb_enqueued; + stats->event_enq_count += nb_enqueued; + stats->event_enq_retry_count += retry - 1; +} + +static inline unsigned int +eca_crypto_adapter_deq_run(struct rte_event_crypto_adapter *adapter, + unsigned int max_deq) +{ + struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats; + struct crypto_device_info *curr_dev; + struct crypto_queue_pair_info *curr_queue; + struct rte_crypto_op *ops[BATCH_SIZE]; + uint16_t n, nb_deq; + struct rte_cryptodev *dev; + uint8_t cdev_id; + uint16_t qp, dev_qps; + bool done; + uint16_t num_cdev = rte_cryptodev_count(); + + nb_deq = 0; + do { + uint16_t queues = 0; + done = true; + + for (cdev_id = adapter->next_cdev_id; + cdev_id < num_cdev; cdev_id++) { + curr_dev = &adapter->cdevs[cdev_id]; + if (curr_dev == NULL) + continue; + dev = curr_dev->dev; + dev_qps = dev->data->nb_queue_pairs; + + for (qp = curr_dev->next_queue_pair_id; + queues < dev_qps; qp = (qp + 1) % dev_qps, + queues++) { + + curr_queue = &curr_dev->qpairs[qp]; + if (!curr_queue->qp_enabled) + continue; + + n = rte_cryptodev_dequeue_burst(cdev_id, qp, + ops, BATCH_SIZE); + if (!n) + continue; + + done = false; + stats->crypto_deq_count += n; + eca_ops_enqueue_burst(adapter, ops, n); + nb_deq += n; + + if (nb_deq > max_deq) { + if ((qp + 1) == dev_qps) { + adapter->next_cdev_id = + (cdev_id + 1) + % num_cdev; + } + curr_dev->next_queue_pair_id = (qp + 1) + % dev->data->nb_queue_pairs; + + return nb_deq; + } + } + } + } while (done == false); + return nb_deq; +} + +static void +eca_crypto_adapter_run(struct rte_event_crypto_adapter *adapter, + unsigned int max_ops) +{ + while (max_ops) { + unsigned int e_cnt, d_cnt; + + e_cnt = eca_crypto_adapter_deq_run(adapter, max_ops); + max_ops -= RTE_MIN(max_ops, e_cnt); + + d_cnt = eca_crypto_adapter_enq_run(adapter, max_ops); + max_ops -= RTE_MIN(max_ops, d_cnt); + + if (e_cnt == 0 && d_cnt == 0) + break; + + } +} + +static int +eca_service_func(void *args) +{ + struct rte_event_crypto_adapter *adapter = args; + + if (rte_spinlock_trylock(&adapter->lock) == 0) + return 0; + eca_crypto_adapter_run(adapter, adapter->max_nb); + rte_spinlock_unlock(&adapter->lock); + + return 0; +} + +static int +eca_init_service(struct rte_event_crypto_adapter *adapter, uint8_t id) +{ + struct rte_event_crypto_adapter_conf adapter_conf; + struct rte_service_spec service; + int ret; + + if (adapter->service_inited) + return 0; + + memset(&service, 0, sizeof(service)); + snprintf(service.name, CRYPTO_ADAPTER_NAME_LEN, + "rte_event_crypto_adapter_%d", id); + service.socket_id = adapter->socket_id; + service.callback = eca_service_func; + service.callback_userdata = adapter; + /* Service function handles locking for queue add/del updates */ + service.capabilities = RTE_SERVICE_CAP_MT_SAFE; + ret = rte_service_component_register(&service, &adapter->service_id); + if (ret) { + RTE_EDEV_LOG_ERR("failed to register service %s err = %" PRId32, + service.name, ret); + return ret; + } + + ret = adapter->conf_cb(id, adapter->eventdev_id, + &adapter_conf, adapter->conf_arg); + if (ret) { + RTE_EDEV_LOG_ERR("configuration callback failed err = %" PRId32, + ret); + return ret; + } + + adapter->max_nb = adapter_conf.max_nb; + adapter->event_port_id = adapter_conf.event_port_id; + adapter->service_inited = 1; + + return ret; +} + +static void +eca_update_qp_info(struct rte_event_crypto_adapter *adapter, + struct crypto_device_info *dev_info, + int32_t 
queue_pair_id, + uint8_t add) +{ + struct crypto_queue_pair_info *qp_info; + int enabled; + uint16_t i; + + if (dev_info->qpairs == NULL) + return; + + if (queue_pair_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_queue_pairs; i++) + eca_update_qp_info(adapter, dev_info, i, add); + } else { + qp_info = &dev_info->qpairs[queue_pair_id]; + enabled = qp_info->qp_enabled; + if (add) { + adapter->nb_qps += !enabled; + dev_info->num_qpairs += !enabled; + } else { + adapter->nb_qps -= enabled; + dev_info->num_qpairs -= enabled; + } + qp_info->qp_enabled = !!add; + } +} + +static int +eca_add_queue_pair(struct rte_event_crypto_adapter *adapter, + uint8_t cdev_id, + int queue_pair_id) +{ + struct crypto_device_info *dev_info = &adapter->cdevs[cdev_id]; + struct crypto_queue_pair_info *qpairs; + uint32_t i; + + if (dev_info->qpairs == NULL) { + dev_info->qpairs = + rte_zmalloc_socket(adapter->mem_name, + dev_info->dev->data->nb_queue_pairs * + sizeof(struct crypto_queue_pair_info), + 0, adapter->socket_id); + if (dev_info->qpairs == NULL) + return -ENOMEM; + + qpairs = dev_info->qpairs; + qpairs->op_buffer = rte_zmalloc_socket(adapter->mem_name, + BATCH_SIZE * + sizeof(struct rte_crypto_op *), + 0, adapter->socket_id); + if (!qpairs->op_buffer) { + rte_free(qpairs); + return -ENOMEM; + } + } + + if (queue_pair_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_queue_pairs; i++) + eca_update_qp_info(adapter, dev_info, i, 1); + } else + eca_update_qp_info(adapter, dev_info, + (uint16_t)queue_pair_id, 1); + + return 0; +} + +int __rte_experimental +rte_event_crypto_adapter_queue_pair_add(uint8_t id, + uint8_t cdev_id, + int32_t queue_pair_id, + const struct rte_event *event) +{ + struct rte_event_crypto_adapter *adapter; + struct rte_eventdev *dev; + struct crypto_device_info *dev_info; + uint32_t cap; + int ret; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + if (!rte_cryptodev_pmd_is_valid_dev(cdev_id)) { + RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id); + return -EINVAL; + } + + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[adapter->eventdev_id]; + ret = rte_event_crypto_adapter_caps_get(adapter->eventdev_id, + cdev_id, + &cap); + if (ret) { + RTE_EDEV_LOG_ERR("Failed to get adapter caps dev %" PRIu8 + " cdev %" PRIu8, id, cdev_id); + return ret; + } + + if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) && + (event == NULL)) { + RTE_EDEV_LOG_ERR("Conf value can not be NULL for dev_id=%u", + cdev_id); + return -EINVAL; + } + + dev_info = &adapter->cdevs[cdev_id]; + + if (queue_pair_id != -1 && + (uint16_t)queue_pair_id >= dev_info->dev->data->nb_queue_pairs) { + RTE_EDEV_LOG_ERR("Invalid queue_pair_id %" PRIu16, + (uint16_t)queue_pair_id); + return -EINVAL; + } + + /* In case HW cap is RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD, + * no need of service core as HW supports event forward capability. 
+ */ + if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) || + (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND && + adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW) || + (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW && + adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)) { + RTE_FUNC_PTR_OR_ERR_RET( + *dev->dev_ops->crypto_adapter_queue_pair_add, + -ENOTSUP); + if (dev_info->qpairs == NULL) { + dev_info->qpairs = + rte_zmalloc_socket(adapter->mem_name, + dev_info->dev->data->nb_queue_pairs * + sizeof(struct crypto_queue_pair_info), + 0, adapter->socket_id); + if (dev_info->qpairs == NULL) + return -ENOMEM; + } + + ret = (*dev->dev_ops->crypto_adapter_queue_pair_add)(dev, + dev_info->dev, + queue_pair_id, + event); + if (ret) + return ret; + + else + eca_update_qp_info(adapter, &adapter->cdevs[cdev_id], + queue_pair_id, 1); + } + + /* In case HW cap is RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW, + * or SW adapter, initiate services so the application can choose + * which ever way it wants to use the adapter. + * Case 1: RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW + * Application may wants to use one of below two mode + * a. OP_FORWARD mode -> HW Dequeue + SW enqueue + * b. OP_NEW mode -> HW Dequeue + * Case 2: No HW caps, use SW adapter + * a. OP_FORWARD mode -> SW enqueue & dequeue + * b. OP_NEW mode -> SW Dequeue + */ + if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW && + adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD) || + (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW) && + !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) && + !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) && + (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA))) { + rte_spinlock_lock(&adapter->lock); + ret = eca_init_service(adapter, id); + if (ret == 0) + ret = eca_add_queue_pair(adapter, cdev_id, + queue_pair_id); + rte_spinlock_unlock(&adapter->lock); + + if (ret) + return ret; + + rte_service_component_runstate_set(adapter->service_id, 1); + } + + return 0; +} + +int __rte_experimental +rte_event_crypto_adapter_queue_pair_del(uint8_t id, uint8_t cdev_id, + int32_t queue_pair_id) +{ + struct rte_event_crypto_adapter *adapter; + struct crypto_device_info *dev_info; + struct rte_eventdev *dev; + int ret; + uint32_t cap; + uint16_t i; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + if (!rte_cryptodev_pmd_is_valid_dev(cdev_id)) { + RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id); + return -EINVAL; + } + + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[adapter->eventdev_id]; + ret = rte_event_crypto_adapter_caps_get(adapter->eventdev_id, + cdev_id, + &cap); + if (ret) + return ret; + + dev_info = &adapter->cdevs[cdev_id]; + + if (queue_pair_id != -1 && + (uint16_t)queue_pair_id >= dev_info->dev->data->nb_queue_pairs) { + RTE_EDEV_LOG_ERR("Invalid queue_pair_id %" PRIu16, + (uint16_t)queue_pair_id); + return -EINVAL; + } + + if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) || + (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW && + adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)) { + RTE_FUNC_PTR_OR_ERR_RET( + *dev->dev_ops->crypto_adapter_queue_pair_del, + -ENOTSUP); + ret = (*dev->dev_ops->crypto_adapter_queue_pair_del)(dev, + dev_info->dev, + queue_pair_id); + if (ret == 0) { + eca_update_qp_info(adapter, + &adapter->cdevs[cdev_id], + queue_pair_id, + 0); + if (dev_info->num_qpairs == 0) { + 
rte_free(dev_info->qpairs); + dev_info->qpairs = NULL; + } + } + } else { + if (adapter->nb_qps == 0) + return 0; + + rte_spinlock_lock(&adapter->lock); + if (queue_pair_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_queue_pairs; + i++) + eca_update_qp_info(adapter, dev_info, + queue_pair_id, 0); + } else { + eca_update_qp_info(adapter, dev_info, + (uint16_t)queue_pair_id, 0); + } + + if (dev_info->num_qpairs == 0) { + rte_free(dev_info->qpairs); + dev_info->qpairs = NULL; + } + + rte_spinlock_unlock(&adapter->lock); + rte_service_component_runstate_set(adapter->service_id, + adapter->nb_qps); + } + + return ret; +} + +static int +eca_adapter_ctrl(uint8_t id, int start) +{ + struct rte_event_crypto_adapter *adapter; + struct crypto_device_info *dev_info; + struct rte_eventdev *dev; + uint32_t i; + int use_service; + int stop = !start; + + use_service = 0; + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[adapter->eventdev_id]; + + for (i = 0; i < rte_cryptodev_count(); i++) { + dev_info = &adapter->cdevs[i]; + /* if start check for num queue pairs */ + if (start && !dev_info->num_qpairs) + continue; + /* if stop check if dev has been started */ + if (stop && !dev_info->dev_started) + continue; + use_service |= !dev_info->internal_event_port; + dev_info->dev_started = start; + if (dev_info->internal_event_port == 0) + continue; + start ? (*dev->dev_ops->crypto_adapter_start)(dev, + &dev_info->dev[i]) : + (*dev->dev_ops->crypto_adapter_stop)(dev, + &dev_info->dev[i]); + } + + if (use_service) + rte_service_runstate_set(adapter->service_id, start); + + return 0; +} + +int __rte_experimental +rte_event_crypto_adapter_start(uint8_t id) +{ + struct rte_event_crypto_adapter *adapter; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + return eca_adapter_ctrl(id, 1); +} + +int __rte_experimental +rte_event_crypto_adapter_stop(uint8_t id) +{ + return eca_adapter_ctrl(id, 0); +} + +int __rte_experimental +rte_event_crypto_adapter_stats_get(uint8_t id, + struct rte_event_crypto_adapter_stats *stats) +{ + struct rte_event_crypto_adapter *adapter; + struct rte_event_crypto_adapter_stats dev_stats_sum = { 0 }; + struct rte_event_crypto_adapter_stats dev_stats; + struct rte_eventdev *dev; + struct crypto_device_info *dev_info; + uint32_t i; + int ret; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + adapter = eca_id_to_adapter(id); + if (adapter == NULL || stats == NULL) + return -EINVAL; + + dev = &rte_eventdevs[adapter->eventdev_id]; + memset(stats, 0, sizeof(*stats)); + for (i = 0; i < rte_cryptodev_count(); i++) { + dev_info = &adapter->cdevs[i]; + if (dev_info->internal_event_port == 0 || + dev->dev_ops->crypto_adapter_stats_get == NULL) + continue; + ret = (*dev->dev_ops->crypto_adapter_stats_get)(dev, + dev_info->dev, + &dev_stats); + if (ret) + continue; + + dev_stats_sum.crypto_deq_count += dev_stats.crypto_deq_count; + dev_stats_sum.event_enq_count += + dev_stats.event_enq_count; + } + + if (adapter->service_inited) + *stats = adapter->crypto_stats; + + stats->crypto_deq_count += dev_stats_sum.crypto_deq_count; + stats->event_enq_count += dev_stats_sum.event_enq_count; + + return 0; +} + +int __rte_experimental +rte_event_crypto_adapter_stats_reset(uint8_t id) +{ + struct rte_event_crypto_adapter *adapter; + struct crypto_device_info *dev_info; + struct rte_eventdev *dev; + uint32_t 
i; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + + dev = &rte_eventdevs[adapter->eventdev_id]; + for (i = 0; i < rte_cryptodev_count(); i++) { + dev_info = &adapter->cdevs[i]; + if (dev_info->internal_event_port == 0 || + dev->dev_ops->crypto_adapter_stats_reset == NULL) + continue; + (*dev->dev_ops->crypto_adapter_stats_reset)(dev, + dev_info->dev); + } + + memset(&adapter->crypto_stats, 0, sizeof(adapter->crypto_stats)); + return 0; +} + +int __rte_experimental +rte_event_crypto_adapter_service_id_get(uint8_t id, uint32_t *service_id) +{ + struct rte_event_crypto_adapter *adapter; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + adapter = eca_id_to_adapter(id); + if (adapter == NULL || service_id == NULL) + return -EINVAL; + + if (adapter->service_inited) + *service_id = adapter->service_id; + + return adapter->service_inited ? 0 : -ESRCH; +} + +int __rte_experimental +rte_event_crypto_adapter_event_port_get(uint8_t id, uint8_t *event_port_id) +{ + struct rte_event_crypto_adapter *adapter; + + EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + + adapter = eca_id_to_adapter(id); + if (adapter == NULL || event_port_id == NULL) + return -EINVAL; + + *event_port_id = adapter->event_port_id; + + return 0; +} diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.h b/lib/librte_eventdev/rte_event_crypto_adapter.h new file mode 100644 index 00000000..d367309c --- /dev/null +++ b/lib/librte_eventdev/rte_event_crypto_adapter.h @@ -0,0 +1,575 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. + * All rights reserved. + */ + +#ifndef _RTE_EVENT_CRYPTO_ADAPTER_ +#define _RTE_EVENT_CRYPTO_ADAPTER_ + +/** + * @file + * + * RTE Event crypto adapter + * + * Eventdev library provides couple of adapters to bridge between various + * components for providing new event source. The event crypto adapter is + * one of those adapters which is intended to bridge between event devices + * and crypto devices. + * + * The crypto adapter adds support to enqueue/dequeue crypto operations to/ + * from event device. The packet flow between crypto device and the event + * device can be accomplished using both SW and HW based transfer mechanisms. + * The adapter uses an EAL service core function for SW based packet transfer + * and uses the eventdev PMD functions to configure HW based packet transfer + * between the crypto device and the event device. + * + * The application can choose to submit a crypto operation directly to + * crypto device or send it to the crypto adapter via eventdev based on + * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability. + * The first mode is known as the event new(RTE_EVENT_CRYPTO_ADAPTER_OP_NEW) + * mode and the second as the event forward(RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD) + * mode. The choice of mode can be specified while creating the adapter. + * In the former mode, it is an application responsibility to enable ingress + * packet ordering. In the latter mode, it is the adapter responsibility to + * enable the ingress packet ordering. 
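 *
 * Editor's illustrative sketch, not part of the original patch: the
 * identifiers adapter_id, evdev_id and cdev_id below are placeholders and
 * return values are left unchecked for brevity. A minimal SW-transfer
 * setup creates the adapter, binds cryptodev queue pairs and starts it
 * using the functions declared further down in this header:
 *
 *   struct rte_event_port_conf port_conf = {
 *           .new_event_threshold = 4096,
 *           .dequeue_depth = 16,
 *           .enqueue_depth = 16,
 *   };
 *   uint8_t adapter_id = 0, evdev_id = 0, cdev_id = 0;
 *
 *   rte_event_crypto_adapter_create(adapter_id, evdev_id, &port_conf,
 *                                   RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD);
 *   rte_event_crypto_adapter_queue_pair_add(adapter_id, cdev_id, -1, NULL);
 *   rte_event_crypto_adapter_start(adapter_id);
 *
 * The queue pair id -1 adds all pre-configured queue pairs of the
 * cryptodev; the event argument may be NULL when the HW queue pair to
 * event queue binding capability is not used.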
+ * + * + * Working model of RTE_EVENT_CRYPTO_ADAPTER_OP_NEW mode: + * + * +--------------+ +--------------+ + * | | | Crypto stage | + * | Application |---[2]-->| + enqueue to | + * | | | cryptodev | + * +--------------+ +--------------+ + * ^ ^ | + * | | [3] + * [6] [1] | + * | | | + * +--------------+ | + * | | | + * | Event device | | + * | | | + * +--------------+ | + * ^ | + * | | + * [5] | + * | v + * +--------------+ +--------------+ + * | | | | + * |Crypto adapter|<--[4]---| Cryptodev | + * | | | | + * +--------------+ +--------------+ + * + * + * [1] Application dequeues events from the previous stage. + * [2] Application prepares the crypto operations. + * [3] Crypto operations are submitted to cryptodev by application. + * [4] Crypto adapter dequeues crypto completions from cryptodev. + * [5] Crypto adapter enqueues events to the eventdev. + * [6] Application dequeues from eventdev and prepare for further + * processing. + * + * In the RTE_EVENT_CRYPTO_ADAPTER_OP_NEW mode, application submits crypto + * operations directly to crypto device. The adapter then dequeues crypto + * completions from crypto device and enqueue events to the event device. + * This mode does not ensure ingress ordering, if the application directly + * enqueues to cryptodev without going through crypto/atomic stage i.e. + * removing item [1] and [2]. + * Events dequeued from the adapter will be treated as new events. + * In this mode, application needs to specify event information (response + * information) which is needed to enqueue an event after the crypto operation + * is completed. + * + * + * Working model of RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode: + * + * +--------------+ +--------------+ + * --[1]-->| |---[2]-->| Application | + * | Event device | | in | + * <--[8]--| |<--[3]---| Ordered stage| + * +--------------+ +--------------+ + * ^ | + * | [4] + * [7] | + * | v + * +----------------+ +--------------+ + * | |--[5]->| | + * | Crypto adapter | | Cryptodev | + * | |<-[6]--| | + * +----------------+ +--------------+ + * + * + * [1] Events from the previous stage. + * [2] Application in ordered stage dequeues events from eventdev. + * [3] Application enqueues crypto operations as events to eventdev. + * [4] Crypto adapter dequeues event from eventdev. + * [5] Crypto adapter submits crypto operations to cryptodev + * (Atomic stage). + * [6] Crypto adapter dequeues crypto completions from cryptodev + * [7] Crypto adapter enqueues events to the eventdev + * [8] Events to the next stage + * + * In the RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode, if HW supports + * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability the application + * can directly submit the crypto operations to the cryptodev. + * If not, application retrieves crypto adapter's event port using + * rte_event_crypto_adapter_event_port_get() API. Then, links its event + * queue to this port and starts enqueuing crypto operations as events + * to the eventdev. The adapter then dequeues the events and submits the + * crypto operations to the cryptodev. After the crypto completions, the + * adapter enqueues events to the event device. + * Application can use this mode, when ingress packet ordering is needed. + * Events dequeued from the adapter will be treated as forwarded events. 
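 *
 * Editor's illustrative sketch, not part of the original patch; evdev_id,
 * app_port_id and crypto_ev_queue are placeholder names. In FORWARD mode
 * without the INTERNAL_PORT_OP_FWD capability, the application links the
 * event queue that carries crypto requests to the adapter's event port
 * and then enqueues each rte_crypto_op wrapped in an event:
 *
 *   uint8_t adapter_port;
 *   uint8_t crypto_ev_queue = 2;
 *   struct rte_event ev;
 *
 *   rte_event_crypto_adapter_event_port_get(adapter_id, &adapter_port);
 *   rte_event_port_link(evdev_id, adapter_port, &crypto_ev_queue, NULL, 1);
 *
 *   ev.event = 0;
 *   ev.queue_id = crypto_ev_queue;
 *   ev.op = RTE_EVENT_OP_FORWARD;
 *   ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
 *   ev.event_ptr = op;
 *   rte_event_enqueue_burst(evdev_id, app_port_id, &ev, 1);
 *
 * where op is a struct rte_crypto_op carrying the request/response
 * metadata described below.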
+ * In this mode, the application needs to specify the cryptodev ID + * and queue pair ID (request information) needed to enqueue a crypto + * operation in addition to the event information (response information) + * needed to enqueue an event after the crypto operation has completed. + * + * + * The event crypto adapter provides common APIs to configure the packet flow + * from the crypto device to event devices for both SW and HW based transfers. + * The crypto event adapter's functions are: + * - rte_event_crypto_adapter_create_ext() + * - rte_event_crypto_adapter_create() + * - rte_event_crypto_adapter_free() + * - rte_event_crypto_adapter_queue_pair_add() + * - rte_event_crypto_adapter_queue_pair_del() + * - rte_event_crypto_adapter_start() + * - rte_event_crypto_adapter_stop() + * - rte_event_crypto_adapter_stats_get() + * - rte_event_crypto_adapter_stats_reset() + + * The applicaton creates an instance using rte_event_crypto_adapter_create() + * or rte_event_crypto_adapter_create_ext(). + * + * Cryptodev queue pair addition/deletion is done using the + * rte_event_crypto_adapter_queue_pair_xxx() APIs. If HW supports + * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND capability, event + * information must be passed to the add API. + * + * The SW adapter or HW PMD uses rte_crypto_op::sess_type to decide whether + * request/response(private) data is located in the crypto/security session + * or at an offset in the rte_crypto_op. + * + * For session-based operations, the set and get API provides a mechanism for + * an application to store and retrieve the data information stored + * along with the crypto session. + * The RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA capability indicates + * whether HW or SW supports this feature. + * + * For session-less mode, the adapter gets the private data information placed + * along with the ``struct rte_crypto_op``. + * The rte_crypto_op::private_data_offset provides an offset to locate the + * request/response information in the rte_crypto_op. This offset is counted + * from the start of the rte_crypto_op including initialization vector (IV). + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "rte_eventdev.h" + +/** + * @warning + * @b EXPERIMENTAL: this enum may change without prior notice + * + * Crypto event adapter mode + */ +enum rte_event_crypto_adapter_mode { + RTE_EVENT_CRYPTO_ADAPTER_OP_NEW, + /**< Start the crypto adapter in event new mode. + * @see RTE_EVENT_OP_NEW. + * Application submits crypto operations to the cryptodev. + * Adapter only dequeues the crypto completions from cryptodev + * and enqueue events to the eventdev. + */ + RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD, + /**< Start the crypto adapter in event forward mode. + * @see RTE_EVENT_OP_FORWARD. + * Application submits crypto requests as events to the crypto + * adapter or crypto device based on + * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability. + * Crypto completions are enqueued back to the eventdev by + * crypto adapter. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Crypto event request structure will be filled by application to + * provide event request information to the adapter. + */ +struct rte_event_crypto_request { + uint8_t resv[8]; + /**< Overlaps with first 8 bytes of struct rte_event + * that encode the response event information. Application + * is expected to fill in struct rte_event response_info. 
+ */ + uint16_t cdev_id; + /**< cryptodev ID to be used */ + uint16_t queue_pair_id; + /**< cryptodev queue pair ID to be used */ + uint32_t resv1; + /**< Reserved bits */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Crypto event metadata structure will be filled by application + * to provide crypto request and event response information. + * + * If crypto events are enqueued using a HW mechanism, the cryptodev + * PMD will use the event response information to set up the event + * that is enqueued back to eventdev after completion of the crypto + * operation. If the transfer is done by SW, event response information + * will be used by the adapter. + */ +union rte_event_crypto_metadata { + struct rte_event_crypto_request request_info; + /**< Request information to be filled in by application + * for RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode. + */ + struct rte_event response_info; + /**< Response information to be filled in by application + * for RTE_EVENT_CRYPTO_ADAPTER_OP_NEW and + * RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Adapter configuration structure that the adapter configuration callback + * function is expected to fill out + * @see rte_event_crypto_adapter_conf_cb + */ +struct rte_event_crypto_adapter_conf { + uint8_t event_port_id; + /**< Event port identifier, the adapter enqueues events to this + * port and dequeues crypto request events in + * RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode. + */ + uint32_t max_nb; + /**< The adapter can return early if it has processed at least + * max_nb crypto ops. This isn't treated as a requirement; batching + * may cause the adapter to process more than max_nb crypto ops. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Function type used for adapter configuration callback. The callback is + * used to fill in members of the struct rte_event_crypto_adapter_conf, this + * callback is invoked when creating a SW service for packet transfer from + * cryptodev queue pair to the event device. The SW service is created within + * the rte_event_crypto_adapter_queue_pair_add() function if SW based packet + * transfers from cryptodev queue pair to the event device are required. + * + * @param id + * Adapter identifier. + * + * @param dev_id + * Event device identifier. + * + * @param conf + * Structure that needs to be populated by this callback. + * + * @param arg + * Argument to the callback. This is the same as the conf_arg passed to the + * rte_event_crypto_adapter_create_ext(). + */ +typedef int (*rte_event_crypto_adapter_conf_cb) (uint8_t id, uint8_t dev_id, + struct rte_event_crypto_adapter_conf *conf, + void *arg); + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * A structure used to retrieve statistics for an event crypto adapter + * instance. 
+ */ + +struct rte_event_crypto_adapter_stats { + uint64_t event_poll_count; + /**< Event port poll count */ + uint64_t event_deq_count; + /**< Event dequeue count */ + uint64_t crypto_enq_count; + /**< Cryptodev enqueue count */ + uint64_t crypto_enq_fail; + /**< Cryptodev enqueue failed count */ + uint64_t crypto_deq_count; + /**< Cryptodev dequeue count */ + uint64_t event_enq_count; + /**< Event enqueue count */ + uint64_t event_enq_retry_count; + /**< Event enqueue retry count */ + uint64_t event_enq_fail_count; + /**< Event enqueue fail count */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new event crypto adapter with the specified identifier. + * + * @param id + * Adapter identifier. + * + * @param dev_id + * Event device identifier. + * + * @param conf_cb + * Callback function that fills in members of a + * struct rte_event_crypto_adapter_conf struct passed into + * it. + * + * @param mode + * Flag to indicate the mode of the adapter. + * @see rte_event_crypto_adapter_mode + * + * @param conf_arg + * Argument that is passed to the conf_cb function. + * + * @return + * - 0: Success + * - <0: Error code on failure + */ +int __rte_experimental +rte_event_crypto_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_crypto_adapter_conf_cb conf_cb, + enum rte_event_crypto_adapter_mode mode, + void *conf_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new event crypto adapter with the specified identifier. + * This function uses an internal configuration function that creates an event + * port. This default function reconfigures the event device with an + * additional event port and set up the event port using the port_config + * parameter passed into this function. In case the application needs more + * control in configuration of the service, it should use the + * rte_event_crypto_adapter_create_ext() version. + * + * @param id + * Adapter identifier. + * + * @param dev_id + * Event device identifier. + * + * @param port_config + * Argument of type *rte_event_port_conf* that is passed to the conf_cb + * function. + * + * @param mode + * Flag to indicate the mode of the adapter. + * @see rte_event_crypto_adapter_mode + * + * @return + * - 0: Success + * - <0: Error code on failure + */ +int __rte_experimental +rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_config, + enum rte_event_crypto_adapter_mode mode); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Free an event crypto adapter + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success + * - <0: Error code on failure, If the adapter still has queue pairs + * added to it, the function returns -EBUSY. + */ +int __rte_experimental +rte_event_crypto_adapter_free(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Add a queue pair to an event crypto adapter. + * + * @param id + * Adapter identifier. + * + * @param cdev_id + * Cryptodev identifier. + * + * @param queue_pair_id + * Cryptodev queue pair identifier. If queue_pair_id is set -1, + * adapter adds all the pre configured queue pairs to the instance. + * + * @param event + * if HW supports cryptodev queue pair to event queue binding, application is + * expected to fill in event information, else it will be NULL. 
+ * @see RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND + * + * @return + * - 0: Success, queue pair added correctly. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_queue_pair_add(uint8_t id, + uint8_t cdev_id, + int32_t queue_pair_id, + const struct rte_event *event); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Delete a queue pair from an event crypto adapter. + * + * @param id + * Adapter identifier. + * + * @param cdev_id + * Cryptodev identifier. + * + * @param queue_pair_id + * Cryptodev queue pair identifier. + * + * @return + * - 0: Success, queue pair deleted successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_queue_pair_del(uint8_t id, uint8_t cdev_id, + int32_t queue_pair_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Start event crypto adapter + * + * @param id + * Adapter identifier. + * + * + * @return + * - 0: Success, adapter started successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_start(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Stop event crypto adapter + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success, adapter stopped successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_stop(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve statistics for an adapter + * + * @param id + * Adapter identifier. + * + * @param [out] stats + * A pointer to structure used to retrieve statistics for an adapter. + * + * @return + * - 0: Success, retrieved successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_stats_get(uint8_t id, + struct rte_event_crypto_adapter_stats *stats); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset statistics for an adapter. + * + * @param id + * Adapter identifier. + * + * @return + * - 0: Success, statistics reset successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_crypto_adapter_stats_reset(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the service ID of an adapter. If the adapter doesn't use + * a rte_service function, this function returns -ESRCH. + * + * @param id + * Adapter identifier. + * + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * + * @return + * - 0: Success + * - <0: Error code on failure, if the adapter doesn't use a rte_service + * function, this function returns -ESRCH. + */ +int __rte_experimental +rte_event_crypto_adapter_service_id_get(uint8_t id, uint32_t *service_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the event port of an adapter. + * + * @param id + * Adapter identifier. + * + * @param [out] event_port_id + * Application links its event queue to this adapter port which is used + * in RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode. + * + * @return + * - 0: Success + * - <0: Error code on failure. 
+ */ +int __rte_experimental +rte_event_crypto_adapter_event_port_get(uint8_t id, uint8_t *event_port_id); + +#ifdef __cplusplus +} +#endif +#endif /* _RTE_EVENT_CRYPTO_ADAPTER_ */ diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c index 9aece9f8..f5e5a0b5 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c @@ -1,3 +1,12 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation. + * All rights reserved. + */ +#if defined(LINUX) +#include +#endif +#include + #include #include #include @@ -7,6 +16,7 @@ #include #include #include +#include #include "rte_eventdev.h" #include "rte_eventdev_pmd.h" @@ -20,6 +30,22 @@ #define ETH_RX_ADAPTER_MEM_NAME_LEN 32 #define RSS_KEY_SIZE 40 +/* value written to intr thread pipe to signal thread exit */ +#define ETH_BRIDGE_INTR_THREAD_EXIT 1 +/* Sentinel value to detect initialized file handle */ +#define INIT_FD -1 + +/* + * Used to store port and queue ID of interrupting Rx queue + */ +union queue_data { + RTE_STD_C11 + void *ptr; + struct { + uint16_t port; + uint16_t queue; + }; +}; /* * There is an instance of this struct per polled Rx queue added to the @@ -27,7 +53,7 @@ */ struct eth_rx_poll_entry { /* Eth port to poll */ - uint8_t eth_dev_id; + uint16_t eth_dev_id; /* Eth rx queue to poll */ uint16_t eth_rx_qid; }; @@ -71,6 +97,30 @@ struct rte_event_eth_rx_adapter { uint16_t enq_block_count; /* Block start ts */ uint64_t rx_enq_block_start_ts; + /* epoll fd used to wait for Rx interrupts */ + int epd; + /* Num of interrupt driven interrupt queues */ + uint32_t num_rx_intr; + /* Used to send of interrupting Rx queues from + * the interrupt thread to the Rx thread + */ + struct rte_ring *intr_ring; + /* Rx Queue data (dev id, queue id) for the last non-empty + * queue polled + */ + union queue_data qd; + /* queue_data is valid */ + int qd_valid; + /* Interrupt ring lock, synchronizes Rx thread + * and interrupt thread + */ + rte_spinlock_t intr_ring_lock; + /* event array passed to rte_poll_wait */ + struct rte_epoll_event *epoll_events; + /* Count of interrupt vectors in use */ + uint32_t num_intr_vec; + /* Thread blocked on Rx interrupts */ + pthread_t rx_intr_thread; /* Configuration callback for rte_service configuration */ rte_event_eth_rx_adapter_conf_cb conf_cb; /* Configuration callback argument */ @@ -87,12 +137,20 @@ struct rte_event_eth_rx_adapter { int socket_id; /* Per adapter EAL service */ uint32_t service_id; + /* Adapter started flag */ + uint8_t rxa_started; + /* Adapter ID */ + uint8_t id; } __rte_cache_aligned; /* Per eth device */ struct eth_device_info { struct rte_eth_dev *dev; struct eth_rx_queue_info *rx_queue; + /* Rx callback */ + rte_event_eth_rx_adapter_cb_fn cb_fn; + /* Rx callback argument */ + void *cb_arg; /* Set if ethdev->eventdev packet transfer uses a * hardware mechanism */ @@ -103,15 +161,42 @@ struct eth_device_info { * rx_adapter_stop callback needs to be invoked */ uint8_t dev_rx_started; - /* If nb_dev_queues > 0, the start callback will + /* Number of queues added for this device */ + uint16_t nb_dev_queues; + /* Number of poll based queues + * If nb_rx_poll > 0, the start callback will * be invoked if not already invoked */ - uint16_t nb_dev_queues; + uint16_t nb_rx_poll; + /* Number of interrupt based queues + * If nb_rx_intr > 0, the start callback will + * be invoked if not already invoked. 
+ */ + uint16_t nb_rx_intr; + /* Number of queues that use the shared interrupt */ + uint16_t nb_shared_intr; + /* sum(wrr(q)) for all queues within the device + * useful when deleting all device queues + */ + uint32_t wrr_len; + /* Intr based queue index to start polling from, this is used + * if the number of shared interrupts is non-zero + */ + uint16_t next_q_idx; + /* Intr based queue indices */ + uint16_t *intr_queue; + /* device generates per Rx queue interrupt for queue index + * for queue indices < RTE_MAX_RXTX_INTR_VEC_ID - 1 + */ + int multi_intr_cap; + /* shared interrupt enabled */ + int shared_intr_enabled; }; /* Per Rx queue */ struct eth_rx_queue_info { int queue_enabled; /* True if added */ + int intr_enabled; uint16_t wt; /* Polling weight */ uint8_t event_queue_id; /* Event queue to enqueue packets to */ uint8_t sched_type; /* Sched type for events */ @@ -123,30 +208,30 @@ struct eth_rx_queue_info { static struct rte_event_eth_rx_adapter **event_eth_rx_adapter; static inline int -valid_id(uint8_t id) +rxa_validate_id(uint8_t id) { return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE; } #define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \ - if (!valid_id(id)) { \ + if (!rxa_validate_id(id)) { \ RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \ return retval; \ } \ } while (0) static inline int -sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_sw_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter) { - return rx_adapter->num_rx_polled; + return rx_adapter->num_rx_polled + rx_adapter->num_rx_intr; } /* Greatest common divisor */ -static uint16_t gcd_u16(uint16_t a, uint16_t b) +static uint16_t rxa_gcd_u16(uint16_t a, uint16_t b) { uint16_t r = a % b; - return r ? gcd_u16(b, r) : b; + return r ? 
rxa_gcd_u16(b, r) : b; } /* Returns the next queue in the polling sequence @@ -154,7 +239,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b) * http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling */ static int -wrr_next(struct rte_event_eth_rx_adapter *rx_adapter, +rxa_wrr_next(struct rte_event_eth_rx_adapter *rx_adapter, unsigned int n, int *cw, struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt, uint16_t gcd, int prev) @@ -164,7 +249,7 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter, while (1) { uint16_t q; - uint8_t d; + uint16_t d; i = (i + 1) % n; if (i == 0) { @@ -182,13 +267,298 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter, } } -/* Precalculate WRR polling sequence for all queues in rx_adapter */ +static inline int +rxa_shared_intr(struct eth_device_info *dev_info, + int rx_queue_id) +{ + int multi_intr_cap; + + if (dev_info->dev->intr_handle == NULL) + return 0; + + multi_intr_cap = rte_intr_cap_multiple(dev_info->dev->intr_handle); + return !multi_intr_cap || + rx_queue_id >= RTE_MAX_RXTX_INTR_VEC_ID - 1; +} + +static inline int +rxa_intr_queue(struct eth_device_info *dev_info, + int rx_queue_id) +{ + struct eth_rx_queue_info *queue_info; + + queue_info = &dev_info->rx_queue[rx_queue_id]; + return dev_info->rx_queue && + !dev_info->internal_event_port && + queue_info->queue_enabled && queue_info->wt == 0; +} + +static inline int +rxa_polled_queue(struct eth_device_info *dev_info, + int rx_queue_id) +{ + struct eth_rx_queue_info *queue_info; + + queue_info = &dev_info->rx_queue[rx_queue_id]; + return !dev_info->internal_event_port && + dev_info->rx_queue && + queue_info->queue_enabled && queue_info->wt != 0; +} + +/* Calculate change in number of vectors after Rx queue ID is add/deleted */ +static int +rxa_nb_intr_vect(struct eth_device_info *dev_info, int rx_queue_id, int add) +{ + uint16_t i; + int n, s; + uint16_t nbq; + + nbq = dev_info->dev->data->nb_rx_queues; + n = 0; /* non shared count */ + s = 0; /* shared count */ + + if (rx_queue_id == -1) { + for (i = 0; i < nbq; i++) { + if (!rxa_shared_intr(dev_info, i)) + n += add ? !rxa_intr_queue(dev_info, i) : + rxa_intr_queue(dev_info, i); + else + s += add ? !rxa_intr_queue(dev_info, i) : + rxa_intr_queue(dev_info, i); + } + + if (s > 0) { + if ((add && dev_info->nb_shared_intr == 0) || + (!add && dev_info->nb_shared_intr)) + n += 1; + } + } else { + if (!rxa_shared_intr(dev_info, rx_queue_id)) + n = add ? !rxa_intr_queue(dev_info, rx_queue_id) : + rxa_intr_queue(dev_info, rx_queue_id); + else + n = add ? !dev_info->nb_shared_intr : + dev_info->nb_shared_intr == 1; + } + + return add ? 
n : -n; +} + +/* Calculate nb_rx_intr after deleting interrupt mode rx queues + */ +static void +rxa_calc_nb_post_intr_del(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint32_t *nb_rx_intr) +{ + uint32_t intr_diff; + + if (rx_queue_id == -1) + intr_diff = dev_info->nb_rx_intr; + else + intr_diff = rxa_intr_queue(dev_info, rx_queue_id); + + *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff; +} + +/* Calculate nb_rx_* after adding interrupt mode rx queues, newly added + * interrupt queues could currently be poll mode Rx queues + */ +static void +rxa_calc_nb_post_add_intr(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint32_t *nb_rx_poll, + uint32_t *nb_rx_intr, + uint32_t *nb_wrr) +{ + uint32_t intr_diff; + uint32_t poll_diff; + uint32_t wrr_len_diff; + + if (rx_queue_id == -1) { + intr_diff = dev_info->dev->data->nb_rx_queues - + dev_info->nb_rx_intr; + poll_diff = dev_info->nb_rx_poll; + wrr_len_diff = dev_info->wrr_len; + } else { + intr_diff = !rxa_intr_queue(dev_info, rx_queue_id); + poll_diff = rxa_polled_queue(dev_info, rx_queue_id); + wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt : + 0; + } + + *nb_rx_intr = rx_adapter->num_rx_intr + intr_diff; + *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff; + *nb_wrr = rx_adapter->wrr_len - wrr_len_diff; +} + +/* Calculate size of the eth_rx_poll and wrr_sched arrays + * after deleting poll mode rx queues + */ +static void +rxa_calc_nb_post_poll_del(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint32_t *nb_rx_poll, + uint32_t *nb_wrr) +{ + uint32_t poll_diff; + uint32_t wrr_len_diff; + + if (rx_queue_id == -1) { + poll_diff = dev_info->nb_rx_poll; + wrr_len_diff = dev_info->wrr_len; + } else { + poll_diff = rxa_polled_queue(dev_info, rx_queue_id); + wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt : + 0; + } + + *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff; + *nb_wrr = rx_adapter->wrr_len - wrr_len_diff; +} + +/* Calculate nb_rx_* after adding poll mode rx queues + */ +static void +rxa_calc_nb_post_add_poll(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint16_t wt, + uint32_t *nb_rx_poll, + uint32_t *nb_rx_intr, + uint32_t *nb_wrr) +{ + uint32_t intr_diff; + uint32_t poll_diff; + uint32_t wrr_len_diff; + + if (rx_queue_id == -1) { + intr_diff = dev_info->nb_rx_intr; + poll_diff = dev_info->dev->data->nb_rx_queues - + dev_info->nb_rx_poll; + wrr_len_diff = wt*dev_info->dev->data->nb_rx_queues + - dev_info->wrr_len; + } else { + intr_diff = rxa_intr_queue(dev_info, rx_queue_id); + poll_diff = !rxa_polled_queue(dev_info, rx_queue_id); + wrr_len_diff = rxa_polled_queue(dev_info, rx_queue_id) ? 
+ wt - dev_info->rx_queue[rx_queue_id].wt : + wt; + } + + *nb_rx_poll = rx_adapter->num_rx_polled + poll_diff; + *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff; + *nb_wrr = rx_adapter->wrr_len + wrr_len_diff; +} + +/* Calculate nb_rx_* after adding rx_queue_id */ +static void +rxa_calc_nb_post_add(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint16_t wt, + uint32_t *nb_rx_poll, + uint32_t *nb_rx_intr, + uint32_t *nb_wrr) +{ + if (wt != 0) + rxa_calc_nb_post_add_poll(rx_adapter, dev_info, rx_queue_id, + wt, nb_rx_poll, nb_rx_intr, nb_wrr); + else + rxa_calc_nb_post_add_intr(rx_adapter, dev_info, rx_queue_id, + nb_rx_poll, nb_rx_intr, nb_wrr); +} + +/* Calculate nb_rx_* after deleting rx_queue_id */ +static void +rxa_calc_nb_post_del(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id, + uint32_t *nb_rx_poll, + uint32_t *nb_rx_intr, + uint32_t *nb_wrr) +{ + rxa_calc_nb_post_poll_del(rx_adapter, dev_info, rx_queue_id, nb_rx_poll, + nb_wrr); + rxa_calc_nb_post_intr_del(rx_adapter, dev_info, rx_queue_id, + nb_rx_intr); +} + +/* + * Allocate the rx_poll array + */ +static struct eth_rx_poll_entry * +rxa_alloc_poll(struct rte_event_eth_rx_adapter *rx_adapter, + uint32_t num_rx_polled) +{ + size_t len; + + len = RTE_ALIGN(num_rx_polled * sizeof(*rx_adapter->eth_rx_poll), + RTE_CACHE_LINE_SIZE); + return rte_zmalloc_socket(rx_adapter->mem_name, + len, + RTE_CACHE_LINE_SIZE, + rx_adapter->socket_id); +} + +/* + * Allocate the WRR array + */ +static uint32_t * +rxa_alloc_wrr(struct rte_event_eth_rx_adapter *rx_adapter, int nb_wrr) +{ + size_t len; + + len = RTE_ALIGN(nb_wrr * sizeof(*rx_adapter->wrr_sched), + RTE_CACHE_LINE_SIZE); + return rte_zmalloc_socket(rx_adapter->mem_name, + len, + RTE_CACHE_LINE_SIZE, + rx_adapter->socket_id); +} + static int -eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_alloc_poll_arrays(struct rte_event_eth_rx_adapter *rx_adapter, + uint32_t nb_poll, + uint32_t nb_wrr, + struct eth_rx_poll_entry **rx_poll, + uint32_t **wrr_sched) +{ + + if (nb_poll == 0) { + *rx_poll = NULL; + *wrr_sched = NULL; + return 0; + } + + *rx_poll = rxa_alloc_poll(rx_adapter, nb_poll); + if (*rx_poll == NULL) { + *wrr_sched = NULL; + return -ENOMEM; + } + + *wrr_sched = rxa_alloc_wrr(rx_adapter, nb_wrr); + if (*wrr_sched == NULL) { + rte_free(*rx_poll); + return -ENOMEM; + } + return 0; +} + +/* Precalculate WRR polling sequence for all queues in rx_adapter */ +static void +rxa_calc_wrr_sequence(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_rx_poll_entry *rx_poll, + uint32_t *rx_wrr) { - uint8_t d; + uint16_t d; uint16_t q; unsigned int i; + int prev = -1; + int cw = -1; /* Initialize variables for calculation of wrr schedule */ uint16_t max_wrr_pos = 0; @@ -196,79 +566,52 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter) uint16_t max_wt = 0; uint16_t gcd = 0; - struct eth_rx_poll_entry *rx_poll = NULL; - uint32_t *rx_wrr = NULL; + if (rx_poll == NULL) + return; - if (rx_adapter->num_rx_polled) { - size_t len = RTE_ALIGN(rx_adapter->num_rx_polled * - sizeof(*rx_adapter->eth_rx_poll), - RTE_CACHE_LINE_SIZE); - rx_poll = rte_zmalloc_socket(rx_adapter->mem_name, - len, - RTE_CACHE_LINE_SIZE, - rx_adapter->socket_id); - if (rx_poll == NULL) - return -ENOMEM; + /* Generate array of all queues to poll, the size of this + * array is poll_q + */ + RTE_ETH_FOREACH_DEV(d) { + uint16_t nb_rx_queues; + struct eth_device_info *dev_info = + 
&rx_adapter->eth_devices[d]; + nb_rx_queues = dev_info->dev->data->nb_rx_queues; + if (dev_info->rx_queue == NULL) + continue; + if (dev_info->internal_event_port) + continue; + dev_info->wrr_len = 0; + for (q = 0; q < nb_rx_queues; q++) { + struct eth_rx_queue_info *queue_info = + &dev_info->rx_queue[q]; + uint16_t wt; - /* Generate array of all queues to poll, the size of this - * array is poll_q - */ - for (d = 0; d < rte_eth_dev_count(); d++) { - uint16_t nb_rx_queues; - struct eth_device_info *dev_info = - &rx_adapter->eth_devices[d]; - nb_rx_queues = dev_info->dev->data->nb_rx_queues; - if (dev_info->rx_queue == NULL) + if (!rxa_polled_queue(dev_info, q)) continue; - for (q = 0; q < nb_rx_queues; q++) { - struct eth_rx_queue_info *queue_info = - &dev_info->rx_queue[q]; - if (queue_info->queue_enabled == 0) - continue; - - uint16_t wt = queue_info->wt; - rx_poll[poll_q].eth_dev_id = d; - rx_poll[poll_q].eth_rx_qid = q; - max_wrr_pos += wt; - max_wt = RTE_MAX(max_wt, wt); - gcd = (gcd) ? gcd_u16(gcd, wt) : wt; - poll_q++; - } - } - - len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr), - RTE_CACHE_LINE_SIZE); - rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name, - len, - RTE_CACHE_LINE_SIZE, - rx_adapter->socket_id); - if (rx_wrr == NULL) { - rte_free(rx_poll); - return -ENOMEM; - } - - /* Generate polling sequence based on weights */ - int prev = -1; - int cw = -1; - for (i = 0; i < max_wrr_pos; i++) { - rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw, - rx_poll, max_wt, gcd, prev); - prev = rx_wrr[i]; + wt = queue_info->wt; + rx_poll[poll_q].eth_dev_id = d; + rx_poll[poll_q].eth_rx_qid = q; + max_wrr_pos += wt; + dev_info->wrr_len += wt; + max_wt = RTE_MAX(max_wt, wt); + gcd = (gcd) ? rxa_gcd_u16(gcd, wt) : wt; + poll_q++; } } - rte_free(rx_adapter->eth_rx_poll); - rte_free(rx_adapter->wrr_sched); - - rx_adapter->eth_rx_poll = rx_poll; - rx_adapter->wrr_sched = rx_wrr; - rx_adapter->wrr_len = max_wrr_pos; - - return 0; + /* Generate polling sequence based on weights */ + prev = -1; + cw = -1; + for (i = 0; i < max_wrr_pos; i++) { + rx_wrr[i] = rxa_wrr_next(rx_adapter, poll_q, &cw, + rx_poll, max_wt, gcd, prev); + prev = rx_wrr[i]; + } } static inline void -mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr, +rxa_mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr, struct ipv6_hdr **ipv6_hdr) { struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); @@ -307,7 +650,7 @@ mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr, /* Calculate RSS hash for IPv4/6 */ static inline uint32_t -do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be) +rxa_do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be) { uint32_t input_len; void *tuple; @@ -316,7 +659,7 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be) struct ipv4_hdr *ipv4_hdr; struct ipv6_hdr *ipv6_hdr; - mtoip(m, &ipv4_hdr, &ipv6_hdr); + rxa_mtoip(m, &ipv4_hdr, &ipv6_hdr); if (ipv4_hdr) { ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr); @@ -335,13 +678,13 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be) } static inline int -rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter) { return !!rx_adapter->enq_block_count; } static inline void -rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter) { if (rx_adapter->rx_enq_block_start_ts) return; @@ -354,13 +697,13 @@ rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter) } static inline void 
-rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter, +rxa_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter, struct rte_event_eth_rx_adapter_stats *stats) { if (unlikely(!stats->rx_enq_start_ts)) stats->rx_enq_start_ts = rte_get_tsc_cycles(); - if (likely(!rx_enq_blocked(rx_adapter))) + if (likely(!rxa_enq_blocked(rx_adapter))) return; rx_adapter->enq_block_count = 0; @@ -376,8 +719,8 @@ rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter, * this function */ static inline void -buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter, - struct rte_event *ev) +rxa_buffer_event(struct rte_event_eth_rx_adapter *rx_adapter, + struct rte_event *ev) { struct rte_eth_event_enqueue_buffer *buf = &rx_adapter->event_enqueue_buffer; @@ -386,7 +729,7 @@ buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter, /* Enqueue buffered events to event device */ static inline uint16_t -flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter) { struct rte_eth_event_enqueue_buffer *buf = &rx_adapter->event_enqueue_buffer; @@ -403,8 +746,8 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter) stats->rx_enq_retry++; } - n ? rx_enq_block_end_ts(rx_adapter, stats) : - rx_enq_block_start_ts(rx_adapter); + n ? rxa_enq_block_end_ts(rx_adapter, stats) : + rxa_enq_block_start_ts(rx_adapter); buf->count -= n; stats->rx_enq_count += n; @@ -413,18 +756,19 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter) } static inline void -fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter, - uint8_t dev_id, - uint16_t rx_queue_id, - struct rte_mbuf **mbufs, - uint16_t num) +rxa_buffer_mbufs(struct rte_event_eth_rx_adapter *rx_adapter, + uint16_t eth_dev_id, + uint16_t rx_queue_id, + struct rte_mbuf **mbufs, + uint16_t num) { uint32_t i; - struct eth_device_info *eth_device_info = - &rx_adapter->eth_devices[dev_id]; + struct eth_device_info *dev_info = + &rx_adapter->eth_devices[eth_dev_id]; struct eth_rx_queue_info *eth_rx_queue_info = - ð_device_info->rx_queue[rx_queue_id]; - + &dev_info->rx_queue[rx_queue_id]; + struct rte_eth_event_enqueue_buffer *buf = + &rx_adapter->event_enqueue_buffer; int32_t qid = eth_rx_queue_info->event_queue_id; uint8_t sched_type = eth_rx_queue_info->sched_type; uint8_t priority = eth_rx_queue_info->priority; @@ -434,22 +778,48 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter, uint32_t rss_mask; uint32_t rss; int do_rss; + uint64_t ts; + struct rte_mbuf *cb_mbufs[BATCH_SIZE]; + uint16_t nb_cb; /* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */ rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1); do_rss = !rss_mask && !eth_rx_queue_info->flow_id_mask; + if ((m->ol_flags & PKT_RX_TIMESTAMP) == 0) { + ts = rte_get_tsc_cycles(); + for (i = 0; i < num; i++) { + m = mbufs[i]; + + m->timestamp = ts; + m->ol_flags |= PKT_RX_TIMESTAMP; + } + } + + + nb_cb = dev_info->cb_fn ? dev_info->cb_fn(eth_dev_id, rx_queue_id, + ETH_EVENT_BUFFER_SIZE, + buf->count, mbufs, + num, + dev_info->cb_arg, + cb_mbufs) : + num; + if (nb_cb < num) { + mbufs = cb_mbufs; + num = nb_cb; + } + for (i = 0; i < num; i++) { m = mbufs[i]; struct rte_event *ev = &events[i]; rss = do_rss ? 
- do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss; + rxa_do_softrss(m, rx_adapter->rss_key_be) : + m->hash.rss; flow_id = eth_rx_queue_info->flow_id & eth_rx_queue_info->flow_id_mask; flow_id |= rss & ~eth_rx_queue_info->flow_id_mask; - ev->flow_id = flow_id; ev->op = RTE_EVENT_OP_NEW; ev->sched_type = sched_type; @@ -459,10 +829,277 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter, ev->priority = priority; ev->mbuf = m; - buf_event_enqueue(rx_adapter, ev); + rxa_buffer_event(rx_adapter, ev); } } +/* Enqueue packets from to event buffer */ +static inline uint32_t +rxa_eth_rx(struct rte_event_eth_rx_adapter *rx_adapter, + uint16_t port_id, + uint16_t queue_id, + uint32_t rx_count, + uint32_t max_rx, + int *rxq_empty) +{ + struct rte_mbuf *mbufs[BATCH_SIZE]; + struct rte_eth_event_enqueue_buffer *buf = + &rx_adapter->event_enqueue_buffer; + struct rte_event_eth_rx_adapter_stats *stats = + &rx_adapter->stats; + uint16_t n; + uint32_t nb_rx = 0; + + if (rxq_empty) + *rxq_empty = 0; + /* Don't do a batch dequeue from the rx queue if there isn't + * enough space in the enqueue buffer. + */ + while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) { + if (buf->count >= BATCH_SIZE) + rxa_flush_event_buffer(rx_adapter); + + stats->rx_poll_count++; + n = rte_eth_rx_burst(port_id, queue_id, mbufs, BATCH_SIZE); + if (unlikely(!n)) { + if (rxq_empty) + *rxq_empty = 1; + break; + } + rxa_buffer_mbufs(rx_adapter, port_id, queue_id, mbufs, n); + nb_rx += n; + if (rx_count + nb_rx > max_rx) + break; + } + + if (buf->count >= BATCH_SIZE) + rxa_flush_event_buffer(rx_adapter); + + return nb_rx; +} + +static inline void +rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter, + void *data) +{ + uint16_t port_id; + uint16_t queue; + int err; + union queue_data qd; + struct eth_device_info *dev_info; + struct eth_rx_queue_info *queue_info; + int *intr_enabled; + + qd.ptr = data; + port_id = qd.port; + queue = qd.queue; + + dev_info = &rx_adapter->eth_devices[port_id]; + queue_info = &dev_info->rx_queue[queue]; + rte_spinlock_lock(&rx_adapter->intr_ring_lock); + if (rxa_shared_intr(dev_info, queue)) + intr_enabled = &dev_info->shared_intr_enabled; + else + intr_enabled = &queue_info->intr_enabled; + + if (*intr_enabled) { + *intr_enabled = 0; + err = rte_ring_enqueue(rx_adapter->intr_ring, data); + /* Entry should always be available. 
+ * The ring size equals the maximum number of interrupt + * vectors supported (an interrupt vector is shared in + * case of shared interrupts) + */ + if (err) + RTE_EDEV_LOG_ERR("Failed to enqueue interrupt" + " to ring: %s", strerror(err)); + else + rte_eth_dev_rx_intr_disable(port_id, queue); + } + rte_spinlock_unlock(&rx_adapter->intr_ring_lock); +} + +static int +rxa_intr_ring_check_avail(struct rte_event_eth_rx_adapter *rx_adapter, + uint32_t num_intr_vec) +{ + if (rx_adapter->num_intr_vec + num_intr_vec > + RTE_EVENT_ETH_INTR_RING_SIZE) { + RTE_EDEV_LOG_ERR("Exceeded intr ring slots current" + " %d needed %d limit %d", rx_adapter->num_intr_vec, + num_intr_vec, RTE_EVENT_ETH_INTR_RING_SIZE); + return -ENOSPC; + } + + return 0; +} + +/* Delete entries for (dev, queue) from the interrupt ring */ +static void +rxa_intr_ring_del_entries(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + uint16_t rx_queue_id) +{ + int i, n; + union queue_data qd; + + rte_spinlock_lock(&rx_adapter->intr_ring_lock); + + n = rte_ring_count(rx_adapter->intr_ring); + for (i = 0; i < n; i++) { + rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr); + if (!rxa_shared_intr(dev_info, rx_queue_id)) { + if (qd.port == dev_info->dev->data->port_id && + qd.queue == rx_queue_id) + continue; + } else { + if (qd.port == dev_info->dev->data->port_id) + continue; + } + rte_ring_enqueue(rx_adapter->intr_ring, qd.ptr); + } + + rte_spinlock_unlock(&rx_adapter->intr_ring_lock); +} + +/* pthread callback handling interrupt mode receive queues + * After receiving an Rx interrupt, it enqueues the port id and queue id of the + * interrupting queue to the adapter's ring buffer for interrupt events. + * These events are picked up by rxa_intr_ring_dequeue() which is invoked from + * the adapter service function. 
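 *
 * Editor's note, illustrative only and not part of the original patch: the
 * (port, queue) identity of the interrupting Rx queue travels through
 * intr_ring as a single pointer-sized value, packed via the union
 * queue_data declared earlier in this file:
 *
 *   union queue_data qd;
 *   qd.port = port_id;
 *   qd.queue = queue_id;
 *   rte_ring_enqueue(rx_adapter->intr_ring, qd.ptr);
 *
 * and unpacked again on the service core:
 *
 *   rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
 *   port_id = qd.port;
 *   queue_id = qd.queue;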
+ */ +static void * +rxa_intr_thread(void *arg) +{ + struct rte_event_eth_rx_adapter *rx_adapter = arg; + struct rte_epoll_event *epoll_events = rx_adapter->epoll_events; + int n, i; + + while (1) { + n = rte_epoll_wait(rx_adapter->epd, epoll_events, + RTE_EVENT_ETH_INTR_RING_SIZE, -1); + if (unlikely(n < 0)) + RTE_EDEV_LOG_ERR("rte_epoll_wait returned error %d", + n); + for (i = 0; i < n; i++) { + rxa_intr_ring_enqueue(rx_adapter, + epoll_events[i].epdata.data); + } + } + + return NULL; +} + +/* Dequeue from interrupt ring and enqueue received + * mbufs to eventdev + */ +static inline uint32_t +rxa_intr_ring_dequeue(struct rte_event_eth_rx_adapter *rx_adapter) +{ + uint32_t n; + uint32_t nb_rx = 0; + int rxq_empty; + struct rte_eth_event_enqueue_buffer *buf; + rte_spinlock_t *ring_lock; + uint8_t max_done = 0; + + if (rx_adapter->num_rx_intr == 0) + return 0; + + if (rte_ring_count(rx_adapter->intr_ring) == 0 + && !rx_adapter->qd_valid) + return 0; + + buf = &rx_adapter->event_enqueue_buffer; + ring_lock = &rx_adapter->intr_ring_lock; + + if (buf->count >= BATCH_SIZE) + rxa_flush_event_buffer(rx_adapter); + + while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) { + struct eth_device_info *dev_info; + uint16_t port; + uint16_t queue; + union queue_data qd = rx_adapter->qd; + int err; + + if (!rx_adapter->qd_valid) { + struct eth_rx_queue_info *queue_info; + + rte_spinlock_lock(ring_lock); + err = rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr); + if (err) { + rte_spinlock_unlock(ring_lock); + break; + } + + port = qd.port; + queue = qd.queue; + rx_adapter->qd = qd; + rx_adapter->qd_valid = 1; + dev_info = &rx_adapter->eth_devices[port]; + if (rxa_shared_intr(dev_info, queue)) + dev_info->shared_intr_enabled = 1; + else { + queue_info = &dev_info->rx_queue[queue]; + queue_info->intr_enabled = 1; + } + rte_eth_dev_rx_intr_enable(port, queue); + rte_spinlock_unlock(ring_lock); + } else { + port = qd.port; + queue = qd.queue; + + dev_info = &rx_adapter->eth_devices[port]; + } + + if (rxa_shared_intr(dev_info, queue)) { + uint16_t i; + uint16_t nb_queues; + + nb_queues = dev_info->dev->data->nb_rx_queues; + n = 0; + for (i = dev_info->next_q_idx; i < nb_queues; i++) { + uint8_t enq_buffer_full; + + if (!rxa_intr_queue(dev_info, i)) + continue; + n = rxa_eth_rx(rx_adapter, port, i, nb_rx, + rx_adapter->max_nb_rx, + &rxq_empty); + nb_rx += n; + + enq_buffer_full = !rxq_empty && n == 0; + max_done = nb_rx > rx_adapter->max_nb_rx; + + if (enq_buffer_full || max_done) { + dev_info->next_q_idx = i; + goto done; + } + } + + rx_adapter->qd_valid = 0; + + /* Reinitialize for next interrupt */ + dev_info->next_q_idx = dev_info->multi_intr_cap ? + RTE_MAX_RXTX_INTR_VEC_ID - 1 : + 0; + } else { + n = rxa_eth_rx(rx_adapter, port, queue, nb_rx, + rx_adapter->max_nb_rx, + &rxq_empty); + rx_adapter->qd_valid = !rxq_empty; + nb_rx += n; + if (nb_rx > rx_adapter->max_nb_rx) + break; + } + } + +done: + rx_adapter->stats.rx_intr_packets += nb_rx; + return nb_rx; +} + /* * Polls receive queues added to the event adapter and enqueues received * packets to the event device. @@ -477,12 +1114,10 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter, * it. 
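 *
 * Editor's note, illustrative only and not part of the original patch: the
 * polling order follows the precomputed WRR sequence. Assuming two polled
 * queues q0 and q1 with weights 2 and 1 (max_wt = 2, gcd = 1),
 * rxa_calc_wrr_sequence() produces wrr_sched[] = { 0, 0, 1 } (indices into
 * eth_rx_poll[]) with wrr_len equal to the sum of the weights (3 here), so
 * q0 is polled twice as often as q1.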
*/ static inline uint32_t -eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter) +rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter) { uint32_t num_queue; - uint16_t n; uint32_t nb_rx = 0; - struct rte_mbuf *mbufs[BATCH_SIZE]; struct rte_eth_event_enqueue_buffer *buf; uint32_t wrr_pos; uint32_t max_nb_rx; @@ -490,142 +1125,483 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter) wrr_pos = rx_adapter->wrr_pos; max_nb_rx = rx_adapter->max_nb_rx; buf = &rx_adapter->event_enqueue_buffer; - struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats; + stats = &rx_adapter->stats; /* Iterate through a WRR sequence */ for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) { unsigned int poll_idx = rx_adapter->wrr_sched[wrr_pos]; uint16_t qid = rx_adapter->eth_rx_poll[poll_idx].eth_rx_qid; - uint8_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id; + uint16_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id; /* Don't do a batch dequeue from the rx queue if there isn't * enough space in the enqueue buffer. */ if (buf->count >= BATCH_SIZE) - flush_event_buffer(rx_adapter); - if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) + rxa_flush_event_buffer(rx_adapter); + if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) { + rx_adapter->wrr_pos = wrr_pos; + return nb_rx; + } + + nb_rx += rxa_eth_rx(rx_adapter, d, qid, nb_rx, max_nb_rx, + NULL); + if (nb_rx > max_nb_rx) { + rx_adapter->wrr_pos = + (wrr_pos + 1) % rx_adapter->wrr_len; break; + } + + if (++wrr_pos == rx_adapter->wrr_len) + wrr_pos = 0; + } + return nb_rx; +} + +static int +rxa_service_func(void *args) +{ + struct rte_event_eth_rx_adapter *rx_adapter = args; + struct rte_event_eth_rx_adapter_stats *stats; + + if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0) + return 0; + if (!rx_adapter->rxa_started) { + return 0; + rte_spinlock_unlock(&rx_adapter->rx_lock); + } + + stats = &rx_adapter->stats; + stats->rx_packets += rxa_intr_ring_dequeue(rx_adapter); + stats->rx_packets += rxa_poll(rx_adapter); + rte_spinlock_unlock(&rx_adapter->rx_lock); + return 0; +} + +static int +rte_event_eth_rx_adapter_init(void) +{ + const char *name = "rte_event_eth_rx_adapter_array"; + const struct rte_memzone *mz; + unsigned int sz; + + sz = sizeof(*event_eth_rx_adapter) * + RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE; + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + + mz = rte_memzone_lookup(name); + if (mz == NULL) { + mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0, + RTE_CACHE_LINE_SIZE); + if (mz == NULL) { + RTE_EDEV_LOG_ERR("failed to reserve memzone err = %" + PRId32, rte_errno); + return -rte_errno; + } + } + + event_eth_rx_adapter = mz->addr; + return 0; +} + +static inline struct rte_event_eth_rx_adapter * +rxa_id_to_adapter(uint8_t id) +{ + return event_eth_rx_adapter ? 
+ event_eth_rx_adapter[id] : NULL; +} + +static int +rxa_default_conf_cb(uint8_t id, uint8_t dev_id, + struct rte_event_eth_rx_adapter_conf *conf, void *arg) +{ + int ret; + struct rte_eventdev *dev; + struct rte_event_dev_config dev_conf; + int started; + uint8_t port_id; + struct rte_event_port_conf *port_conf = arg; + struct rte_event_eth_rx_adapter *rx_adapter = rxa_id_to_adapter(id); + + dev = &rte_eventdevs[rx_adapter->eventdev_id]; + dev_conf = dev->data->dev_conf; + + started = dev->data->dev_started; + if (started) + rte_event_dev_stop(dev_id); + port_id = dev_conf.nb_event_ports; + dev_conf.nb_event_ports += 1; + ret = rte_event_dev_configure(dev_id, &dev_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to configure event dev %u\n", + dev_id); + if (started) { + if (rte_event_dev_start(dev_id)) + return -EIO; + } + return ret; + } + + ret = rte_event_port_setup(dev_id, port_id, port_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to setup event port %u\n", + port_id); + return ret; + } + + conf->event_port_id = port_id; + conf->max_nb_rx = 128; + if (started) + ret = rte_event_dev_start(dev_id); + rx_adapter->default_cb_arg = 1; + return ret; +} + +static int +rxa_epoll_create1(void) +{ +#if defined(LINUX) + int fd; + fd = epoll_create1(EPOLL_CLOEXEC); + return fd < 0 ? -errno : fd; +#elif defined(BSD) + return -ENOTSUP; +#endif +} + +static int +rxa_init_epd(struct rte_event_eth_rx_adapter *rx_adapter) +{ + if (rx_adapter->epd != INIT_FD) + return 0; + + rx_adapter->epd = rxa_epoll_create1(); + if (rx_adapter->epd < 0) { + int err = rx_adapter->epd; + rx_adapter->epd = INIT_FD; + RTE_EDEV_LOG_ERR("epoll_create1() failed, err %d", err); + return err; + } + + return 0; +} + +static int +rxa_create_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter) +{ + int err; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (rx_adapter->intr_ring) + return 0; + + rx_adapter->intr_ring = rte_ring_create("intr_ring", + RTE_EVENT_ETH_INTR_RING_SIZE, + rte_socket_id(), 0); + if (!rx_adapter->intr_ring) + return -ENOMEM; + + rx_adapter->epoll_events = rte_zmalloc_socket(rx_adapter->mem_name, + RTE_EVENT_ETH_INTR_RING_SIZE * + sizeof(struct rte_epoll_event), + RTE_CACHE_LINE_SIZE, + rx_adapter->socket_id); + if (!rx_adapter->epoll_events) { + err = -ENOMEM; + goto error; + } + + rte_spinlock_init(&rx_adapter->intr_ring_lock); + + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "rx-intr-thread-%d", rx_adapter->id); + + err = rte_ctrl_thread_create(&rx_adapter->rx_intr_thread, thread_name, + NULL, rxa_intr_thread, rx_adapter); + if (!err) { + rte_thread_setname(rx_adapter->rx_intr_thread, thread_name); + return 0; + } + + RTE_EDEV_LOG_ERR("Failed to create interrupt thread err = %d\n", err); +error: + rte_ring_free(rx_adapter->intr_ring); + rx_adapter->intr_ring = NULL; + rx_adapter->epoll_events = NULL; + return err; +} + +static int +rxa_destroy_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter) +{ + int err; + + err = pthread_cancel(rx_adapter->rx_intr_thread); + if (err) + RTE_EDEV_LOG_ERR("Can't cancel interrupt thread err = %d\n", + err); + + err = pthread_join(rx_adapter->rx_intr_thread, NULL); + if (err) + RTE_EDEV_LOG_ERR("Can't join interrupt thread err = %d\n", err); + + rte_free(rx_adapter->epoll_events); + rte_ring_free(rx_adapter->intr_ring); + rx_adapter->intr_ring = NULL; + rx_adapter->epoll_events = NULL; + return 0; +} + +static int +rxa_free_intr_resources(struct rte_event_eth_rx_adapter *rx_adapter) +{ + int ret; - stats->rx_poll_count++; - n = rte_eth_rx_burst(d, qid, 
mbufs, BATCH_SIZE); + if (rx_adapter->num_rx_intr == 0) + return 0; - if (n) { - stats->rx_packets += n; - /* The check before rte_eth_rx_burst() ensures that - * all n mbufs can be buffered - */ - fill_event_buffer(rx_adapter, d, qid, mbufs, n); - nb_rx += n; - if (nb_rx > max_nb_rx) { - rx_adapter->wrr_pos = - (wrr_pos + 1) % rx_adapter->wrr_len; - return nb_rx; - } - } + ret = rxa_destroy_intr_thread(rx_adapter); + if (ret) + return ret; - if (++wrr_pos == rx_adapter->wrr_len) - wrr_pos = 0; - } + close(rx_adapter->epd); + rx_adapter->epd = INIT_FD; - return nb_rx; + return ret; } static int -event_eth_rx_adapter_service_func(void *args) +rxa_disable_intr(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + uint16_t rx_queue_id) { - struct rte_event_eth_rx_adapter *rx_adapter = args; - struct rte_eth_event_enqueue_buffer *buf; + int err; + uint16_t eth_dev_id = dev_info->dev->data->port_id; + int sintr = rxa_shared_intr(dev_info, rx_queue_id); + + err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id); + if (err) { + RTE_EDEV_LOG_ERR("Could not disable interrupt for Rx queue %u", + rx_queue_id); + return err; + } - buf = &rx_adapter->event_enqueue_buffer; - if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0) - return 0; - if (eth_rx_poll(rx_adapter) == 0 && buf->count) - flush_event_buffer(rx_adapter); - rte_spinlock_unlock(&rx_adapter->rx_lock); - return 0; + err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id, + rx_adapter->epd, + RTE_INTR_EVENT_DEL, + 0); + if (err) + RTE_EDEV_LOG_ERR("Interrupt event deletion failed %d", err); + + if (sintr) + dev_info->rx_queue[rx_queue_id].intr_enabled = 0; + else + dev_info->shared_intr_enabled = 0; + return err; } static int -rte_event_eth_rx_adapter_init(void) +rxa_del_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id) { - const char *name = "rte_event_eth_rx_adapter_array"; - const struct rte_memzone *mz; - unsigned int sz; + int err; + int i; + int s; - sz = sizeof(*event_eth_rx_adapter) * - RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE; - sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + if (dev_info->nb_rx_intr == 0) + return 0; - mz = rte_memzone_lookup(name); - if (mz == NULL) { - mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0, - RTE_CACHE_LINE_SIZE); - if (mz == NULL) { - RTE_EDEV_LOG_ERR("failed to reserve memzone err = %" - PRId32, rte_errno); - return -rte_errno; + err = 0; + if (rx_queue_id == -1) { + s = dev_info->nb_shared_intr; + for (i = 0; i < dev_info->nb_rx_intr; i++) { + int sintr; + uint16_t q; + + q = dev_info->intr_queue[i]; + sintr = rxa_shared_intr(dev_info, q); + s -= sintr; + + if (!sintr || s == 0) { + + err = rxa_disable_intr(rx_adapter, dev_info, + q); + if (err) + return err; + rxa_intr_ring_del_entries(rx_adapter, dev_info, + q); + } + } + } else { + if (!rxa_intr_queue(dev_info, rx_queue_id)) + return 0; + if (!rxa_shared_intr(dev_info, rx_queue_id) || + dev_info->nb_shared_intr == 1) { + err = rxa_disable_intr(rx_adapter, dev_info, + rx_queue_id); + if (err) + return err; + rxa_intr_ring_del_entries(rx_adapter, dev_info, + rx_queue_id); + } + + for (i = 0; i < dev_info->nb_rx_intr; i++) { + if (dev_info->intr_queue[i] == rx_queue_id) { + for (; i < dev_info->nb_rx_intr - 1; i++) + dev_info->intr_queue[i] = + dev_info->intr_queue[i + 1]; + break; + } } } - event_eth_rx_adapter = mz->addr; - return 0; + return err; } -static inline struct rte_event_eth_rx_adapter * -id_to_rx_adapter(uint8_t id) +static int 
+rxa_config_intr(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + uint16_t rx_queue_id) { - return event_eth_rx_adapter ? - event_eth_rx_adapter[id] : NULL; + int err, err1; + uint16_t eth_dev_id = dev_info->dev->data->port_id; + union queue_data qd; + int init_fd; + uint16_t *intr_queue; + int sintr = rxa_shared_intr(dev_info, rx_queue_id); + + if (rxa_intr_queue(dev_info, rx_queue_id)) + return 0; + + intr_queue = dev_info->intr_queue; + if (dev_info->intr_queue == NULL) { + size_t len = + dev_info->dev->data->nb_rx_queues * sizeof(uint16_t); + dev_info->intr_queue = + rte_zmalloc_socket( + rx_adapter->mem_name, + len, + 0, + rx_adapter->socket_id); + if (dev_info->intr_queue == NULL) + return -ENOMEM; + } + + init_fd = rx_adapter->epd; + err = rxa_init_epd(rx_adapter); + if (err) + goto err_free_queue; + + qd.port = eth_dev_id; + qd.queue = rx_queue_id; + + err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id, + rx_adapter->epd, + RTE_INTR_EVENT_ADD, + qd.ptr); + if (err) { + RTE_EDEV_LOG_ERR("Failed to add interrupt event for" + " Rx Queue %u err %d", rx_queue_id, err); + goto err_del_fd; + } + + err = rte_eth_dev_rx_intr_enable(eth_dev_id, rx_queue_id); + if (err) { + RTE_EDEV_LOG_ERR("Could not enable interrupt for" + " Rx Queue %u err %d", rx_queue_id, err); + + goto err_del_event; + } + + err = rxa_create_intr_thread(rx_adapter); + if (!err) { + if (sintr) + dev_info->shared_intr_enabled = 1; + else + dev_info->rx_queue[rx_queue_id].intr_enabled = 1; + return 0; + } + + + err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id); + if (err) + RTE_EDEV_LOG_ERR("Could not disable interrupt for" + " Rx Queue %u err %d", rx_queue_id, err); +err_del_event: + err1 = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id, + rx_adapter->epd, + RTE_INTR_EVENT_DEL, + 0); + if (err1) { + RTE_EDEV_LOG_ERR("Could not delete event for" + " Rx Queue %u err %d", rx_queue_id, err1); + } +err_del_fd: + if (init_fd == INIT_FD) { + close(rx_adapter->epd); + rx_adapter->epd = -1; + } +err_free_queue: + if (intr_queue == NULL) + rte_free(dev_info->intr_queue); + + return err; } static int -default_conf_cb(uint8_t id, uint8_t dev_id, - struct rte_event_eth_rx_adapter_conf *conf, void *arg) +rxa_add_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int rx_queue_id) + { - int ret; - struct rte_eventdev *dev; - struct rte_event_dev_config dev_conf; - int started; - uint8_t port_id; - struct rte_event_port_conf *port_conf = arg; - struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id); + int i, j, err; + int si = -1; + int shared_done = (dev_info->nb_shared_intr > 0); + + if (rx_queue_id != -1) { + if (rxa_shared_intr(dev_info, rx_queue_id) && shared_done) + return 0; + return rxa_config_intr(rx_adapter, dev_info, rx_queue_id); + } - dev = &rte_eventdevs[rx_adapter->eventdev_id]; - dev_conf = dev->data->dev_conf; + err = 0; + for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) { - started = dev->data->dev_started; - if (started) - rte_event_dev_stop(dev_id); - port_id = dev_conf.nb_event_ports; - dev_conf.nb_event_ports += 1; - ret = rte_event_dev_configure(dev_id, &dev_conf); - if (ret) { - RTE_EDEV_LOG_ERR("failed to configure event dev %u\n", - dev_id); - if (started) { - if (rte_event_dev_start(dev_id)) - return -EIO; + if (rxa_shared_intr(dev_info, i) && shared_done) + continue; + + err = rxa_config_intr(rx_adapter, dev_info, i); + + shared_done = err == 0 && rxa_shared_intr(dev_info, i); + if (shared_done) { + 
si = i; + dev_info->shared_intr_enabled = 1; } - return ret; + if (err) + break; } - ret = rte_event_port_setup(dev_id, port_id, port_conf); - if (ret) { - RTE_EDEV_LOG_ERR("failed to setup event port %u\n", - port_id); - return ret; + if (err == 0) + return 0; + + shared_done = (dev_info->nb_shared_intr > 0); + for (j = 0; j < i; j++) { + if (rxa_intr_queue(dev_info, j)) + continue; + if (rxa_shared_intr(dev_info, j) && si != j) + continue; + err = rxa_disable_intr(rx_adapter, dev_info, j); + if (err) + break; + } - conf->event_port_id = port_id; - conf->max_nb_rx = 128; - if (started) - ret = rte_event_dev_start(dev_id); - rx_adapter->default_cb_arg = 1; - return ret; + return err; } + static int -init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id) +rxa_init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id) { int ret; struct rte_service_spec service; @@ -638,7 +1614,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id) snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN, "rte_event_eth_rx_adapter_%d", id); service.socket_id = rx_adapter->socket_id; - service.callback = event_eth_rx_adapter_service_func; + service.callback = rxa_service_func; service.callback_userdata = rx_adapter; /* Service function handles locking for queue add/del updates */ service.capabilities = RTE_SERVICE_CAP_MT_SAFE; @@ -659,6 +1635,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id) rx_adapter->event_port_id = rx_adapter_conf.event_port_id; rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx; rx_adapter->service_inited = 1; + rx_adapter->epd = INIT_FD; return 0; err_done: @@ -666,9 +1643,8 @@ err_done: return ret; } - static void -update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter, +rxa_update_queue(struct rte_event_eth_rx_adapter *rx_adapter, struct eth_device_info *dev_info, int32_t rx_queue_id, uint8_t add) @@ -682,7 +1658,7 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter, if (rx_queue_id == -1) { for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) - update_queue_info(rx_adapter, dev_info, i, add); + rxa_update_queue(rx_adapter, dev_info, i, add); } else { queue_info = &dev_info->rx_queue[rx_queue_id]; enabled = queue_info->queue_enabled; @@ -697,31 +1673,65 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter, } } -static int -event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter, - struct eth_device_info *dev_info, - uint16_t rx_queue_id) +static void +rxa_sw_del(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int32_t rx_queue_id) { - struct eth_rx_queue_info *queue_info; + int pollq; + int intrq; + int sintrq; + if (rx_adapter->nb_queues == 0) - return 0; + return; - queue_info = &dev_info->rx_queue[rx_queue_id]; - rx_adapter->num_rx_polled -= queue_info->queue_enabled; - update_queue_info(rx_adapter, dev_info, rx_queue_id, 0); - return 0; + if (rx_queue_id == -1) { + uint16_t nb_rx_queues; + uint16_t i; + + nb_rx_queues = dev_info->dev->data->nb_rx_queues; + for (i = 0; i < nb_rx_queues; i++) + rxa_sw_del(rx_adapter, dev_info, i); + return; + } + + pollq = rxa_polled_queue(dev_info, rx_queue_id); + intrq = rxa_intr_queue(dev_info, rx_queue_id); + sintrq = rxa_shared_intr(dev_info, rx_queue_id); + rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 0); + rx_adapter->num_rx_polled -= pollq; + dev_info->nb_rx_poll -= pollq; + rx_adapter->num_rx_intr -= intrq; + dev_info->nb_rx_intr -= intrq; + dev_info->nb_shared_intr -= intrq && 
sintrq; } static void -event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter, - struct eth_device_info *dev_info, - uint16_t rx_queue_id, - const struct rte_event_eth_rx_adapter_queue_conf *conf) - +rxa_add_queue(struct rte_event_eth_rx_adapter *rx_adapter, + struct eth_device_info *dev_info, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *conf) { struct eth_rx_queue_info *queue_info; const struct rte_event *ev = &conf->ev; + int pollq; + int intrq; + int sintrq; + + if (rx_queue_id == -1) { + uint16_t nb_rx_queues; + uint16_t i; + + nb_rx_queues = dev_info->dev->data->nb_rx_queues; + for (i = 0; i < nb_rx_queues; i++) + rxa_add_queue(rx_adapter, dev_info, i, conf); + return; + } + + pollq = rxa_polled_queue(dev_info, rx_queue_id); + intrq = rxa_intr_queue(dev_info, rx_queue_id); + sintrq = rxa_shared_intr(dev_info, rx_queue_id); queue_info = &dev_info->rx_queue[rx_queue_id]; queue_info->event_queue_id = ev->queue_id; @@ -735,69 +1745,162 @@ event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter, queue_info->flow_id_mask = ~0; } - /* The same queue can be added more than once */ - rx_adapter->num_rx_polled += !queue_info->queue_enabled; - update_queue_info(rx_adapter, dev_info, rx_queue_id, 1); + rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 1); + if (rxa_polled_queue(dev_info, rx_queue_id)) { + rx_adapter->num_rx_polled += !pollq; + dev_info->nb_rx_poll += !pollq; + rx_adapter->num_rx_intr -= intrq; + dev_info->nb_rx_intr -= intrq; + dev_info->nb_shared_intr -= intrq && sintrq; + } + + if (rxa_intr_queue(dev_info, rx_queue_id)) { + rx_adapter->num_rx_polled -= pollq; + dev_info->nb_rx_poll -= pollq; + rx_adapter->num_rx_intr += !intrq; + dev_info->nb_rx_intr += !intrq; + dev_info->nb_shared_intr += !intrq && sintrq; + if (dev_info->nb_shared_intr == 1) { + if (dev_info->multi_intr_cap) + dev_info->next_q_idx = + RTE_MAX_RXTX_INTR_VEC_ID - 1; + else + dev_info->next_q_idx = 0; + } + } } -static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter, - uint8_t eth_dev_id, +static int rxa_sw_add(struct rte_event_eth_rx_adapter *rx_adapter, + uint16_t eth_dev_id, int rx_queue_id, const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) { struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id]; struct rte_event_eth_rx_adapter_queue_conf temp_conf; - uint32_t i; int ret; + struct eth_rx_poll_entry *rx_poll; + struct eth_rx_queue_info *rx_queue; + uint32_t *rx_wrr; + uint16_t nb_rx_queues; + uint32_t nb_rx_poll, nb_wrr; + uint32_t nb_rx_intr; + int num_intr_vec; + uint16_t wt; if (queue_conf->servicing_weight == 0) { - struct rte_eth_dev_data *data = dev_info->dev->data; - if (data->dev_conf.intr_conf.rxq) { - RTE_EDEV_LOG_ERR("Interrupt driven queues" - " not supported"); - return -ENOTSUP; - } - temp_conf = *queue_conf; - /* If Rx interrupts are disabled set wt = 1 */ - temp_conf.servicing_weight = 1; + temp_conf = *queue_conf; + if (!data->dev_conf.intr_conf.rxq) { + /* If Rx interrupts are disabled set wt = 1 */ + temp_conf.servicing_weight = 1; + } queue_conf = &temp_conf; } + nb_rx_queues = dev_info->dev->data->nb_rx_queues; + rx_queue = dev_info->rx_queue; + wt = queue_conf->servicing_weight; + if (dev_info->rx_queue == NULL) { dev_info->rx_queue = rte_zmalloc_socket(rx_adapter->mem_name, - dev_info->dev->data->nb_rx_queues * + nb_rx_queues * sizeof(struct eth_rx_queue_info), 0, rx_adapter->socket_id); if (dev_info->rx_queue == NULL) return -ENOMEM; } + rx_wrr = NULL; + rx_poll = NULL; - 
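/*
 * Illustrative sketch, not part of this patch: application-side setup for an
 * interrupt-driven Rx queue.  A queue added with servicing_weight == 0 is
 * handled in interrupt mode by the adapter code above, provided the ethdev
 * was configured with Rx queue interrupts enabled.  The port, adapter and
 * event queue ids below are hypothetical.
 */
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
demo_add_intr_queue(uint8_t adapter_id, uint16_t port_id, uint8_t ev_queue_id)
{
	struct rte_eth_conf port_conf = {
		.intr_conf = { .rxq = 1 },	/* enable Rx queue interrupts */
	};
	struct rte_event_eth_rx_adapter_queue_conf qconf = {
		.servicing_weight = 0,		/* 0 => interrupt mode */
		.ev = {
			.queue_id = ev_queue_id,
			.sched_type = RTE_SCHED_TYPE_ATOMIC,
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
		},
	};
	int ret;

	ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
	if (ret < 0)
		return ret;

	/* Rx/Tx queue setup and rte_eth_dev_start() omitted for brevity */

	/* rx_queue_id of -1 adds every Rx queue of the port */
	return rte_event_eth_rx_adapter_queue_add(adapter_id, port_id, -1,
						  &qconf);
}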
if (rx_queue_id == -1) { - for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) - event_eth_rx_adapter_queue_add(rx_adapter, - dev_info, i, - queue_conf); + rxa_calc_nb_post_add(rx_adapter, dev_info, rx_queue_id, + queue_conf->servicing_weight, + &nb_rx_poll, &nb_rx_intr, &nb_wrr); + + if (dev_info->dev->intr_handle) + dev_info->multi_intr_cap = + rte_intr_cap_multiple(dev_info->dev->intr_handle); + + ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr, + &rx_poll, &rx_wrr); + if (ret) + goto err_free_rxqueue; + + if (wt == 0) { + num_intr_vec = rxa_nb_intr_vect(dev_info, rx_queue_id, 1); + + ret = rxa_intr_ring_check_avail(rx_adapter, num_intr_vec); + if (ret) + goto err_free_rxqueue; + + ret = rxa_add_intr_queue(rx_adapter, dev_info, rx_queue_id); + if (ret) + goto err_free_rxqueue; } else { - event_eth_rx_adapter_queue_add(rx_adapter, dev_info, - (uint16_t)rx_queue_id, - queue_conf); + + num_intr_vec = 0; + if (rx_adapter->num_rx_intr > nb_rx_intr) { + num_intr_vec = rxa_nb_intr_vect(dev_info, + rx_queue_id, 0); + /* interrupt based queues are being converted to + * poll mode queues, delete the interrupt configuration + * for those. + */ + ret = rxa_del_intr_queue(rx_adapter, + dev_info, rx_queue_id); + if (ret) + goto err_free_rxqueue; + } } - ret = eth_poll_wrr_calc(rx_adapter); - if (ret) { - event_eth_rx_adapter_queue_del(rx_adapter, - dev_info, rx_queue_id); - return ret; + if (nb_rx_intr == 0) { + ret = rxa_free_intr_resources(rx_adapter); + if (ret) + goto err_free_rxqueue; } - return ret; + if (wt == 0) { + uint16_t i; + + if (rx_queue_id == -1) { + for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) + dev_info->intr_queue[i] = i; + } else { + if (!rxa_intr_queue(dev_info, rx_queue_id)) + dev_info->intr_queue[nb_rx_intr - 1] = + rx_queue_id; + } + } + + + + rxa_add_queue(rx_adapter, dev_info, rx_queue_id, queue_conf); + rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr); + + rte_free(rx_adapter->eth_rx_poll); + rte_free(rx_adapter->wrr_sched); + + rx_adapter->eth_rx_poll = rx_poll; + rx_adapter->wrr_sched = rx_wrr; + rx_adapter->wrr_len = nb_wrr; + rx_adapter->num_intr_vec += num_intr_vec; + return 0; + +err_free_rxqueue: + if (rx_queue == NULL) { + rte_free(dev_info->rx_queue); + dev_info->rx_queue = NULL; + } + + rte_free(rx_poll); + rte_free(rx_wrr); + + return 0; } static int -rx_adapter_ctrl(uint8_t id, int start) +rxa_ctrl(uint8_t id, int start) { struct rte_event_eth_rx_adapter *rx_adapter; struct rte_eventdev *dev; @@ -807,13 +1910,13 @@ rx_adapter_ctrl(uint8_t id, int start) int stop = !start; RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL) return -EINVAL; dev = &rte_eventdevs[rx_adapter->eventdev_id]; - for (i = 0; i < rte_eth_dev_count(); i++) { + RTE_ETH_FOREACH_DEV(i) { dev_info = &rx_adapter->eth_devices[i]; /* if start check for num dev queues */ if (start && !dev_info->nb_dev_queues) @@ -831,8 +1934,12 @@ rx_adapter_ctrl(uint8_t id, int start) &rte_eth_devices[i]); } - if (use_service) + if (use_service) { + rte_spinlock_lock(&rx_adapter->rx_lock); + rx_adapter->rxa_started = start; rte_service_runstate_set(rx_adapter->service_id, start); + rte_spinlock_unlock(&rx_adapter->rx_lock); + } return 0; } @@ -845,7 +1952,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, struct rte_event_eth_rx_adapter *rx_adapter; int ret; int socket_id; - uint8_t i; + uint16_t i; char mem_name[ETH_RX_ADAPTER_SERVICE_NAME_LEN]; const uint8_t 
default_rss_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, @@ -866,7 +1973,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, return ret; } - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter != NULL) { RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id); return -EEXIST; @@ -888,9 +1995,11 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, rx_adapter->socket_id = socket_id; rx_adapter->conf_cb = conf_cb; rx_adapter->conf_arg = conf_arg; + rx_adapter->id = id; strcpy(rx_adapter->mem_name, mem_name); rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name, - rte_eth_dev_count() * + /* FIXME: incompatible with hotplug */ + rte_eth_dev_count_total() * sizeof(struct eth_device_info), 0, socket_id); rte_convert_rss_key((const uint32_t *)default_rss_key, @@ -903,11 +2012,11 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, return -ENOMEM; } rte_spinlock_init(&rx_adapter->rx_lock); - for (i = 0; i < rte_eth_dev_count(); i++) + RTE_ETH_FOREACH_DEV(i) rx_adapter->eth_devices[i].dev = &rte_eth_devices[i]; event_eth_rx_adapter[id] = rx_adapter; - if (conf_cb == default_conf_cb) + if (conf_cb == rxa_default_conf_cb) rx_adapter->default_cb_arg = 1; return 0; } @@ -928,7 +2037,7 @@ rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id, return -ENOMEM; *pc = *port_config; ret = rte_event_eth_rx_adapter_create_ext(id, dev_id, - default_conf_cb, + rxa_default_conf_cb, pc); if (ret) rte_free(pc); @@ -942,7 +2051,7 @@ rte_event_eth_rx_adapter_free(uint8_t id) RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL) return -EINVAL; @@ -963,7 +2072,7 @@ rte_event_eth_rx_adapter_free(uint8_t id) int rte_event_eth_rx_adapter_queue_add(uint8_t id, - uint8_t eth_dev_id, + uint16_t eth_dev_id, int32_t rx_queue_id, const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) { @@ -972,12 +2081,11 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, struct rte_event_eth_rx_adapter *rx_adapter; struct rte_eventdev *dev; struct eth_device_info *dev_info; - int start_service; RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if ((rx_adapter == NULL) || (queue_conf == NULL)) return -EINVAL; @@ -987,7 +2095,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, &cap); if (ret) { RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8 - "eth port %" PRIu8, id, eth_dev_id); + "eth port %" PRIu16, id, eth_dev_id); return ret; } @@ -995,7 +2103,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, && (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)) { RTE_EDEV_LOG_ERR("Flow ID override is not supported," - " eth port: %" PRIu8 " adapter id: %" PRIu8, + " eth port: %" PRIu16 " adapter id: %" PRIu8, eth_dev_id, id); return -EINVAL; } @@ -1003,7 +2111,8 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) == 0 && (rx_queue_id != -1)) { RTE_EDEV_LOG_ERR("Rx queues can only be connected to single " - "event queue id %u eth port %u", id, eth_dev_id); + "event queue, eth port: %" PRIu16 " adapter id: %" + PRIu8, eth_dev_id, id); return -EINVAL; } @@ -1014,7 +2123,6 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, return -EINVAL; } - start_service = 0; dev_info = &rx_adapter->eth_devices[eth_dev_id]; if 
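/*
 * Illustrative sketch, not part of this patch: supplying a configuration
 * callback to rte_event_eth_rx_adapter_create_ext().  The default callback
 * used by rte_event_eth_rx_adapter_create() (rxa_default_conf_cb above)
 * stops and reconfigures the event device to allocate an extra event port;
 * an application that has already reserved a port can hand it over instead.
 * APP_RX_ADAPTER_PORT is a hypothetical, application-defined port id.
 */
#include <rte_common.h>
#include <rte_event_eth_rx_adapter.h>

#define APP_RX_ADAPTER_PORT 3	/* event port already set up by the app */

static int
demo_conf_cb(uint8_t id, uint8_t dev_id,
	     struct rte_event_eth_rx_adapter_conf *conf, void *arg)
{
	RTE_SET_USED(id);
	RTE_SET_USED(dev_id);
	RTE_SET_USED(arg);

	conf->event_port_id = APP_RX_ADAPTER_PORT;
	conf->max_nb_rx = 128;	/* upper bound on packets per service run */
	return 0;
}

static int
demo_create_adapter(uint8_t id, uint8_t dev_id)
{
	return rte_event_eth_rx_adapter_create_ext(id, dev_id, demo_conf_cb,
						   NULL);
}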
(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) { @@ -1034,33 +2142,34 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, &rte_eth_devices[eth_dev_id], rx_queue_id, queue_conf); if (ret == 0) { - update_queue_info(rx_adapter, + dev_info->internal_event_port = 1; + rxa_update_queue(rx_adapter, &rx_adapter->eth_devices[eth_dev_id], rx_queue_id, 1); } } else { rte_spinlock_lock(&rx_adapter->rx_lock); - ret = init_service(rx_adapter, id); - if (ret == 0) - ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id, + dev_info->internal_event_port = 0; + ret = rxa_init_service(rx_adapter, id); + if (ret == 0) { + uint32_t service_id = rx_adapter->service_id; + ret = rxa_sw_add(rx_adapter, eth_dev_id, rx_queue_id, queue_conf); + rte_service_component_runstate_set(service_id, + rxa_sw_adapter_queue_count(rx_adapter)); + } rte_spinlock_unlock(&rx_adapter->rx_lock); - if (ret == 0) - start_service = !!sw_rx_adapter_queue_count(rx_adapter); } if (ret) return ret; - if (start_service) - rte_service_component_runstate_set(rx_adapter->service_id, 1); - return 0; } int -rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, +rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id, int32_t rx_queue_id) { int ret = 0; @@ -1068,12 +2177,17 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, struct rte_event_eth_rx_adapter *rx_adapter; struct eth_device_info *dev_info; uint32_t cap; - uint16_t i; + uint32_t nb_rx_poll = 0; + uint32_t nb_wrr = 0; + uint32_t nb_rx_intr; + struct eth_rx_poll_entry *rx_poll = NULL; + uint32_t *rx_wrr = NULL; + int num_intr_vec; RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL) return -EINVAL; @@ -1100,7 +2214,7 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, &rte_eth_devices[eth_dev_id], rx_queue_id); if (ret == 0) { - update_queue_info(rx_adapter, + rxa_update_queue(rx_adapter, &rx_adapter->eth_devices[eth_dev_id], rx_queue_id, 0); @@ -1110,48 +2224,78 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, } } } else { - int rc; + rxa_calc_nb_post_del(rx_adapter, dev_info, rx_queue_id, + &nb_rx_poll, &nb_rx_intr, &nb_wrr); + + ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr, + &rx_poll, &rx_wrr); + if (ret) + return ret; + rte_spinlock_lock(&rx_adapter->rx_lock); - if (rx_queue_id == -1) { - for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) - event_eth_rx_adapter_queue_del(rx_adapter, - dev_info, - i); - } else { - event_eth_rx_adapter_queue_del(rx_adapter, - dev_info, - (uint16_t)rx_queue_id); + + num_intr_vec = 0; + if (rx_adapter->num_rx_intr > nb_rx_intr) { + + num_intr_vec = rxa_nb_intr_vect(dev_info, + rx_queue_id, 0); + ret = rxa_del_intr_queue(rx_adapter, dev_info, + rx_queue_id); + if (ret) + goto unlock_ret; + } + + if (nb_rx_intr == 0) { + ret = rxa_free_intr_resources(rx_adapter); + if (ret) + goto unlock_ret; + } + + rxa_sw_del(rx_adapter, dev_info, rx_queue_id); + rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr); + + rte_free(rx_adapter->eth_rx_poll); + rte_free(rx_adapter->wrr_sched); + + if (nb_rx_intr == 0) { + rte_free(dev_info->intr_queue); + dev_info->intr_queue = NULL; } - rc = eth_poll_wrr_calc(rx_adapter); - if (rc) - RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32, - rc); + rx_adapter->eth_rx_poll = rx_poll; + rx_adapter->wrr_sched = rx_wrr; + rx_adapter->wrr_len = nb_wrr; + 
rx_adapter->num_intr_vec += num_intr_vec; if (dev_info->nb_dev_queues == 0) { rte_free(dev_info->rx_queue); dev_info->rx_queue = NULL; } - +unlock_ret: rte_spinlock_unlock(&rx_adapter->rx_lock); + if (ret) { + rte_free(rx_poll); + rte_free(rx_wrr); + return ret; + } + rte_service_component_runstate_set(rx_adapter->service_id, - sw_rx_adapter_queue_count(rx_adapter)); + rxa_sw_adapter_queue_count(rx_adapter)); } return ret; } - int rte_event_eth_rx_adapter_start(uint8_t id) { - return rx_adapter_ctrl(id, 1); + return rxa_ctrl(id, 1); } int rte_event_eth_rx_adapter_stop(uint8_t id) { - return rx_adapter_ctrl(id, 0); + return rxa_ctrl(id, 0); } int @@ -1168,13 +2312,13 @@ rte_event_eth_rx_adapter_stats_get(uint8_t id, RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL || stats == NULL) return -EINVAL; dev = &rte_eventdevs[rx_adapter->eventdev_id]; memset(stats, 0, sizeof(*stats)); - for (i = 0; i < rte_eth_dev_count(); i++) { + RTE_ETH_FOREACH_DEV(i) { dev_info = &rx_adapter->eth_devices[i]; if (dev_info->internal_event_port == 0 || dev->dev_ops->eth_rx_adapter_stats_get == NULL) @@ -1206,12 +2350,12 @@ rte_event_eth_rx_adapter_stats_reset(uint8_t id) RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL) return -EINVAL; dev = &rte_eventdevs[rx_adapter->eventdev_id]; - for (i = 0; i < rte_eth_dev_count(); i++) { + RTE_ETH_FOREACH_DEV(i) { dev_info = &rx_adapter->eth_devices[i]; if (dev_info->internal_event_port == 0 || dev->dev_ops->eth_rx_adapter_stats_reset == NULL) @@ -1231,7 +2375,7 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id) RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); - rx_adapter = id_to_rx_adapter(id); + rx_adapter = rxa_id_to_adapter(id); if (rx_adapter == NULL || service_id == NULL) return -EINVAL; @@ -1240,3 +2384,47 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id) return rx_adapter->service_inited ? 
0 : -ESRCH; } + +int rte_event_eth_rx_adapter_cb_register(uint8_t id, + uint16_t eth_dev_id, + rte_event_eth_rx_adapter_cb_fn cb_fn, + void *cb_arg) +{ + struct rte_event_eth_rx_adapter *rx_adapter; + struct eth_device_info *dev_info; + uint32_t cap; + int ret; + + RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); + + rx_adapter = rxa_id_to_adapter(id); + if (rx_adapter == NULL) + return -EINVAL; + + dev_info = &rx_adapter->eth_devices[eth_dev_id]; + if (dev_info->rx_queue == NULL) + return -EINVAL; + + ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id, + eth_dev_id, + &cap); + if (ret) { + RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8 + "eth port %" PRIu16, id, eth_dev_id); + return ret; + } + + if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) { + RTE_EDEV_LOG_ERR("Rx callback not supported for eth port %" + PRIu16, eth_dev_id); + return -EINVAL; + } + + rte_spinlock_lock(&rx_adapter->rx_lock); + dev_info->cb_fn = cb_fn; + dev_info->cb_arg = cb_arg; + rte_spinlock_unlock(&rx_adapter->rx_lock); + + return 0; +} diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h index c20507b2..332ee216 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h @@ -1,32 +1,6 @@ -/* - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation. + * All rights reserved. */ #ifndef _RTE_EVENT_ETH_RX_ADAPTER_ @@ -47,7 +21,11 @@ * * The adapter uses a EAL service core function for SW based packet transfer * and uses the eventdev PMD functions to configure HW based packet transfer - * between the ethernet device and the event device. + * between the ethernet device and the event device. 
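/*
 * Illustrative sketch, not part of this patch: when the eventdev PMD does
 * not provide an internal port (RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT),
 * packet transfer runs in the service function registered above, so the
 * application must map that service to a service lcore before starting the
 * adapter.  The lcore id passed in below is hypothetical.
 */
#include <rte_service.h>
#include <rte_event_eth_rx_adapter.h>

static int
demo_launch_adapter(uint8_t id, uint32_t service_lcore)
{
	uint32_t service_id;

	if (rte_event_eth_rx_adapter_service_id_get(id, &service_id) == 0) {
		/* SW transfer path: give the service somewhere to run */
		rte_service_lcore_add(service_lcore);
		rte_service_map_lcore_set(service_id, service_lcore, 1);
		rte_service_lcore_start(service_lcore);
	}

	/* starts HW transfer or sets the service run state, as appropriate */
	return rte_event_eth_rx_adapter_start(id);
}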
For SW based packet + * transfer, if the mbuf does not have a timestamp set, the adapter adds a + * timestamp to the mbuf using rte_get_tsc_cycles(), this provides a more + * accurate timestamp as compared to if the application were to set the time + * stamp since it avoids event device schedule latency. * * The ethernet Rx event adapter's functions are: * - rte_event_eth_rx_adapter_create_ext() @@ -85,7 +63,23 @@ * rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve * the service function ID of the adapter in this case. * - * Note: Interrupt driven receive queues are currently unimplemented. + * For SW based packet transfers, i.e., when the + * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is not set in the adapter's + * capabilities flags for a particular ethernet device, the service function + * temporarily enqueues mbufs to an event buffer before batch enqueueing these + * to the event device. If the buffer fills up, the service function stops + * dequeueing packets from the ethernet device. The application may want to + * monitor the buffer fill level and instruct the service function to + * selectively buffer packets. The application may also use some other + * criteria to decide which packets should enter the event device even when + * the event buffer fill level is low. The + * rte_event_eth_rx_adapter_cb_register() function allows the + * application to register a callback that selects which packets to enqueue + * to the event device. + * + * Note: + * 1) Devices created after an instance of rte_event_eth_rx_adapter_create + * should be added to a new instance of the rx adapter. */ #ifdef __cplusplus @@ -218,8 +212,51 @@ struct rte_event_eth_rx_adapter_stats { * block cycles can be used to compute the percentage of * cycles the service is blocked by the event device. */ + uint64_t rx_intr_packets; + /**< Received packet count for interrupt mode Rx queues */ }; +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Callback function invoked by the SW adapter before it continues + * to process packets. The callback is passed the size of the enqueue + * buffer in the SW adapter and the occupancy of the buffer. The + * callback can use these values to decide which mbufs should be + * enqueued to the event device. If the return value of the callback + * is less than nb_mbuf then the SW adapter uses the return value to + * enqueue enq_mbuf[] to the event device. + * + * @param eth_dev_id + * Port identifier of the Ethernet device. + * @param queue_id + * Receive queue index. + * @param enqueue_buf_size + * Total enqueue buffer size. + * @param enqueue_buf_count + * mbuf count in enqueue buffer. + * @param mbuf + * mbuf array. + * @param nb_mbuf + * mbuf count. + * @param cb_arg + * Callback argument. + * @param[out] enq_mbuf + * The adapter enqueues enq_mbuf[] if the return value of the + * callback is less than nb_mbuf + * @return + * Returns the number of mbufs should be enqueued to eventdev + */ +typedef uint16_t (*rte_event_eth_rx_adapter_cb_fn)(uint16_t eth_dev_id, + uint16_t queue_id, + uint32_t enqueue_buf_size, + uint32_t enqueue_buf_count, + struct rte_mbuf **mbuf, + uint16_t nb_mbuf, + void *cb_arg, + struct rte_mbuf **enq_buf); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice @@ -329,7 +366,7 @@ int rte_event_eth_rx_adapter_free(uint8_t id); * combination of the two error codes. 
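/*
 * Illustrative sketch, not part of this patch: a callback matching the
 * rte_event_eth_rx_adapter_cb_fn typedef above.  It passes received mbufs
 * through while the adapter's enqueue buffer has room and starts dropping
 * (freeing) packets once the buffer is nearly full.  The 32-entry headroom
 * is an arbitrary value chosen only for illustration.
 */
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_event_eth_rx_adapter.h>

static uint16_t
demo_rx_cb(uint16_t eth_dev_id, uint16_t queue_id,
	   uint32_t enqueue_buf_size, uint32_t enqueue_buf_count,
	   struct rte_mbuf **mbuf, uint16_t nb_mbuf,
	   void *cb_arg, struct rte_mbuf **enq_buf)
{
	uint16_t i, nb_enq = 0;

	RTE_SET_USED(eth_dev_id);
	RTE_SET_USED(queue_id);
	RTE_SET_USED(cb_arg);

	for (i = 0; i < nb_mbuf; i++) {
		if (enqueue_buf_count + nb_enq < enqueue_buf_size - 32)
			enq_buf[nb_enq++] = mbuf[i];	/* keep the packet */
		else
			rte_pktmbuf_free(mbuf[i]);	/* buffer nearly full */
	}
	return nb_enq;
}

/* Registration, e.g. after the queue has been added to the adapter:
 *	rte_event_eth_rx_adapter_cb_register(id, eth_dev_id, demo_rx_cb, NULL);
 */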
*/ int rte_event_eth_rx_adapter_queue_add(uint8_t id, - uint8_t eth_dev_id, + uint16_t eth_dev_id, int32_t rx_queue_id, const struct rte_event_eth_rx_adapter_queue_conf *conf); @@ -357,7 +394,7 @@ int rte_event_eth_rx_adapter_queue_add(uint8_t id, * - 0: Success, Receive queue deleted correctly. * - <0: Error code on failure. */ -int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id, +int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id, int32_t rx_queue_id); /** @@ -444,6 +481,32 @@ int rte_event_eth_rx_adapter_stats_reset(uint8_t id); */ int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register callback to process Rx packets, this is supported for + * SW based packet transfers. + * @see rte_event_eth_rx_cb_fn + * + * @param id + * Adapter identifier. + * @param eth_dev_id + * Port identifier of Ethernet device. + * @param cb_fn + * Callback function. + * @param cb_arg + * Callback arg. + * @return + * - 0: Success + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_rx_adapter_cb_register(uint8_t id, + uint16_t eth_dev_id, + rte_event_eth_rx_adapter_cb_fn cb_fn, + void *cb_arg); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c index eb67751d..16d02a95 100644 --- a/lib/librte_eventdev/rte_event_ring.c +++ b/lib/librte_eventdev/rte_event_ring.c @@ -82,11 +82,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id, mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags); if (mz != NULL) { r = mz->addr; - /* - * no need to check return value here, we already checked the - * arguments above - */ - rte_event_ring_init(r, name, requested_count, flags); + /* Check return value in case rte_ring_init() fails on size */ + int err = rte_event_ring_init(r, name, requested_count, flags); + if (err) { + RTE_LOG(ERR, RING, "Ring init failed\n"); + if (rte_memzone_free(mz) != 0) + RTE_LOG(ERR, RING, "Cannot free memzone\n"); + rte_free(te); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return NULL; + } te->data = (void *) r; r->r.memzone = mz; diff --git a/lib/librte_eventdev/rte_event_ring.h b/lib/librte_eventdev/rte_event_ring.h index 29d4228a..827a3209 100644 --- a/lib/librte_eventdev/rte_event_ring.h +++ b/lib/librte_eventdev/rte_event_ring.h @@ -99,7 +99,7 @@ rte_event_ring_enqueue_burst(struct rte_event_ring *r, ENQUEUE_PTRS(&r->r, &r[1], prod_head, events, n, struct rte_event); - update_tail(&r->r.prod, prod_head, prod_next, 1, 1); + update_tail(&r->r.prod, prod_head, prod_next, r->r.prod.single, 1); end: if (free_space != NULL) *free_space = free_entries - n; @@ -140,7 +140,7 @@ rte_event_ring_dequeue_burst(struct rte_event_ring *r, DEQUEUE_PTRS(&r->r, &r[1], cons_head, events, n, struct rte_event); - update_tail(&r->r.cons, cons_head, cons_next, 1, 0); + update_tail(&r->r.cons, cons_head, cons_next, r->r.cons.single, 0); end: if (available != NULL) diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c b/lib/librte_eventdev/rte_event_timer_adapter.c new file mode 100644 index 00000000..79070d48 --- /dev/null +++ b/lib/librte_eventdev/rte_event_timer_adapter.c @@ -0,0 +1,1299 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation. + * All rights reserved. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_eventdev.h" +#include "rte_eventdev_pmd.h" +#include "rte_event_timer_adapter.h" +#include "rte_event_timer_adapter_pmd.h" + +#define DATA_MZ_NAME_MAX_LEN 64 +#define DATA_MZ_NAME_FORMAT "rte_event_timer_adapter_data_%d" + +static int evtim_logtype; +static int evtim_svc_logtype; +static int evtim_buffer_logtype; + +static struct rte_event_timer_adapter adapters[RTE_EVENT_TIMER_ADAPTER_NUM_MAX]; + +static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops; + +#define EVTIM_LOG(level, logtype, ...) \ + rte_log(RTE_LOG_ ## level, logtype, \ + RTE_FMT("EVTIMER: %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) \ + "\n", __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) + +#define EVTIM_LOG_ERR(...) EVTIM_LOG(ERR, evtim_logtype, __VA_ARGS__) + +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG +#define EVTIM_LOG_DBG(...) \ + EVTIM_LOG(DEBUG, evtim_logtype, __VA_ARGS__) +#define EVTIM_BUF_LOG_DBG(...) \ + EVTIM_LOG(DEBUG, evtim_buffer_logtype, __VA_ARGS__) +#define EVTIM_SVC_LOG_DBG(...) \ + EVTIM_LOG(DEBUG, evtim_svc_logtype, __VA_ARGS__) +#else +#define EVTIM_LOG_DBG(...) (void)0 +#define EVTIM_BUF_LOG_DBG(...) (void)0 +#define EVTIM_SVC_LOG_DBG(...) (void)0 +#endif + +static int +default_port_conf_cb(uint16_t id, uint8_t event_dev_id, uint8_t *event_port_id, + void *conf_arg) +{ + struct rte_event_timer_adapter *adapter; + struct rte_eventdev *dev; + struct rte_event_dev_config dev_conf; + struct rte_event_port_conf *port_conf, def_port_conf = {0}; + int started; + uint8_t port_id; + uint8_t dev_id; + int ret; + + RTE_SET_USED(event_dev_id); + + adapter = &adapters[id]; + dev = &rte_eventdevs[adapter->data->event_dev_id]; + dev_id = dev->data->dev_id; + dev_conf = dev->data->dev_conf; + + started = dev->data->dev_started; + if (started) + rte_event_dev_stop(dev_id); + + port_id = dev_conf.nb_event_ports; + dev_conf.nb_event_ports += 1; + ret = rte_event_dev_configure(dev_id, &dev_conf); + if (ret < 0) { + EVTIM_LOG_ERR("failed to configure event dev %u\n", dev_id); + if (started) + if (rte_event_dev_start(dev_id)) + return -EIO; + + return ret; + } + + if (conf_arg != NULL) + port_conf = conf_arg; + else { + port_conf = &def_port_conf; + ret = rte_event_port_default_conf_get(dev_id, port_id, + port_conf); + if (ret < 0) + return ret; + } + + ret = rte_event_port_setup(dev_id, port_id, port_conf); + if (ret < 0) { + EVTIM_LOG_ERR("failed to setup event port %u on event dev %u\n", + port_id, dev_id); + return ret; + } + + *event_port_id = port_id; + + if (started) + ret = rte_event_dev_start(dev_id); + + return ret; +} + +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_create(const struct rte_event_timer_adapter_conf *conf) +{ + return rte_event_timer_adapter_create_ext(conf, default_port_conf_cb, + NULL); +} + +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_create_ext( + const struct rte_event_timer_adapter_conf *conf, + rte_event_timer_adapter_port_conf_cb_t conf_cb, + void *conf_arg) +{ + uint16_t adapter_id; + struct rte_event_timer_adapter *adapter; + const struct rte_memzone *mz; + char mz_name[DATA_MZ_NAME_MAX_LEN]; + int n, ret; + struct rte_eventdev *dev; + + if (conf == NULL) { + rte_errno = EINVAL; + return NULL; + } + + /* Check eventdev ID */ + if (!rte_event_pmd_is_valid_dev(conf->event_dev_id)) { + rte_errno = EINVAL; + return NULL; + } + dev = 
&rte_eventdevs[conf->event_dev_id]; + + adapter_id = conf->timer_adapter_id; + + /* Check that adapter_id is in range */ + if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) { + rte_errno = EINVAL; + return NULL; + } + + /* Check adapter ID not already allocated */ + adapter = &adapters[adapter_id]; + if (adapter->allocated) { + rte_errno = EEXIST; + return NULL; + } + + /* Create shared data area. */ + n = snprintf(mz_name, sizeof(mz_name), DATA_MZ_NAME_FORMAT, adapter_id); + if (n >= (int)sizeof(mz_name)) { + rte_errno = EINVAL; + return NULL; + } + mz = rte_memzone_reserve(mz_name, + sizeof(struct rte_event_timer_adapter_data), + conf->socket_id, 0); + if (mz == NULL) + /* rte_errno set by rte_memzone_reserve */ + return NULL; + + adapter->data = mz->addr; + memset(adapter->data, 0, sizeof(struct rte_event_timer_adapter_data)); + + adapter->data->mz = mz; + adapter->data->event_dev_id = conf->event_dev_id; + adapter->data->id = adapter_id; + adapter->data->socket_id = conf->socket_id; + adapter->data->conf = *conf; /* copy conf structure */ + + /* Query eventdev PMD for timer adapter capabilities and ops */ + ret = dev->dev_ops->timer_adapter_caps_get(dev, + adapter->data->conf.flags, + &adapter->data->caps, + &adapter->ops); + if (ret < 0) { + rte_errno = ret; + goto free_memzone; + } + + if (!(adapter->data->caps & + RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) { + FUNC_PTR_OR_NULL_RET_WITH_ERRNO(conf_cb, -EINVAL); + ret = conf_cb(adapter->data->id, adapter->data->event_dev_id, + &adapter->data->event_port_id, conf_arg); + if (ret < 0) { + rte_errno = ret; + goto free_memzone; + } + } + + /* If eventdev PMD did not provide ops, use default software + * implementation. + */ + if (adapter->ops == NULL) + adapter->ops = &sw_event_adapter_timer_ops; + + /* Allow driver to do some setup */ + FUNC_PTR_OR_NULL_RET_WITH_ERRNO(adapter->ops->init, -ENOTSUP); + ret = adapter->ops->init(adapter); + if (ret < 0) { + rte_errno = ret; + goto free_memzone; + } + + /* Set fast-path function pointers */ + adapter->arm_burst = adapter->ops->arm_burst; + adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst; + adapter->cancel_burst = adapter->ops->cancel_burst; + + adapter->allocated = 1; + + return adapter; + +free_memzone: + rte_memzone_free(adapter->data->mz); + return NULL; +} + +int __rte_experimental +rte_event_timer_adapter_get_info(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_info *adapter_info) +{ + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + + if (adapter->ops->get_info) + /* let driver set values it knows */ + adapter->ops->get_info(adapter, adapter_info); + + /* Set common values */ + adapter_info->conf = adapter->data->conf; + adapter_info->event_dev_port_id = adapter->data->event_port_id; + adapter_info->caps = adapter->data->caps; + + return 0; +} + +int __rte_experimental +rte_event_timer_adapter_start(const struct rte_event_timer_adapter *adapter) +{ + int ret; + + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->ops->start, -EINVAL); + + ret = adapter->ops->start(adapter); + if (ret < 0) + return ret; + + adapter->data->started = 1; + + return 0; +} + +int __rte_experimental +rte_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter) +{ + int ret; + + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->ops->stop, -EINVAL); + + if (adapter->data->started == 0) { + EVTIM_LOG_ERR("event timer adapter %"PRIu8" already stopped", + adapter->data->id); + return 0; + } + + ret = 
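/*
 * Illustrative sketch, not part of this patch: creating and starting an
 * event timer adapter with the default port configuration callback shown
 * above.  The ids, tick interval and timer count are hypothetical example
 * values; the field names match struct rte_event_timer_adapter_conf.
 */
#include <rte_lcore.h>
#include <rte_event_timer_adapter.h>

static struct rte_event_timer_adapter *
demo_timer_adapter_create(uint8_t event_dev_id, uint16_t adapter_id)
{
	struct rte_event_timer_adapter *adapter;
	struct rte_event_timer_adapter_conf conf = {
		.event_dev_id = event_dev_id,
		.timer_adapter_id = adapter_id,
		.socket_id = rte_socket_id(),
		.timer_tick_ns = 1000 * 1000,		/* 1 ms resolution */
		.max_tmo_ns = 10ULL * 1000 * 1000 * 1000, /* 10 s maximum */
		.nb_timers = 10000,
		.flags = 0,
	};

	adapter = rte_event_timer_adapter_create(&conf);
	if (adapter == NULL)
		return NULL;	/* rte_errno set by create() */

	if (rte_event_timer_adapter_start(adapter) < 0) {
		rte_event_timer_adapter_free(adapter);
		return NULL;
	}
	return adapter;
}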
adapter->ops->stop(adapter); + if (ret < 0) + return ret; + + adapter->data->started = 0; + + return 0; +} + +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_lookup(uint16_t adapter_id) +{ + char name[DATA_MZ_NAME_MAX_LEN]; + const struct rte_memzone *mz; + struct rte_event_timer_adapter_data *data; + struct rte_event_timer_adapter *adapter; + int ret; + struct rte_eventdev *dev; + + if (adapters[adapter_id].allocated) + return &adapters[adapter_id]; /* Adapter is already loaded */ + + snprintf(name, DATA_MZ_NAME_MAX_LEN, DATA_MZ_NAME_FORMAT, adapter_id); + mz = rte_memzone_lookup(name); + if (mz == NULL) { + rte_errno = ENOENT; + return NULL; + } + + data = mz->addr; + + adapter = &adapters[data->id]; + adapter->data = data; + + dev = &rte_eventdevs[adapter->data->event_dev_id]; + + /* Query eventdev PMD for timer adapter capabilities and ops */ + ret = dev->dev_ops->timer_adapter_caps_get(dev, + adapter->data->conf.flags, + &adapter->data->caps, + &adapter->ops); + if (ret < 0) { + rte_errno = EINVAL; + return NULL; + } + + /* If eventdev PMD did not provide ops, use default software + * implementation. + */ + if (adapter->ops == NULL) + adapter->ops = &sw_event_adapter_timer_ops; + + /* Set fast-path function pointers */ + adapter->arm_burst = adapter->ops->arm_burst; + adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst; + adapter->cancel_burst = adapter->ops->cancel_burst; + + adapter->allocated = 1; + + return adapter; +} + +int __rte_experimental +rte_event_timer_adapter_free(struct rte_event_timer_adapter *adapter) +{ + int ret; + + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->ops->uninit, -EINVAL); + + if (adapter->data->started == 1) { + EVTIM_LOG_ERR("event timer adapter %"PRIu8" must be stopped " + "before freeing", adapter->data->id); + return -EBUSY; + } + + /* free impl priv data */ + ret = adapter->ops->uninit(adapter); + if (ret < 0) + return ret; + + /* free shared data area */ + ret = rte_memzone_free(adapter->data->mz); + if (ret < 0) + return ret; + + adapter->data = NULL; + adapter->allocated = 0; + + return 0; +} + +int __rte_experimental +rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter, + uint32_t *service_id) +{ + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + + if (adapter->data->service_inited && service_id != NULL) + *service_id = adapter->data->service_id; + + return adapter->data->service_inited ? 0 : -ESRCH; +} + +int __rte_experimental +rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats) +{ + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->ops->stats_get, -EINVAL); + if (stats == NULL) + return -EINVAL; + + return adapter->ops->stats_get(adapter, stats); +} + +int __rte_experimental +rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter) +{ + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->ops->stats_reset, -EINVAL); + return adapter->ops->stats_reset(adapter); +} + +/* + * Software event timer adapter buffer helper functions + */ + +#define NSECPERSEC 1E9 + +/* Optimizations used to index into the buffer require that the buffer size + * be a power of 2. 
+ */ +#define EVENT_BUFFER_SZ 4096 +#define EVENT_BUFFER_BATCHSZ 32 +#define EVENT_BUFFER_MASK (EVENT_BUFFER_SZ - 1) + +struct event_buffer { + uint16_t head; + uint16_t tail; + struct rte_event events[EVENT_BUFFER_SZ]; +} __rte_cache_aligned; + +static inline bool +event_buffer_full(struct event_buffer *bufp) +{ + return (bufp->head - bufp->tail) == EVENT_BUFFER_SZ; +} + +static inline bool +event_buffer_batch_ready(struct event_buffer *bufp) +{ + return (bufp->head - bufp->tail) >= EVENT_BUFFER_BATCHSZ; +} + +static void +event_buffer_init(struct event_buffer *bufp) +{ + bufp->head = bufp->tail = 0; + memset(&bufp->events, 0, sizeof(struct rte_event) * EVENT_BUFFER_SZ); +} + +static int +event_buffer_add(struct event_buffer *bufp, struct rte_event *eventp) +{ + uint16_t head_idx; + struct rte_event *buf_eventp; + + if (event_buffer_full(bufp)) + return -1; + + /* Instead of modulus, bitwise AND with mask to get head_idx. */ + head_idx = bufp->head & EVENT_BUFFER_MASK; + buf_eventp = &bufp->events[head_idx]; + rte_memcpy(buf_eventp, eventp, sizeof(struct rte_event)); + + /* Wrap automatically when overflow occurs. */ + bufp->head++; + + return 0; +} + +static void +event_buffer_flush(struct event_buffer *bufp, uint8_t dev_id, uint8_t port_id, + uint16_t *nb_events_flushed, + uint16_t *nb_events_inv) +{ + uint16_t head_idx, tail_idx, n = 0; + struct rte_event *events = bufp->events; + + /* Instead of modulus, bitwise AND with mask to get index. */ + head_idx = bufp->head & EVENT_BUFFER_MASK; + tail_idx = bufp->tail & EVENT_BUFFER_MASK; + + /* Determine the largest contigous run we can attempt to enqueue to the + * event device. + */ + if (head_idx > tail_idx) + n = head_idx - tail_idx; + else if (head_idx < tail_idx) + n = EVENT_BUFFER_SZ - tail_idx; + else { + *nb_events_flushed = 0; + return; + } + + *nb_events_inv = 0; + *nb_events_flushed = rte_event_enqueue_burst(dev_id, port_id, + &events[tail_idx], n); + if (*nb_events_flushed != n && rte_errno == -EINVAL) { + EVTIM_LOG_ERR("failed to enqueue invalid event - dropping it"); + (*nb_events_inv)++; + } + + bufp->tail = bufp->tail + *nb_events_flushed + *nb_events_inv; +} + +/* + * Software event timer adapter implementation + */ + +struct rte_event_timer_adapter_sw_data { + /* List of messages for outstanding timers */ + TAILQ_HEAD(, msg) msgs_tailq_head; + /* Lock to guard tailq and armed count */ + rte_spinlock_t msgs_tailq_sl; + /* Identifier of service executing timer management logic. */ + uint32_t service_id; + /* The cycle count at which the adapter should next tick */ + uint64_t next_tick_cycles; + /* Incremented as the service moves through phases of an iteration */ + volatile int service_phase; + /* The tick resolution used by adapter instance. May have been + * adjusted from what user requested + */ + uint64_t timer_tick_ns; + /* Maximum timeout in nanoseconds allowed by adapter instance. */ + uint64_t max_tmo_ns; + /* Ring containing messages to arm or cancel event timers */ + struct rte_ring *msg_ring; + /* Mempool containing msg objects */ + struct rte_mempool *msg_pool; + /* Buffered timer expiry events to be enqueued to an event device. 
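/*
 * Illustrative sketch, not part of this patch: why EVENT_BUFFER_SZ must be a
 * power of two.  head and tail above are free-running uint16_t counters;
 * (head & EVENT_BUFFER_MASK) replaces a modulus, and the fill level
 * (head - tail) stays correct across uint16_t wrap-around.
 */
#include <assert.h>
#include <stdint.h>

static void
demo_buffer_index_math(void)
{
	const uint16_t mask = 4096 - 1;		/* EVENT_BUFFER_MASK */
	uint16_t head = 65534;			/* about to wrap */
	uint16_t tail = 65530;

	assert((uint16_t)(head - tail) == 4);	/* occupancy */
	assert((head & mask) == 4094);		/* slot index, no modulus */

	head += 4;				/* wraps to 2 */
	assert((uint16_t)(head - tail) == 8);	/* still correct after wrap */
	assert((head & mask) == 2);
}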
*/ + struct event_buffer buffer; + /* Statistics */ + struct rte_event_timer_adapter_stats stats; + /* The number of threads currently adding to the message ring */ + rte_atomic16_t message_producer_count; +}; + +enum msg_type {MSG_TYPE_ARM, MSG_TYPE_CANCEL}; + +struct msg { + enum msg_type type; + struct rte_event_timer *evtim; + struct rte_timer tim; + TAILQ_ENTRY(msg) msgs; +}; + +static void +sw_event_timer_cb(struct rte_timer *tim, void *arg) +{ + int ret; + uint16_t nb_evs_flushed = 0; + uint16_t nb_evs_invalid = 0; + uint64_t opaque; + struct rte_event_timer *evtim; + struct rte_event_timer_adapter *adapter; + struct rte_event_timer_adapter_sw_data *sw_data; + + evtim = arg; + opaque = evtim->impl_opaque[1]; + adapter = (struct rte_event_timer_adapter *)(uintptr_t)opaque; + sw_data = adapter->data->adapter_priv; + + ret = event_buffer_add(&sw_data->buffer, &evtim->ev); + if (ret < 0) { + /* If event buffer is full, put timer back in list with + * immediate expiry value, so that we process it again on the + * next iteration. + */ + rte_timer_reset_sync(tim, 0, SINGLE, rte_lcore_id(), + sw_event_timer_cb, evtim); + + sw_data->stats.evtim_retry_count++; + EVTIM_LOG_DBG("event buffer full, resetting rte_timer with " + "immediate expiry value"); + } else { + struct msg *m = container_of(tim, struct msg, tim); + TAILQ_REMOVE(&sw_data->msgs_tailq_head, m, msgs); + EVTIM_BUF_LOG_DBG("buffered an event timer expiry event"); + evtim->state = RTE_EVENT_TIMER_NOT_ARMED; + + /* Free the msg object containing the rte_timer now that + * we've buffered its event successfully. + */ + rte_mempool_put(sw_data->msg_pool, m); + + /* Bump the count when we successfully add an expiry event to + * the buffer. + */ + sw_data->stats.evtim_exp_count++; + } + + if (event_buffer_batch_ready(&sw_data->buffer)) { + event_buffer_flush(&sw_data->buffer, + adapter->data->event_dev_id, + adapter->data->event_port_id, + &nb_evs_flushed, + &nb_evs_invalid); + + sw_data->stats.ev_enq_count += nb_evs_flushed; + sw_data->stats.ev_inv_count += nb_evs_invalid; + } +} + +static __rte_always_inline uint64_t +get_timeout_cycles(struct rte_event_timer *evtim, + struct rte_event_timer_adapter *adapter) +{ + uint64_t timeout_ns; + struct rte_event_timer_adapter_sw_data *sw_data; + + sw_data = adapter->data->adapter_priv; + timeout_ns = evtim->timeout_ticks * sw_data->timer_tick_ns; + return timeout_ns * rte_get_timer_hz() / NSECPERSEC; + +} + +/* This function returns true if one or more (adapter) ticks have occurred since + * the last time it was called. + */ +static inline bool +adapter_did_tick(struct rte_event_timer_adapter *adapter) +{ + uint64_t cycles_per_adapter_tick, start_cycles; + uint64_t *next_tick_cyclesp; + struct rte_event_timer_adapter_sw_data *sw_data; + + sw_data = adapter->data->adapter_priv; + next_tick_cyclesp = &sw_data->next_tick_cycles; + + cycles_per_adapter_tick = sw_data->timer_tick_ns * + (rte_get_timer_hz() / NSECPERSEC); + + start_cycles = rte_get_timer_cycles(); + + /* Note: initially, *next_tick_cyclesp == 0, so the clause below will + * execute, and set things going. + */ + + if (start_cycles >= *next_tick_cyclesp) { + /* Snap the current cycle count to the preceding adapter tick + * boundary. 
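/*
 * Illustrative sketch, not part of this patch: the arithmetic performed by
 * get_timeout_cycles() above, with hypothetical numbers.  With a 1 ms
 * adapter tick (timer_tick_ns = 1000000) and timeout_ticks = 50 the expiry
 * is 50 ms; on a 2 GHz timer clock that is 100,000,000 cycles.
 */
#include <stdint.h>

static uint64_t
demo_timeout_cycles(uint64_t timeout_ticks, uint64_t timer_tick_ns,
		    uint64_t timer_hz)
{
	/* demo_timeout_cycles(50, 1000000, 2000000000) == 100000000 */
	uint64_t timeout_ns = timeout_ticks * timer_tick_ns;

	return timeout_ns * timer_hz / 1000000000ULL;	/* ns per second */
}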
+ */ + start_cycles -= start_cycles % cycles_per_adapter_tick; + + *next_tick_cyclesp = start_cycles + cycles_per_adapter_tick; + + return true; + } + + return false; +} + +/* Check that event timer timeout value is in range */ +static __rte_always_inline int +check_timeout(struct rte_event_timer *evtim, + const struct rte_event_timer_adapter *adapter) +{ + uint64_t tmo_nsec; + struct rte_event_timer_adapter_sw_data *sw_data; + + sw_data = adapter->data->adapter_priv; + tmo_nsec = evtim->timeout_ticks * sw_data->timer_tick_ns; + + if (tmo_nsec > sw_data->max_tmo_ns) + return -1; + + if (tmo_nsec < sw_data->timer_tick_ns) + return -2; + + return 0; +} + +/* Check that event timer event queue sched type matches destination event queue + * sched type + */ +static __rte_always_inline int +check_destination_event_queue(struct rte_event_timer *evtim, + const struct rte_event_timer_adapter *adapter) +{ + int ret; + uint32_t sched_type; + + ret = rte_event_queue_attr_get(adapter->data->event_dev_id, + evtim->ev.queue_id, + RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE, + &sched_type); + + if ((ret < 0 && ret != -EOVERFLOW) || + evtim->ev.sched_type != sched_type) + return -1; + + return 0; +} + +#define NB_OBJS 32 +static int +sw_event_timer_adapter_service_func(void *arg) +{ + int i, num_msgs; + uint64_t cycles, opaque; + uint16_t nb_evs_flushed = 0; + uint16_t nb_evs_invalid = 0; + struct rte_event_timer_adapter *adapter; + struct rte_event_timer_adapter_sw_data *sw_data; + struct rte_event_timer *evtim = NULL; + struct rte_timer *tim = NULL; + struct msg *msg, *msgs[NB_OBJS]; + + adapter = arg; + sw_data = adapter->data->adapter_priv; + + sw_data->service_phase = 1; + rte_smp_wmb(); + + while (rte_atomic16_read(&sw_data->message_producer_count) > 0 || + !rte_ring_empty(sw_data->msg_ring)) { + + num_msgs = rte_ring_dequeue_burst(sw_data->msg_ring, + (void **)msgs, NB_OBJS, NULL); + + for (i = 0; i < num_msgs; i++) { + int ret = 0; + + RTE_SET_USED(ret); + + msg = msgs[i]; + evtim = msg->evtim; + + switch (msg->type) { + case MSG_TYPE_ARM: + EVTIM_SVC_LOG_DBG("dequeued ARM message from " + "ring"); + tim = &msg->tim; + rte_timer_init(tim); + cycles = get_timeout_cycles(evtim, + adapter); + ret = rte_timer_reset(tim, cycles, SINGLE, + rte_lcore_id(), + sw_event_timer_cb, + evtim); + RTE_ASSERT(ret == 0); + + evtim->impl_opaque[0] = (uintptr_t)tim; + evtim->impl_opaque[1] = (uintptr_t)adapter; + + TAILQ_INSERT_TAIL(&sw_data->msgs_tailq_head, + msg, + msgs); + break; + case MSG_TYPE_CANCEL: + EVTIM_SVC_LOG_DBG("dequeued CANCEL message " + "from ring"); + opaque = evtim->impl_opaque[0]; + tim = (struct rte_timer *)(uintptr_t)opaque; + RTE_ASSERT(tim != NULL); + + ret = rte_timer_stop(tim); + RTE_ASSERT(ret == 0); + + /* Free the msg object for the original arm + * request. 
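 * The rte_timer embedded in that msg is the same one the ARM path stored in
 * impl_opaque[0], so container_of() walks back from the timer pointer to the
 * msg allocation that originally carried the arm request.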
+ */ + struct msg *m; + m = container_of(tim, struct msg, tim); + TAILQ_REMOVE(&sw_data->msgs_tailq_head, m, + msgs); + rte_mempool_put(sw_data->msg_pool, m); + + /* Free the msg object for the current msg */ + rte_mempool_put(sw_data->msg_pool, msg); + + evtim->impl_opaque[0] = 0; + evtim->impl_opaque[1] = 0; + + break; + } + } + } + + sw_data->service_phase = 2; + rte_smp_wmb(); + + if (adapter_did_tick(adapter)) { + rte_timer_manage(); + + event_buffer_flush(&sw_data->buffer, + adapter->data->event_dev_id, + adapter->data->event_port_id, + &nb_evs_flushed, &nb_evs_invalid); + + sw_data->stats.ev_enq_count += nb_evs_flushed; + sw_data->stats.ev_inv_count += nb_evs_invalid; + sw_data->stats.adapter_tick_count++; + } + + sw_data->service_phase = 0; + rte_smp_wmb(); + + return 0; +} + +/* The adapter initialization function rounds the mempool size up to the next + * power of 2, so we can take the difference between that value and what the + * user requested, and use the space for caches. This avoids a scenario where a + * user can't arm the number of timers the adapter was configured with because + * mempool objects have been lost to caches. + * + * nb_actual should always be a power of 2, so we can iterate over the powers + * of 2 to see what the largest cache size we can use is. + */ +static int +compute_msg_mempool_cache_size(uint64_t nb_requested, uint64_t nb_actual) +{ + int i; + int size; + int cache_size = 0; + + for (i = 0; ; i++) { + size = 1 << i; + + if (RTE_MAX_LCORE * size < (int)(nb_actual - nb_requested) && + size < RTE_MEMPOOL_CACHE_MAX_SIZE && + size <= nb_actual / 1.5) + cache_size = size; + else + break; + } + + return cache_size; +} + +#define SW_MIN_INTERVAL 1E5 + +static int +sw_event_timer_adapter_init(struct rte_event_timer_adapter *adapter) +{ + int ret; + struct rte_event_timer_adapter_sw_data *sw_data; + uint64_t nb_timers; + unsigned int flags; + struct rte_service_spec service; + static bool timer_subsystem_inited; // static initialized to false + + /* Allocate storage for SW implementation data */ + char priv_data_name[RTE_RING_NAMESIZE]; + snprintf(priv_data_name, RTE_RING_NAMESIZE, "sw_evtim_adap_priv_%"PRIu8, + adapter->data->id); + adapter->data->adapter_priv = rte_zmalloc_socket( + priv_data_name, + sizeof(struct rte_event_timer_adapter_sw_data), + RTE_CACHE_LINE_SIZE, + adapter->data->socket_id); + if (adapter->data->adapter_priv == NULL) { + EVTIM_LOG_ERR("failed to allocate space for private data"); + rte_errno = ENOMEM; + return -1; + } + + if (adapter->data->conf.timer_tick_ns < SW_MIN_INTERVAL) { + EVTIM_LOG_ERR("failed to create adapter with requested tick " + "interval"); + rte_errno = EINVAL; + return -1; + } + + sw_data = adapter->data->adapter_priv; + + sw_data->timer_tick_ns = adapter->data->conf.timer_tick_ns; + sw_data->max_tmo_ns = adapter->data->conf.max_tmo_ns; + + TAILQ_INIT(&sw_data->msgs_tailq_head); + rte_spinlock_init(&sw_data->msgs_tailq_sl); + rte_atomic16_init(&sw_data->message_producer_count); + + /* Rings require power of 2, so round up to next such value */ + nb_timers = rte_align64pow2(adapter->data->conf.nb_timers); + + char msg_ring_name[RTE_RING_NAMESIZE]; + snprintf(msg_ring_name, RTE_RING_NAMESIZE, + "sw_evtim_adap_msg_ring_%"PRIu8, adapter->data->id); + flags = adapter->data->conf.flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT ? 
+ RING_F_SP_ENQ | RING_F_SC_DEQ : + RING_F_SC_DEQ; + sw_data->msg_ring = rte_ring_create(msg_ring_name, nb_timers, + adapter->data->socket_id, flags); + if (sw_data->msg_ring == NULL) { + EVTIM_LOG_ERR("failed to create message ring"); + rte_errno = ENOMEM; + goto free_priv_data; + } + + char pool_name[RTE_RING_NAMESIZE]; + snprintf(pool_name, RTE_RING_NAMESIZE, "sw_evtim_adap_msg_pool_%"PRIu8, + adapter->data->id); + + /* Both the arming/canceling thread and the service thread will do puts + * to the mempool, but if the SP_PUT flag is enabled, we can specify + * single-consumer get for the mempool. + */ + flags = adapter->data->conf.flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT ? + MEMPOOL_F_SC_GET : 0; + + /* The usable size of a ring is count - 1, so subtract one here to + * make the counts agree. + */ + int pool_size = nb_timers - 1; + int cache_size = compute_msg_mempool_cache_size( + adapter->data->conf.nb_timers, nb_timers); + sw_data->msg_pool = rte_mempool_create(pool_name, pool_size, + sizeof(struct msg), cache_size, + 0, NULL, NULL, NULL, NULL, + adapter->data->socket_id, flags); + if (sw_data->msg_pool == NULL) { + EVTIM_LOG_ERR("failed to create message object mempool"); + rte_errno = ENOMEM; + goto free_msg_ring; + } + + event_buffer_init(&sw_data->buffer); + + /* Register a service component to run adapter logic */ + memset(&service, 0, sizeof(service)); + snprintf(service.name, RTE_SERVICE_NAME_MAX, + "sw_evimer_adap_svc_%"PRIu8, adapter->data->id); + service.socket_id = adapter->data->socket_id; + service.callback = sw_event_timer_adapter_service_func; + service.callback_userdata = adapter; + service.capabilities &= ~(RTE_SERVICE_CAP_MT_SAFE); + ret = rte_service_component_register(&service, &sw_data->service_id); + if (ret < 0) { + EVTIM_LOG_ERR("failed to register service %s with id %"PRIu32 + ": err = %d", service.name, sw_data->service_id, + ret); + + rte_errno = ENOSPC; + goto free_msg_pool; + } + + EVTIM_LOG_DBG("registered service %s with id %"PRIu32, service.name, + sw_data->service_id); + + adapter->data->service_id = sw_data->service_id; + adapter->data->service_inited = 1; + + if (!timer_subsystem_inited) { + rte_timer_subsystem_init(); + timer_subsystem_inited = true; + } + + return 0; + +free_msg_pool: + rte_mempool_free(sw_data->msg_pool); +free_msg_ring: + rte_ring_free(sw_data->msg_ring); +free_priv_data: + rte_free(sw_data); + return -1; +} + +static int +sw_event_timer_adapter_uninit(struct rte_event_timer_adapter *adapter) +{ + int ret; + struct msg *m1, *m2; + struct rte_event_timer_adapter_sw_data *sw_data = + adapter->data->adapter_priv; + + rte_spinlock_lock(&sw_data->msgs_tailq_sl); + + /* Cancel outstanding rte_timers and free msg objects */ + m1 = TAILQ_FIRST(&sw_data->msgs_tailq_head); + while (m1 != NULL) { + EVTIM_LOG_DBG("freeing outstanding timer"); + m2 = TAILQ_NEXT(m1, msgs); + + rte_timer_stop_sync(&m1->tim); + rte_mempool_put(sw_data->msg_pool, m1); + + m1 = m2; + } + + rte_spinlock_unlock(&sw_data->msgs_tailq_sl); + + ret = rte_service_component_unregister(sw_data->service_id); + if (ret < 0) { + EVTIM_LOG_ERR("failed to unregister service component"); + return ret; + } + + rte_ring_free(sw_data->msg_ring); + rte_mempool_free(sw_data->msg_pool); + rte_free(adapter->data->adapter_priv); + + return 0; +} + +static inline int32_t +get_mapped_count_for_service(uint32_t service_id) +{ + int32_t core_count, i, mapped_count = 0; + uint32_t lcore_arr[RTE_MAX_LCORE]; + + core_count = rte_service_lcore_list(lcore_arr, RTE_MAX_LCORE); + + for (i = 0; i 
< core_count; i++) + if (rte_service_map_lcore_get(service_id, lcore_arr[i]) == 1) + mapped_count++; + + return mapped_count; +} + +static int +sw_event_timer_adapter_start(const struct rte_event_timer_adapter *adapter) +{ + int mapped_count; + struct rte_event_timer_adapter_sw_data *sw_data; + + sw_data = adapter->data->adapter_priv; + + /* Mapping the service to more than one service core can introduce + * delays while one thread is waiting to acquire a lock, so only allow + * one core to be mapped to the service. + */ + mapped_count = get_mapped_count_for_service(sw_data->service_id); + + if (mapped_count == 1) + return rte_service_component_runstate_set(sw_data->service_id, + 1); + + return mapped_count < 1 ? -ENOENT : -ENOTSUP; +} + +static int +sw_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter) +{ + int ret; + struct rte_event_timer_adapter_sw_data *sw_data = + adapter->data->adapter_priv; + + ret = rte_service_component_runstate_set(sw_data->service_id, 0); + if (ret < 0) + return ret; + + /* Wait for the service to complete its final iteration before + * stopping. + */ + while (sw_data->service_phase != 0) + rte_pause(); + + rte_smp_rmb(); + + return 0; +} + +static void +sw_event_timer_adapter_get_info(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_info *adapter_info) +{ + struct rte_event_timer_adapter_sw_data *sw_data; + sw_data = adapter->data->adapter_priv; + + adapter_info->min_resolution_ns = sw_data->timer_tick_ns; + adapter_info->max_tmo_ns = sw_data->max_tmo_ns; +} + +static int +sw_event_timer_adapter_stats_get(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats) +{ + struct rte_event_timer_adapter_sw_data *sw_data; + sw_data = adapter->data->adapter_priv; + *stats = sw_data->stats; + return 0; +} + +static int +sw_event_timer_adapter_stats_reset( + const struct rte_event_timer_adapter *adapter) +{ + struct rte_event_timer_adapter_sw_data *sw_data; + sw_data = adapter->data->adapter_priv; + memset(&sw_data->stats, 0, sizeof(sw_data->stats)); + return 0; +} + +static __rte_always_inline uint16_t +__sw_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint16_t nb_evtims) +{ + uint16_t i; + int ret; + struct rte_event_timer_adapter_sw_data *sw_data; + struct msg *msgs[nb_evtims]; + +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + /* Check that the service is running. 
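 * Arm requests are only consumed by the service function, so arming against
 * a stopped adapter would strand messages in the ring; in debug builds the
 * call fails early instead.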
*/ + if (rte_service_runstate_get(adapter->data->service_id) != 1) { + rte_errno = EINVAL; + return 0; + } +#endif + + sw_data = adapter->data->adapter_priv; + + ret = rte_mempool_get_bulk(sw_data->msg_pool, (void **)msgs, nb_evtims); + if (ret < 0) { + rte_errno = ENOSPC; + return 0; + } + + /* Let the service know we're producing messages for it to process */ + rte_atomic16_inc(&sw_data->message_producer_count); + + /* If the service is managing timers, wait for it to finish */ + while (sw_data->service_phase == 2) + rte_pause(); + + rte_smp_rmb(); + + for (i = 0; i < nb_evtims; i++) { + /* Don't modify the event timer state in these cases */ + if (evtims[i]->state == RTE_EVENT_TIMER_ARMED) { + rte_errno = EALREADY; + break; + } else if (!(evtims[i]->state == RTE_EVENT_TIMER_NOT_ARMED || + evtims[i]->state == RTE_EVENT_TIMER_CANCELED)) { + rte_errno = EINVAL; + break; + } + + ret = check_timeout(evtims[i], adapter); + if (ret == -1) { + evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOLATE; + rte_errno = EINVAL; + break; + } + if (ret == -2) { + evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY; + rte_errno = EINVAL; + break; + } + + if (check_destination_event_queue(evtims[i], adapter) < 0) { + evtims[i]->state = RTE_EVENT_TIMER_ERROR; + rte_errno = EINVAL; + break; + } + + /* Checks passed, set up a message to enqueue */ + msgs[i]->type = MSG_TYPE_ARM; + msgs[i]->evtim = evtims[i]; + + /* Set the payload pointer if not set. */ + if (evtims[i]->ev.event_ptr == NULL) + evtims[i]->ev.event_ptr = evtims[i]; + + /* msg objects that get enqueued successfully will be freed + * either by a future cancel operation or by the timer + * expiration callback. + */ + if (rte_ring_enqueue(sw_data->msg_ring, msgs[i]) < 0) { + rte_errno = ENOSPC; + break; + } + + EVTIM_LOG_DBG("enqueued ARM message to ring"); + + evtims[i]->state = RTE_EVENT_TIMER_ARMED; + } + + /* Let the service know we're done producing messages */ + rte_atomic16_dec(&sw_data->message_producer_count); + + if (i < nb_evtims) + rte_mempool_put_bulk(sw_data->msg_pool, (void **)&msgs[i], + nb_evtims - i); + + return i; +} + +static uint16_t +sw_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint16_t nb_evtims) +{ + return __sw_event_timer_arm_burst(adapter, evtims, nb_evtims); +} + +static uint16_t +sw_event_timer_cancel_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint16_t nb_evtims) +{ + uint16_t i; + int ret; + struct rte_event_timer_adapter_sw_data *sw_data; + struct msg *msgs[nb_evtims]; + +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + /* Check that the service is running. 
*/ + if (rte_service_runstate_get(adapter->data->service_id) != 1) { + rte_errno = EINVAL; + return 0; + } +#endif + + sw_data = adapter->data->adapter_priv; + + ret = rte_mempool_get_bulk(sw_data->msg_pool, (void **)msgs, nb_evtims); + if (ret < 0) { + rte_errno = ENOSPC; + return 0; + } + + /* Let the service know we're producing messages for it to process */ + rte_atomic16_inc(&sw_data->message_producer_count); + + /* If the service could be modifying event timer states, wait */ + while (sw_data->service_phase == 2) + rte_pause(); + + rte_smp_rmb(); + + for (i = 0; i < nb_evtims; i++) { + /* Don't modify the event timer state in these cases */ + if (evtims[i]->state == RTE_EVENT_TIMER_CANCELED) { + rte_errno = EALREADY; + break; + } else if (evtims[i]->state != RTE_EVENT_TIMER_ARMED) { + rte_errno = EINVAL; + break; + } + + msgs[i]->type = MSG_TYPE_CANCEL; + msgs[i]->evtim = evtims[i]; + + if (rte_ring_enqueue(sw_data->msg_ring, msgs[i]) < 0) { + rte_errno = ENOSPC; + break; + } + + EVTIM_LOG_DBG("enqueued CANCEL message to ring"); + + evtims[i]->state = RTE_EVENT_TIMER_CANCELED; + } + + /* Let the service know we're done producing messages */ + rte_atomic16_dec(&sw_data->message_producer_count); + + if (i < nb_evtims) + rte_mempool_put_bulk(sw_data->msg_pool, (void **)&msgs[i], + nb_evtims - i); + + return i; +} + +static uint16_t +sw_event_timer_arm_tmo_tick_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint64_t timeout_ticks, + uint16_t nb_evtims) +{ + int i; + + for (i = 0; i < nb_evtims; i++) + evtims[i]->timeout_ticks = timeout_ticks; + + return __sw_event_timer_arm_burst(adapter, evtims, nb_evtims); +} + +static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops = { + .init = sw_event_timer_adapter_init, + .uninit = sw_event_timer_adapter_uninit, + .start = sw_event_timer_adapter_start, + .stop = sw_event_timer_adapter_stop, + .get_info = sw_event_timer_adapter_get_info, + .stats_get = sw_event_timer_adapter_stats_get, + .stats_reset = sw_event_timer_adapter_stats_reset, + .arm_burst = sw_event_timer_arm_burst, + .arm_tmo_tick_burst = sw_event_timer_arm_tmo_tick_burst, + .cancel_burst = sw_event_timer_cancel_burst, +}; + +RTE_INIT(event_timer_adapter_init_log) +{ + evtim_logtype = rte_log_register("lib.eventdev.adapter.timer"); + if (evtim_logtype >= 0) + rte_log_set_level(evtim_logtype, RTE_LOG_NOTICE); + + evtim_buffer_logtype = rte_log_register("lib.eventdev.adapter.timer." + "buffer"); + if (evtim_buffer_logtype >= 0) + rte_log_set_level(evtim_buffer_logtype, RTE_LOG_NOTICE); + + evtim_svc_logtype = rte_log_register("lib.eventdev.adapter.timer.svc"); + if (evtim_svc_logtype >= 0) + rte_log_set_level(evtim_svc_logtype, RTE_LOG_NOTICE); +} diff --git a/lib/librte_eventdev/rte_event_timer_adapter.h b/lib/librte_eventdev/rte_event_timer_adapter.h new file mode 100644 index 00000000..d4ea6f17 --- /dev/null +++ b/lib/librte_eventdev/rte_event_timer_adapter.h @@ -0,0 +1,766 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc. + * Copyright(c) 2017-2018 Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef __RTE_EVENT_TIMER_ADAPTER_H__ +#define __RTE_EVENT_TIMER_ADAPTER_H__ + +/** + * @file + * + * RTE Event Timer Adapter + * + * An event timer adapter has the following abstract working model: + * + * timer_tick_ns + * + + * +-------+ | + * | | | + * +-------+ bkt 0 +----v---+ + * | | | | + * | +-------+ | + * +---+---+ +---+---+ +---+---+---+---+ + * | | | | | | | | | + * | bkt n | | bkt 1 |<-> t0| t1| t2| tn| + * | | | | | | | | | + * +---+---+ +---+---+ +---+---+---+---+ + * | Timer adapter | + * +---+---+ +---+---+ + * | | | | + * | bkt 4 | | bkt 2 |<--- Current bucket + * | | | | + * +---+---+ +---+---+ + * | +-------+ | + * | | | | + * +------+ bkt 3 +-------+ + * | | + * +-------+ + * + * - It has a virtual monotonically increasing 64-bit timer adapter clock based + * on *enum rte_event_timer_adapter_clk_src* clock source. The clock source + * could be a CPU clock, or a platform dependent external clock. + * + * - The application creates a timer adapter instance with given the clock + * source, the total number of event timers, and a resolution(expressed in ns) + * to traverse between the buckets. + * + * - Each timer adapter may have 0 to n buckets based on the configured + * max timeout(max_tmo_ns) and resolution(timer_tick_ns). Upon starting the + * timer adapter, the adapter starts ticking at *timer_tick_ns* resolution. + * + * - The application arms an event timer that will expire *timer_tick_ns* + * from now. + * + * - The application can cancel an armed timer and no timer expiry event will be + * generated. + * + * - If a timer expires then the library injects the timer expiry event in + * the designated event queue. + * + * - The timer expiry event will be received through *rte_event_dequeue_burst*. + * + * - The application frees the timer adapter instance. + * + * Multiple timer adapters can be created with a varying level of resolution + * for various expiry use cases that run in parallel. + * + * Before using the timer adapter, the application has to create and configure + * an event device along with the event port. Based on the event device + * capability it might require creating an additional event port to be used + * by the timer adapter. + * + * The application creates the event timer adapter using the + * ``rte_event_timer_adapter_create()``. The event device id is passed to this + * function, inside this function the event device capability is checked, + * and if an in-built port is absent the application uses the default + * function to create a new producer port. + * + * The application may also use the function + * ``rte_event_timer_adapter_create_ext()`` to have granular control over + * producer port creation in a case where the in-built port is absent. + * + * After creating the timer adapter, the application has to start it + * using ``rte_event_timer_adapter_start()``. The buckets are traversed from + * 0 to n; when the adapter ticks, the next bucket is visited. Each time, + * the list per bucket is processed, and timer expiry events are sent to the + * designated event queue. + * + * The application can arm one or more event timers using the + * ``rte_event_timer_arm_burst()``. The *timeout_ticks* represents the number + * of *timer_tick_ns* after which the timer has to expire. The timeout at + * which the timers expire can be grouped or be independent of each + * event timer instance. ``rte_event_timer_arm_tmo_tick_burst()`` addresses the + * former case and ``rte_event_timer_arm_burst()`` addresses the latter case. 
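 *
 * A minimal sketch of that flow (dev_id, queue_id and the storage behind
 * *evtim* are placeholders the application is assumed to have set up, and
 * the event device is assumed to be configured and started):
 *
 *      struct rte_event_timer_adapter_conf conf = {
 *              .event_dev_id = dev_id,
 *              .timer_adapter_id = 0,
 *              .socket_id = rte_socket_id(),
 *              .clk_src = RTE_EVENT_TIMER_ADAPTER_CPU_CLK,
 *              .timer_tick_ns = 100000000,      // 100 ms resolution
 *              .max_tmo_ns = 180000000000,      // 3 minute maximum timeout
 *              .nb_timers = 1024,
 *              .flags = RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES,
 *      };
 *      struct rte_event_timer_adapter *adapter =
 *              rte_event_timer_adapter_create(&conf);
 *      rte_event_timer_adapter_start(adapter);
 *
 *      struct rte_event_timer *evtim = ...;     // application-managed storage
 *      evtim->ev.op = RTE_EVENT_OP_NEW;
 *      evtim->ev.event_type = RTE_EVENT_TYPE_TIMER;
 *      evtim->ev.queue_id = queue_id;
 *      evtim->ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
 *      evtim->ev.event_ptr = evtim;
 *      evtim->state = RTE_EVENT_TIMER_NOT_ARMED;
 *      evtim->timeout_ticks = 30;               // 30 ticks of 100 ms = 3 s
 *      rte_event_timer_arm_burst(adapter, &evtim, 1);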
+ * + * The application can cancel the timers from expiring using the + * ``rte_event_timer_cancel_burst()``. + * + * On the secondary process, ``rte_event_timer_adapter_lookup()`` can be used + * to get the timer adapter pointer from its id and use it to invoke fastpath + * operations such as arm and cancel. + * + * Some of the use cases of event timer adapter are Beacon Timers, + * Generic SW Timeout, Wireless MAC Scheduling, 3G Frame Protocols, + * Packet Scheduling, Protocol Retransmission Timers, Supervision Timers. + * All these use cases require high resolution and low time drift. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "rte_eventdev.h" + +/** + * @warning + * @b EXPERIMENTAL: this enum may change without prior notice + * + * Timer adapter clock source + */ +enum rte_event_timer_adapter_clk_src { + RTE_EVENT_TIMER_ADAPTER_CPU_CLK, + /**< Use CPU clock as the clock source. */ + RTE_EVENT_TIMER_ADAPTER_EXT_CLK0, + /**< Platform dependent external clock source 0. */ + RTE_EVENT_TIMER_ADAPTER_EXT_CLK1, + /**< Platform dependent external clock source 1. */ + RTE_EVENT_TIMER_ADAPTER_EXT_CLK2, + /**< Platform dependent external clock source 2. */ + RTE_EVENT_TIMER_ADAPTER_EXT_CLK3, + /**< Platform dependent external clock source 3. */ +}; + +#define RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES (1ULL << 0) +/**< The event timer adapter implementation may have constraints on the + * resolution (timer_tick_ns) and maximum timer expiry timeout(max_tmo_ns) + * based on the given timer adapter or system. If this flag is set, the + * implementation adjusts the resolution and maximum timeout to the best + * possible configuration. On successful timer adapter creation, the + * application can get the configured resolution and max timeout with + * ``rte_event_timer_adapter_get_info()``. + * + * @see struct rte_event_timer_adapter_info::min_resolution_ns + * @see struct rte_event_timer_adapter_info::max_tmo_ns + */ +#define RTE_EVENT_TIMER_ADAPTER_F_SP_PUT (1ULL << 1) +/**< ``rte_event_timer_arm_burst()`` API to be used in single producer mode. + * + * @see struct rte_event_timer_adapter_conf::flags + */ + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Timer adapter configuration structure + */ +struct rte_event_timer_adapter_conf { + uint8_t event_dev_id; + /**< Event device identifier */ + uint16_t timer_adapter_id; + /**< Event timer adapter identifier */ + uint32_t socket_id; + /**< Identifier of socket from which to allocate memory for adapter */ + enum rte_event_timer_adapter_clk_src clk_src; + /**< Clock source for timer adapter */ + uint64_t timer_tick_ns; + /**< Timer adapter resolution in ns */ + uint64_t max_tmo_ns; + /**< Maximum timer timeout(expiry) in ns */ + uint64_t nb_timers; + /**< Total number of timers per adapter */ + uint64_t flags; + /**< Timer adapter config flags (RTE_EVENT_TIMER_ADAPTER_F_*) */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Event timer adapter stats structure + */ +struct rte_event_timer_adapter_stats { + uint64_t evtim_exp_count; + /**< Number of event timers that have expired. 
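 * In the software adapter, a timer is counted here once its expiry event has
 * been buffered for enqueue; expirations that had to be retried because the
 * event buffer was full are tracked in evtim_retry_count until they succeed.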
*/ + uint64_t ev_enq_count; + /**< Eventdev enqueue count */ + uint64_t ev_inv_count; + /**< Invalid expiry event count */ + uint64_t evtim_retry_count; + /**< Event timer retry count */ + uint64_t adapter_tick_count; + /**< Tick count for the adapter, at its resolution */ +}; + +struct rte_event_timer_adapter; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Callback function type for producer port creation. + */ +typedef int (*rte_event_timer_adapter_port_conf_cb_t)(uint16_t id, + uint8_t event_dev_id, + uint8_t *event_port_id, + void *conf_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create an event timer adapter. + * + * This function must be invoked first before any other function in the API. + * + * @param conf + * The event timer adapter configuration structure. + * + * @return + * A pointer to the new allocated event timer adapter on success. + * NULL on error with rte_errno set appropriately. + * Possible rte_errno values include: + * - ERANGE: timer_tick_ns is not in supported range. + * - ENOMEM: unable to allocate sufficient memory for adapter instances + * - EINVAL: invalid event device identifier specified in config + * - ENOSPC: maximum number of adapters already created + * - EIO: event device reconfiguration and restart error. The adapter + * reconfigures the event device with an additional port by default if it is + * required to use a service to manage timers. If the device had been started + * before this call, this error code indicates an error in restart following + * an error in reconfiguration, i.e., a combination of the two error codes. + */ +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_create(const struct rte_event_timer_adapter_conf *conf); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a timer adapter with the supplied callback. + * + * This function can be used to have a more granular control over the timer + * adapter creation. If a built-in port is absent, then the function uses the + * callback provided to create and get the port id to be used as a producer + * port. + * + * @param conf + * The timer adapter configuration structure + * @param conf_cb + * The port config callback function. + * @param conf_arg + * Opaque pointer to the argument for the callback function + * + * @return + * A pointer to the new allocated event timer adapter on success. + * NULL on error with rte_errno set appropriately. + * Possible rte_errno values include: + * - ERANGE: timer_tick_ns is not in supported range. + * - ENOMEM: unable to allocate sufficient memory for adapter instances + * - EINVAL: invalid event device identifier specified in config + * - ENOSPC: maximum number of adapters already created + */ +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_create_ext( + const struct rte_event_timer_adapter_conf *conf, + rte_event_timer_adapter_port_conf_cb_t conf_cb, + void *conf_arg); + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Timer adapter info structure. 
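 * When an adapter was created with RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES,
 * min_resolution_ns and max_tmo_ns report the values the implementation
 * actually settled on, which may differ from what was requested.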
+ */ +struct rte_event_timer_adapter_info { + uint64_t min_resolution_ns; + /**< Minimum timer adapter resolution in ns */ + uint64_t max_tmo_ns; + /**< Maximum timer timeout(expire) in ns */ + struct rte_event_timer_adapter_conf conf; + /**< Configured timer adapter attributes */ + uint32_t caps; + /**< Event timer adapter capabilities */ + int16_t event_dev_port_id; + /**< Event device port ID, if applicable */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the contextual information of an event timer adapter. + * + * @param adapter + * A pointer to the event timer adapter structure. + * + * @param[out] adapter_info + * A pointer to a structure of type *rte_event_timer_adapter_info* to be + * filled with the contextual information of the adapter. + * + * @return + * - 0: Success, driver updates the contextual information of the + * timer adapter + * - <0: Error code returned by the driver info get function. + * - -EINVAL: adapter identifier invalid + * + * @see RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES, + * struct rte_event_timer_adapter_info + * + */ +int __rte_experimental +rte_event_timer_adapter_get_info( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_info *adapter_info); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Start a timer adapter. + * + * The adapter start step is the last one and consists of setting the timer + * adapter to start accepting the timers and schedules to event queues. + * + * On success, all basic functions exported by the API (timer arm, + * timer cancel and so on) can be invoked. + * + * @param adapter + * A pointer to the event timer adapter structure. + * + * @return + * - 0: Success, adapter started. + * - <0: Error code returned by the driver start function. + * - -EINVAL if adapter identifier invalid + * - -ENOENT if software adapter but no service core mapped + * - -ENOTSUP if software adapter and more than one service core mapped + */ +int __rte_experimental +rte_event_timer_adapter_start( + const struct rte_event_timer_adapter *adapter); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Stop an event timer adapter. + * + * The adapter can be restarted with a call to + * ``rte_event_timer_adapter_start()``. + * + * @param adapter + * A pointer to the event timer adapter structure. + * + * @return + * - 0: Success, adapter stopped. + * - <0: Error code returned by the driver stop function. + * - -EINVAL if adapter identifier invalid + */ +int __rte_experimental +rte_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup an event timer adapter using its identifier. + * + * If an event timer adapter was created in another process with the same + * identifier, this function will locate its state and set up access to it + * so that it can be used in this process. + * + * @param adapter_id + * The event timer adapter identifier. + * + * @return + * A pointer to the event timer adapter matching the identifier on success. + * NULL on error with rte_errno set appropriately. + * Possible rte_errno values include: + * - ENOENT - requested entry not available to return. 
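 *
 * As a sketch, a secondary process re-acquires the handle by id (assuming the
 * primary created the adapter with timer_adapter_id 0):
 *
 *      struct rte_event_timer_adapter *adapter =
 *              rte_event_timer_adapter_lookup(0);
 *
 *      if (adapter == NULL && rte_errno == ENOENT)
 *              return -1;      // no adapter registered under that id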
+ */ +struct rte_event_timer_adapter * __rte_experimental +rte_event_timer_adapter_lookup(uint16_t adapter_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Free an event timer adapter. + * + * Destroy an event timer adapter, freeing all resources. + * + * Before invoking this function, the application must wait for all the + * armed timers to expire or cancel the outstanding armed timers. + * + * @param adapter + * A pointer to an event timer adapter structure. + * + * @return + * - 0: Successfully freed the event timer adapter resources. + * - <0: Failed to free the event timer adapter resources. + * - -EAGAIN: adapter is busy; timers outstanding + * - -EBUSY: stop hasn't been called for this adapter yet + * - -EINVAL: adapter id invalid, or adapter invalid + */ +int __rte_experimental +rte_event_timer_adapter_free(struct rte_event_timer_adapter *adapter); + +/** + * Retrieve the service ID of the event timer adapter. If the adapter doesn't + * use an rte_service function, this function returns -ESRCH. + * + * @param adapter + * A pointer to an event timer adapter. + * + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * + * @return + * - 0: Success + * - <0: Error code on failure + * - -ESRCH: the adapter does not require a service to operate + */ +int __rte_experimental +rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter, + uint32_t *service_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve statistics for an event timer adapter instance. + * + * @param adapter + * A pointer to an event timer adapter structure. + * @param[out] stats + * A pointer to a structure to fill with statistics. + * + * @return + * - 0: Successfully retrieved. + * - <0: Failure; error code returned. + */ +int __rte_experimental +rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset statistics for an event timer adapter instance. + * + * @param adapter + * A pointer to an event timer adapter structure. + * + * @return + * - 0: Successfully reset; + * - <0: Failure; error code returned. + */ +int __rte_experimental rte_event_timer_adapter_stats_reset( + struct rte_event_timer_adapter *adapter); + +/** + * Retrieve the service ID of the event timer adapter. If the adapter doesn't + * use an rte_service function, this function returns -ESRCH. + * + * @param adapter + * A pointer to an event timer adapter. + * + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * + * @return + * - 0: Success + * - <0: Error code on failure, if the event dev doesn't use a rte_service + * function, this function returns -ESRCH. + */ +int +rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter, + uint32_t *service_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve statistics for an event timer adapter instance. + * + * @param adapter + * A pointer to an event timer adapter structure. + * @param[out] stats + * A pointer to a structure to fill with statistics. + * + * @return + * - 0: Successfully retrieved. + * - <0: Failure; error code returned. 
+ */ +int rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset statistics for an event timer adapter instance. + * + * @param adapter + * A pointer to an event timer adapter structure. + * + * @return + * - 0: Successfully reset; + * - <0: Failure; error code returned. + */ +int rte_event_timer_adapter_stats_reset( + struct rte_event_timer_adapter *adapter); + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * Event timer state. + */ +enum rte_event_timer_state { + RTE_EVENT_TIMER_NOT_ARMED = 0, + /**< Event timer not armed. */ + RTE_EVENT_TIMER_ARMED = 1, + /**< Event timer successfully armed. */ + RTE_EVENT_TIMER_CANCELED = 2, + /**< Event timer successfully canceled. */ + RTE_EVENT_TIMER_ERROR = -1, + /**< Generic event timer error. */ + RTE_EVENT_TIMER_ERROR_TOOEARLY = -2, + /**< Event timer timeout tick value is too small for the adapter to + * handle, given its configured resolution. + */ + RTE_EVENT_TIMER_ERROR_TOOLATE = -3, + /**< Event timer timeout tick is greater than the maximum timeout.*/ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * The generic *rte_event_timer* structure to hold the event timer attributes + * for arm and cancel operations. + */ +RTE_STD_C11 +struct rte_event_timer { + struct rte_event ev; + /**< + * Expiry event attributes. On successful event timer timeout, + * the following attributes will be used to inject the expiry event to + * the eventdev: + * - event_queue_id: Targeted event queue id for expiry events. + * - event_priority: Event priority of the event expiry event in the + * event queue relative to other events. + * - sched_type: Scheduling type of the expiry event. + * - flow_id: Flow id of the expiry event. + * - op: RTE_EVENT_OP_NEW + * - event_type: RTE_EVENT_TYPE_TIMER + */ + volatile enum rte_event_timer_state state; + /**< State of the event timer. */ + uint64_t timeout_ticks; + /**< Expiry timer ticks expressed in number of *timer_ticks_ns* from + * now. + * @see struct rte_event_timer_adapter_info::adapter_conf::timer_tick_ns + */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An event timer adapter implementation use this field to hold + * implementation specific values to share between the arm and cancel + * operations. The application should not modify this field. + */ + uint8_t user_meta[0]; + /**< Memory to store user specific metadata. + * The event timer adapter implementation should not modify this area. + */ +} __rte_cache_aligned; + +typedef uint16_t (*rte_event_timer_arm_burst_t)( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **tims, + uint16_t nb_tims); +/**< @internal Enable event timers to enqueue timer events upon expiry */ +typedef uint16_t (*rte_event_timer_arm_tmo_tick_burst_t)( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **tims, + uint64_t timeout_tick, + uint16_t nb_tims); +/**< @internal Enable event timers with common expiration time */ +typedef uint16_t (*rte_event_timer_cancel_burst_t)( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **tims, + uint16_t nb_tims); +/**< @internal Prevent event timers from enqueuing timer events */ + +/** + * @internal Data structure associated with each event timer adapter. 
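 * The arm/cancel burst pointers are kept directly in this structure, outside
 * the ops table, so the inline rte_event_timer_arm_burst() and
 * rte_event_timer_cancel_burst() wrappers below reach the driver without an
 * extra dereference through *ops* on the fast path.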
+ */ +struct rte_event_timer_adapter { + rte_event_timer_arm_burst_t arm_burst; + /**< Pointer to driver arm_burst function. */ + rte_event_timer_arm_tmo_tick_burst_t arm_tmo_tick_burst; + /**< Pointer to driver arm_tmo_tick_burst function. */ + rte_event_timer_cancel_burst_t cancel_burst; + /**< Pointer to driver cancel function. */ + struct rte_event_timer_adapter_data *data; + /**< Pointer to shared adapter data */ + const struct rte_event_timer_adapter_ops *ops; + /**< Functions exported by adapter driver */ + + RTE_STD_C11 + uint8_t allocated : 1; + /**< Flag to indicate that this adapter has been allocated */ +} __rte_cache_aligned; + +#define ADAPTER_VALID_OR_ERR_RET(adapter, retval) do { \ + if (adapter == NULL || !adapter->allocated) \ + return retval; \ +} while (0) + +#define FUNC_PTR_OR_ERR_RET(func, errval) do { \ + if ((func) == NULL) \ + return errval; \ +} while (0) + +#define FUNC_PTR_OR_NULL_RET_WITH_ERRNO(func, errval) do { \ + if ((func) == NULL) { \ + rte_errno = errval; \ + return NULL; \ + } \ +} while (0) + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Arm a burst of event timers with separate expiration timeout tick for each + * event timer. + * + * Before calling this function, the application allocates + * ``struct rte_event_timer`` objects from mempool or huge page backed + * application buffers of desired size. On successful allocation, + * application updates the `struct rte_event_timer`` attributes such as + * expiry event attributes, timeout ticks from now. + * This function submits the event timer arm requests to the event timer adapter + * and on expiry, the events will be injected to designated event queue. + * + * @param adapter + * A pointer to an event timer adapter structure. + * @param evtims + * Pointer to an array of objects of type *rte_event_timer* structure. + * @param nb_evtims + * Number of event timers in the supplied array. + * + * @return + * The number of successfully armed event timers. The return value can be less + * than the value of the *nb_evtims* parameter. If the return value is less + * than *nb_evtims*, the remaining event timers at the end of *evtims* + * are not consumed, and the caller has to take care of them, and rte_errno + * is set accordingly. Possible errno values include: + * - EINVAL Invalid timer adapter, expiry event queue ID is invalid, or an + * expiry event's sched type doesn't match the capabilities of the + * destination event queue. + * - EAGAIN Specified timer adapter is not running + * - EALREADY A timer was encountered that was already armed + */ +static inline uint16_t __rte_experimental +rte_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint16_t nb_evtims) +{ +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->arm_burst, -EINVAL); +#endif + return adapter->arm_burst(adapter, evtims, nb_evtims); +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Arm a burst of event timers with same expiration timeout tick. + * + * Provides the same functionality as ``rte_event_timer_arm_burst()``, except + * that application can use this API when all the event timers have the + * same timeout expiration tick. This specialized function can provide the + * additional hint to the adapter implementation and optimize if possible. + * + * @param adapter + * A pointer to an event timer adapter structure. 
+ * @param evtims + * Points to an array of objects of type *rte_event_timer* structure. + * @param timeout_ticks + * The number of ticks in which the timers should expire. + * @param nb_evtims + * Number of event timers in the supplied array. + * + * @return + * The number of successfully armed event timers. The return value can be less + * than the value of the *nb_evtims* parameter. If the return value is less + * than *nb_evtims*, the remaining event timers at the end of *evtims* + * are not consumed, and the caller has to take care of them, and rte_errno + * is set accordingly. Possible errno values include: + * - EINVAL Invalid timer adapter, expiry event queue ID is invalid, or an + * expiry event's sched type doesn't match the capabilities of the + * destination event queue. + * - EAGAIN Specified event timer adapter is not running + * - EALREADY A timer was encountered that was already armed + */ +static inline uint16_t __rte_experimental +rte_event_timer_arm_tmo_tick_burst( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + const uint64_t timeout_ticks, + const uint16_t nb_evtims) +{ +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->arm_tmo_tick_burst, -EINVAL); +#endif + return adapter->arm_tmo_tick_burst(adapter, evtims, timeout_ticks, + nb_evtims); +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Cancel a burst of event timers from being scheduled to the event device. + * + * @param adapter + * A pointer to an event timer adapter structure. + * @param evtims + * Points to an array of objects of type *rte_event_timer* structure + * @param nb_evtims + * Number of event timer instances in the supplied array. + * + * @return + * The number of successfully canceled event timers. The return value can be + * less than the value of the *nb_evtims* parameter. If the return value is + * less than *nb_evtims*, the remaining event timers at the end of *evtims* + * are not consumed, and the caller has to take care of them, and rte_errno + * is set accordingly. Possible errno values include: + * - EINVAL Invalid timer adapter identifier + * - EAGAIN Specified timer adapter is not running + * - EALREADY A timer was encountered that was already canceled + */ +static inline uint16_t __rte_experimental +rte_event_timer_cancel_burst(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer **evtims, + uint16_t nb_evtims) +{ +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL); + FUNC_PTR_OR_ERR_RET(adapter->cancel_burst, -EINVAL); +#endif + return adapter->cancel_burst(adapter, evtims, nb_evtims); +} + +#endif /* __RTE_EVENT_TIMER_ADAPTER_H__ */ diff --git a/lib/librte_eventdev/rte_event_timer_adapter_pmd.h b/lib/librte_eventdev/rte_event_timer_adapter_pmd.h new file mode 100644 index 00000000..cf3509dc --- /dev/null +++ b/lib/librte_eventdev/rte_event_timer_adapter_pmd.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation. + * All rights reserved. + */ + +#ifndef __RTE_EVENT_TIMER_ADAPTER_PMD_H__ +#define __RTE_EVENT_TIMER_ADAPTER_PMD_H__ + +/** + * @file + * RTE Event Timer Adapter API (PMD Side) + * + * @note + * This file provides implementation helpers for internal use by PMDs. They + * are not intended to be exposed to applications and are not subject to ABI + * versioning. 
+ * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rte_event_timer_adapter.h" + +/* + * Definitions of functions exported by an event timer adapter implementation + * through *rte_event_timer_adapter_ops* structure supplied in the + * *rte_event_timer_adapter* structure associated with an event timer adapter. + */ + +typedef int (*rte_event_timer_adapter_init_t)( + struct rte_event_timer_adapter *adapter); +/**< @internal Event timer adapter implementation setup */ +typedef int (*rte_event_timer_adapter_uninit_t)( + struct rte_event_timer_adapter *adapter); +/**< @internal Event timer adapter implementation teardown */ +typedef int (*rte_event_timer_adapter_start_t)( + const struct rte_event_timer_adapter *adapter); +/**< @internal Start running event timer adapter */ +typedef int (*rte_event_timer_adapter_stop_t)( + const struct rte_event_timer_adapter *adapter); +/**< @internal Stop running event timer adapter */ +typedef void (*rte_event_timer_adapter_get_info_t)( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_info *adapter_info); +/**< @internal Get contextual information for event timer adapter */ +typedef int (*rte_event_timer_adapter_stats_get_t)( + const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats); +/**< @internal Get statistics for event timer adapter */ +typedef int (*rte_event_timer_adapter_stats_reset_t)( + const struct rte_event_timer_adapter *adapter); +/**< @internal Reset statistics for event timer adapter */ + +/** + * @internal Structure containing the functions exported by an event timer + * adapter implementation. + */ +struct rte_event_timer_adapter_ops { + rte_event_timer_adapter_init_t init; /**< Set up adapter */ + rte_event_timer_adapter_uninit_t uninit;/**< Tear down adapter */ + rte_event_timer_adapter_start_t start; /**< Start adapter */ + rte_event_timer_adapter_stop_t stop; /**< Stop adapter */ + rte_event_timer_adapter_get_info_t get_info; + /**< Get info from driver */ + rte_event_timer_adapter_stats_get_t stats_get; + /**< Get adapter statistics */ + rte_event_timer_adapter_stats_reset_t stats_reset; + /**< Reset adapter statistics */ + rte_event_timer_arm_burst_t arm_burst; + /**< Arm one or more event timers */ + rte_event_timer_arm_tmo_tick_burst_t arm_tmo_tick_burst; + /**< Arm event timers with same expiration time */ + rte_event_timer_cancel_burst_t cancel_burst; + /**< Cancel one or more event timers */ +}; + +/** + * @internal Adapter data; structure to be placed in shared memory to be + * accessible by various processes in a multi-process configuration. + */ +struct rte_event_timer_adapter_data { + uint8_t id; + /**< Event timer adapter ID */ + uint8_t event_dev_id; + /**< Event device ID */ + uint32_t socket_id; + /**< Socket ID where memory is allocated */ + uint8_t event_port_id; + /**< Optional: event port ID used when the inbuilt port is absent */ + const struct rte_memzone *mz; + /**< Event timer adapter memzone pointer */ + struct rte_event_timer_adapter_conf conf; + /**< Configuration used to configure the adapter. */ + uint32_t caps; + /**< Adapter capabilities */ + void *adapter_priv; + /**< Timer adapter private data*/ + uint8_t service_inited; + /**< Service initialization state */ + uint32_t service_id; + /**< Service ID*/ + + RTE_STD_C11 + uint8_t started : 1; + /**< Flag to indicate adapter started. 
*/ +} __rte_cache_aligned; + +#ifdef __cplusplus +} +#endif + +#endif /* __RTE_EVENT_TIMER_ADAPTER_PMD_H__ */ diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index 851a1190..801810ed 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "rte_eventdev.h" #include "rte_eventdev_pmd.h" @@ -55,16 +57,21 @@ int rte_event_dev_get_dev_id(const char *name) { int i; + uint8_t cmp; if (!name) return -EINVAL; - for (i = 0; i < rte_eventdev_globals->nb_devs; i++) - if ((strcmp(rte_event_devices[i].data->name, name) - == 0) && - (rte_event_devices[i].attached == - RTE_EVENTDEV_ATTACHED)) + for (i = 0; i < rte_eventdev_globals->nb_devs; i++) { + cmp = (strncmp(rte_event_devices[i].data->name, name, + RTE_EVENTDEV_NAME_MAX_LEN) == 0) || + (rte_event_devices[i].dev ? (strncmp( + rte_event_devices[i].dev->driver->name, name, + RTE_EVENTDEV_NAME_MAX_LEN) == 0) : 0); + if (cmp && (rte_event_devices[i].attached == + RTE_EVENTDEV_ATTACHED)) return i; + } return -ENODEV; } @@ -123,6 +130,51 @@ rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, : 0; } +int __rte_experimental +rte_event_timer_adapter_caps_get(uint8_t dev_id, uint32_t *caps) +{ + struct rte_eventdev *dev; + const struct rte_event_timer_adapter_ops *ops; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + + dev = &rte_eventdevs[dev_id]; + + if (caps == NULL) + return -EINVAL; + *caps = 0; + + return dev->dev_ops->timer_adapter_caps_get ? + (*dev->dev_ops->timer_adapter_caps_get)(dev, + 0, + caps, + &ops) + : 0; +} + +int __rte_experimental +rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id, + uint32_t *caps) +{ + struct rte_eventdev *dev; + struct rte_cryptodev *cdev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + if (!rte_cryptodev_pmd_is_valid_dev(cdev_id)) + return -EINVAL; + + dev = &rte_eventdevs[dev_id]; + cdev = rte_cryptodev_pmd_get_dev(cdev_id); + + if (caps == NULL) + return -EINVAL; + *caps = 0; + + return dev->dev_ops->crypto_adapter_caps_get ? + (*dev->dev_ops->crypto_adapter_caps_get) + (dev, cdev, caps) : -ENOTSUP; +} + static inline int rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues) { @@ -1123,6 +1175,23 @@ rte_event_dev_start(uint8_t dev_id) return 0; } +int +rte_event_dev_stop_flush_callback_register(uint8_t dev_id, + eventdev_stop_flush_t callback, void *userdata) +{ + struct rte_eventdev *dev; + + RTE_EDEV_LOG_DEBUG("Stop flush register dev_id=%" PRIu8, dev_id); + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + + dev->dev_ops->dev_stop_flush = callback; + dev->data->dev_stop_flush_arg = userdata; + + return 0; +} + void rte_event_dev_stop(uint8_t dev_id) { diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index b21c2717..b6fd6ee7 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1,35 +1,8 @@ -/* - * BSD LICENSE - * - * Copyright 2016 Cavium, Inc. - * Copyright 2016 Intel Corporation. - * Copyright 2016 NXP. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Cavium, Inc nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 Cavium, Inc. + * Copyright(c) 2016-2018 Intel Corporation. + * Copyright 2016 NXP + * All rights reserved. */ #ifndef _RTE_EVENTDEV_H_ @@ -244,6 +217,7 @@ extern "C" { #include struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ +struct rte_event; /* Event device capability bitmap flags */ #define RTE_EVENT_DEV_CAP_QUEUE_QOS (1ULL << 0) @@ -835,15 +809,60 @@ int rte_event_dev_start(uint8_t dev_id); /** - * Stop an event device. The device can be restarted with a call to - * rte_event_dev_start() + * Stop an event device. + * + * This function causes all queued events to be drained, including those + * residing in event ports. While draining events out of the device, this + * function calls the user-provided flush callback (if one was registered) once + * per event. + * + * The device can be restarted with a call to rte_event_dev_start(). Threads + * that continue to enqueue/dequeue while the device is stopped, or being + * stopped, will result in undefined behavior. This includes event adapters, + * which must be stopped prior to stopping the eventdev. * * @param dev_id * Event device identifier. + * + * @see rte_event_dev_stop_flush_callback_register() */ void rte_event_dev_stop(uint8_t dev_id); +typedef void (*eventdev_stop_flush_t)(uint8_t dev_id, struct rte_event event, + void *arg); +/**< Callback function called during rte_event_dev_stop(), invoked once per + * flushed event. + */ + +/** + * Registers a callback function to be invoked during rte_event_dev_stop() for + * each flushed event. This function can be used to properly dispose of queued + * events, for example events containing memory pointers. + * + * The callback function is only registered for the calling process. The + * callback function must be registered in every process that can call + * rte_event_dev_stop(). + * + * To unregister a callback, call this function with a NULL callback pointer. + * + * @param dev_id + * The identifier of the device. + * @param callback + * Callback function invoked once per flushed event. + * @param userdata + * Argument supplied to callback. + * + * @return + * - 0 on success. 
+ * - -EINVAL if *dev_id* is invalid + * + * @see rte_event_dev_stop() + */ +int +rte_event_dev_stop_flush_callback_register(uint8_t dev_id, + eventdev_stop_flush_t callback, void *userdata); + /** * Close an event device. The device cannot be restarted! * @@ -923,8 +942,8 @@ rte_event_dev_close(uint8_t dev_id); /**< The event generated from ethdev subsystem */ #define RTE_EVENT_TYPE_CRYPTODEV 0x1 /**< The event generated from crypodev subsystem */ -#define RTE_EVENT_TYPE_TIMERDEV 0x2 -/**< The event generated from timerdev subsystem */ +#define RTE_EVENT_TYPE_TIMER 0x2 +/**< The event generated from event timer adapter */ #define RTE_EVENT_TYPE_CPU 0x3 /**< The event generated from cpu for pipelining. * Application may use *sub_event_type* to further classify the event @@ -1096,7 +1115,77 @@ int rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, uint32_t *caps); -struct rte_eventdev_driver; +#define RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT (1ULL << 0) +/**< This flag is set when the timer mechanism is in HW. */ + +/** + * Retrieve the event device's timer adapter capabilities. + * + * @param dev_id + * The identifier of the device. + * + * @param[out] caps + * A pointer to memory to be filled with event timer adapter capabilities. + * + * @return + * - 0: Success, driver provided event timer adapter capabilities. + * - <0: Error code returned by the driver function. + */ +int __rte_experimental +rte_event_timer_adapter_caps_get(uint8_t dev_id, uint32_t *caps); + +/* Crypto adapter capability bitmap flag */ +#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW 0x1 +/**< Flag indicates HW is capable of generating events in + * RTE_EVENT_OP_NEW enqueue operation. Cryptodev will send + * packets to the event device as new events using an internal + * event port. + */ + +#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD 0x2 +/**< Flag indicates HW is capable of generating events in + * RTE_EVENT_OP_FORWARD enqueue operation. Cryptodev will send + * packets to the event device as forwarded event using an + * internal event port. + */ + +#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND 0x4 +/**< Flag indicates HW is capable of mapping crypto queue pair to + * event queue. + */ + +#define RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA 0x8 +/**< Flag indicates HW/SW suports a mechanism to store and retrieve + * the private data information along with the crypto session. + */ + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the event device's crypto adapter capabilities for the + * specified cryptodev device + * + * @param dev_id + * The identifier of the device. + * + * @param cdev_id + * The identifier of the cryptodev device. + * + * @param[out] caps + * A pointer to memory filled with event adapter capabilities. + * It is expected to be pre-allocated & initialized by caller. + * + * @return + * - 0: Success, driver provides event adapter capabilities for the + * cryptodev device. + * - <0: Error code returned by the driver function. 
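 *
 * A short sketch of a capability check (dev_id and cdev_id are assumed to be
 * valid event device and cryptodev identifiers):
 *
 *      uint32_t caps = 0;
 *      int has_internal_port;
 *
 *      if (rte_event_crypto_adapter_caps_get(dev_id, cdev_id, &caps) != 0)
 *              return -1;
 *      has_internal_port =
 *              !!(caps & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW);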
+ * + */ +int __rte_experimental +rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id, + uint32_t *caps); + struct rte_eventdev_ops; struct rte_eventdev; @@ -1152,6 +1241,8 @@ struct rte_eventdev_data { /* Service initialization state */ uint32_t service_id; /* Service ID*/ + void *dev_stop_flush_arg; + /**< User-provided argument for event flush function */ RTE_STD_C11 uint8_t dev_started : 1; @@ -1178,7 +1269,7 @@ struct rte_eventdev { struct rte_eventdev_data *data; /**< Pointer to device data */ - const struct rte_eventdev_ops *dev_ops; + struct rte_eventdev_ops *dev_ops; /**< Functions exported by PMD */ struct rte_device *dev; /**< Device info. supplied by probing */ diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h index 31343b51..3fbb4d2b 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd.h +++ b/lib/librte_eventdev/rte_eventdev_pmd.h @@ -26,6 +26,7 @@ extern "C" { #include #include "rte_eventdev.h" +#include "rte_event_timer_adapter_pmd.h" /* Logging Macros */ #define RTE_EDEV_LOG_ERR(...) \ @@ -69,6 +70,9 @@ extern "C" { ((RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) | \ (RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ)) +#define RTE_EVENT_CRYPTO_ADAPTER_SW_CAP \ + RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA + /**< Ethernet Rx adapter cap to return If the packet transfers from * the ethdev to eventdev use a SW service function */ @@ -448,6 +452,37 @@ typedef int (*eventdev_eth_rx_adapter_caps_get_t) struct rte_event_eth_rx_adapter_queue_conf *queue_conf; +/** + * Retrieve the event device's timer adapter capabilities, as well as the ops + * structure that an event timer adapter should call through to enter the + * driver + * + * @param dev + * Event device pointer + * + * @param flags + * Flags that can be used to determine how to select an event timer + * adapter ops structure + * + * @param[out] caps + * A pointer to memory filled with Rx event adapter capabilities. + * + * @param[out] ops + * A pointer to the ops pointer to set with the address of the desired ops + * structure + * + * @return + * - 0: Success, driver provides Rx event adapter capabilities for the + * ethernet device. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_timer_adapter_caps_get_t)( + const struct rte_eventdev *dev, + uint64_t flags, + uint32_t *caps, + const struct rte_event_timer_adapter_ops **ops); + /** * Add ethernet Rx queues to event device. This callback is invoked if * the caps returned from rte_eventdev_eth_rx_adapter_caps_get(, eth_port_id) @@ -585,6 +620,175 @@ typedef int (*eventdev_eth_rx_adapter_stats_reset) */ typedef int (*eventdev_selftest)(void); + +struct rte_cryptodev; + +/** + * This API may change without prior notice + * + * Retrieve the event device's crypto adapter capabilities for the + * specified cryptodev + * + * @param dev + * Event device pointer + * + * @param cdev + * cryptodev pointer + * + * @param[out] caps + * A pointer to memory filled with event adapter capabilities. + * It is expected to be pre-allocated & initialized by caller. + * + * @return + * - 0: Success, driver provides event adapter capabilities for the + * cryptodev. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_crypto_adapter_caps_get_t) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + uint32_t *caps); + +/** + * This API may change without prior notice + * + * Add crypto queue pair to event device. 
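/*
 * Illustrative application-side sketch of probing the capabilities defined
 * above; dev_id/cdev_id are placeholders and both calls are experimental in
 * this release (build with ALLOW_EXPERIMENTAL_API).
 */
#include <stdio.h>
#include <rte_eventdev.h>

static void
probe_adapter_caps(uint8_t dev_id, uint8_t cdev_id)
{
	uint32_t caps = 0;

	if (rte_event_timer_adapter_caps_get(dev_id, &caps) == 0 &&
	    (caps & RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT))
		printf("eventdev %u: timer expiries delivered by HW\n", dev_id);

	if (rte_event_crypto_adapter_caps_get(dev_id, cdev_id, &caps) == 0) {
		if (caps & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD)
			printf("cryptodev %u: HW forwards completions as events\n",
			       cdev_id);
		else
			printf("cryptodev %u: a SW service enqueues completions\n",
			       cdev_id);
	}
}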
This callback is invoked if + * the caps returned from rte_event_crypto_adapter_caps_get(, cdev_id) + * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set. + * + * @param dev + * Event device pointer + * + * @param cdev + * cryptodev pointer + * + * @param queue_pair_id + * cryptodev queue pair identifier. + * + * @param event + * Event information required for binding cryptodev queue pair to event queue. + * This structure will have a valid value for only those HW PMDs supporting + * @see RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND capability. + * + * @return + * - 0: Success, cryptodev queue pair added successfully. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_crypto_adapter_queue_pair_add_t) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + int32_t queue_pair_id, + const struct rte_event *event); + + +/** + * This API may change without prior notice + * + * Delete crypto queue pair to event device. This callback is invoked if + * the caps returned from rte_event_crypto_adapter_caps_get(, cdev_id) + * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set. + * + * @param dev + * Event device pointer + * + * @param cdev + * cryptodev pointer + * + * @param queue_pair_id + * cryptodev queue pair identifier. + * + * @return + * - 0: Success, cryptodev queue pair deleted successfully. + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_crypto_adapter_queue_pair_del_t) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + int32_t queue_pair_id); + +/** + * Start crypto adapter. This callback is invoked if + * the caps returned from rte_event_crypto_adapter_caps_get(.., cdev_id) + * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set and queue pairs + * from cdev_id have been added to the event device. + * + * @param dev + * Event device pointer + * + * @param cdev + * Crypto device pointer + * + * @return + * - 0: Success, crypto adapter started successfully. + * - <0: Error code returned by the driver function. + */ +typedef int (*eventdev_crypto_adapter_start_t) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev); + +/** + * Stop crypto adapter. This callback is invoked if + * the caps returned from rte_event_crypto_adapter_caps_get(.., cdev_id) + * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set and queue pairs + * from cdev_id have been added to the event device. + * + * @param dev + * Event device pointer + * + * @param cdev + * Crypto device pointer + * + * @return + * - 0: Success, crypto adapter stopped successfully. + * - <0: Error code returned by the driver function. + */ +typedef int (*eventdev_crypto_adapter_stop_t) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev); + +struct rte_event_crypto_adapter_stats; + +/** + * Retrieve crypto adapter statistics. + * + * @param dev + * Event device pointer + * + * @param cdev + * Crypto device pointer + * + * @param[out] stats + * Pointer to stats structure + * + * @return + * Return 0 on success. + */ + +typedef int (*eventdev_crypto_adapter_stats_get) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + struct rte_event_crypto_adapter_stats *stats); + +/** + * Reset crypto adapter statistics. + * + * @param dev + * Event device pointer + * + * @param cdev + * Crypto device pointer + * + * @return + * Return 0 on success. 
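/*
 * Hedged driver-side sketch: a hypothetical software PMD implementing the
 * capability callback declared above and wiring it into its ops table (the
 * rte_eventdev_ops additions appear further down in this hunk). All my_pmd_*
 * names are invented; real drivers fill in the remaining callbacks the same way.
 */
#include <rte_eventdev_pmd.h>

static int
my_pmd_crypto_caps_get(const struct rte_eventdev *dev __rte_unused,
		       const struct rte_cryptodev *cdev __rte_unused,
		       uint32_t *caps)
{
	/* Software path: only the session private data capability is advertised. */
	*caps = RTE_EVENT_CRYPTO_ADAPTER_SW_CAP;
	return 0;
}

static struct rte_eventdev_ops my_pmd_ops = {
	/* ...existing callbacks elided... */
	.crypto_adapter_caps_get = my_pmd_crypto_caps_get,
};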
+ */ + +typedef int (*eventdev_crypto_adapter_stats_reset) + (const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev); + /** Event device operations function pointer table */ struct rte_eventdev_ops { eventdev_info_get_t dev_infos_get; /**< Get device info. */ @@ -640,8 +844,29 @@ struct rte_eventdev_ops { eventdev_eth_rx_adapter_stats_reset eth_rx_adapter_stats_reset; /**< Reset ethernet Rx stats */ + eventdev_timer_adapter_caps_get_t timer_adapter_caps_get; + /**< Get timer adapter capabilities */ + + eventdev_crypto_adapter_caps_get_t crypto_adapter_caps_get; + /**< Get crypto adapter capabilities */ + eventdev_crypto_adapter_queue_pair_add_t crypto_adapter_queue_pair_add; + /**< Add queue pair to crypto adapter */ + eventdev_crypto_adapter_queue_pair_del_t crypto_adapter_queue_pair_del; + /**< Delete queue pair from crypto adapter */ + eventdev_crypto_adapter_start_t crypto_adapter_start; + /**< Start crypto adapter */ + eventdev_crypto_adapter_stop_t crypto_adapter_stop; + /**< Stop crypto adapter */ + eventdev_crypto_adapter_stats_get crypto_adapter_stats_get; + /**< Get crypto stats */ + eventdev_crypto_adapter_stats_reset crypto_adapter_stats_reset; + /**< Reset crypto stats */ + eventdev_selftest dev_selftest; /**< Start eventdev Selftest */ + + eventdev_stop_flush_t dev_stop_flush; + /**< User-provided event flush function */ }; /** diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map index 2aef470b..12835e9f 100644 --- a/lib/librte_eventdev/rte_eventdev_version.map +++ b/lib/librte_eventdev/rte_eventdev_version.map @@ -66,7 +66,6 @@ DPDK_17.11 { rte_event_eth_rx_adapter_stats_get; rte_event_eth_rx_adapter_stats_reset; rte_event_eth_rx_adapter_stop; - } DPDK_17.08; DPDK_18.02 { @@ -74,3 +73,41 @@ DPDK_18.02 { rte_event_dev_selftest; } DPDK_17.11; + +DPDK_18.05 { + global: + + rte_event_dev_stop_flush_callback_register; +} DPDK_18.02; + +EXPERIMENTAL { + global: + + rte_event_crypto_adapter_caps_get; + rte_event_crypto_adapter_create; + rte_event_crypto_adapter_create_ext; + rte_event_crypto_adapter_event_port_get; + rte_event_crypto_adapter_free; + rte_event_crypto_adapter_queue_pair_add; + rte_event_crypto_adapter_queue_pair_del; + rte_event_crypto_adapter_service_id_get; + rte_event_crypto_adapter_start; + rte_event_crypto_adapter_stats_get; + rte_event_crypto_adapter_stats_reset; + rte_event_crypto_adapter_stop; + rte_event_eth_rx_adapter_cb_register; + rte_event_timer_adapter_caps_get; + rte_event_timer_adapter_create; + rte_event_timer_adapter_create_ext; + rte_event_timer_adapter_free; + rte_event_timer_adapter_get_info; + rte_event_timer_adapter_lookup; + rte_event_timer_adapter_service_id_get; + rte_event_timer_adapter_start; + rte_event_timer_adapter_stats_get; + rte_event_timer_adapter_stats_reset; + rte_event_timer_adapter_stop; + rte_event_timer_arm_burst; + rte_event_timer_arm_tmo_tick_burst; + rte_event_timer_cancel_burst; +}; diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c index 7edb2f15..4c3469da 100644 --- a/lib/librte_flow_classify/rte_flow_classify.c +++ b/lib/librte_flow_classify/rte_flow_classify.c @@ -635,9 +635,7 @@ action_apply(struct rte_flow_classifier *cls, } if (count) { ret = 0; - ntuple_stats = - (struct rte_flow_classify_ipv4_5tuple_stats *) - stats->stats; + ntuple_stats = stats->stats; ntuple_stats->counter1 = count; ntuple_stats->ipv4_5tuple = rule->rules.u.ipv4_5tuple; } @@ -675,10 +673,7 @@ rte_flow_classifier_query(struct 
rte_flow_classifier *cls, return ret; } -RTE_INIT(librte_flow_classify_init_log); - -static void -librte_flow_classify_init_log(void) +RTE_INIT(librte_flow_classify_init_log) { librte_flow_classify_logtype = rte_log_register("lib.flow_classify"); diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.c b/lib/librte_flow_classify/rte_flow_classify_parse.c index 10eaf043..f65ceaf7 100644 --- a/lib/librte_flow_classify/rte_flow_classify_parse.c +++ b/lib/librte_flow_classify/rte_flow_classify_parse.c @@ -279,7 +279,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, } - ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask; + ipv4_mask = item->mask; /** * Only support src & dst addresses, protocol, * others should be masked. @@ -301,7 +301,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, filter->src_ip_mask = ipv4_mask->hdr.src_addr; filter->proto_mask = ipv4_mask->hdr.next_proto_id; - ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec; + ipv4_spec = item->spec; filter->dst_ip = ipv4_spec->hdr.dst_addr; filter->src_ip = ipv4_spec->hdr.src_addr; filter->proto = ipv4_spec->hdr.next_proto_id; @@ -339,7 +339,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, } if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { - tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + tcp_mask = item->mask; /** * Only support src & dst ports, tcp flags, @@ -373,12 +373,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, return -EINVAL; } - tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + tcp_spec = item->spec; filter->dst_port = tcp_spec->hdr.dst_port; filter->src_port = tcp_spec->hdr.src_port; filter->tcp_flags = tcp_spec->hdr.tcp_flags; } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { - udp_mask = (const struct rte_flow_item_udp *)item->mask; + udp_mask = item->mask; /** * Only support src & dst ports, @@ -397,11 +397,11 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, filter->dst_port_mask = udp_mask->hdr.dst_port; filter->src_port_mask = udp_mask->hdr.src_port; - udp_spec = (const struct rte_flow_item_udp *)item->spec; + udp_spec = item->spec; filter->dst_port = udp_spec->hdr.dst_port; filter->src_port = udp_spec->hdr.src_port; } else { - sctp_mask = (const struct rte_flow_item_sctp *)item->mask; + sctp_mask = item->mask; /** * Only support src & dst ports, @@ -420,7 +420,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, filter->dst_port_mask = sctp_mask->hdr.dst_port; filter->src_port_mask = sctp_mask->hdr.src_port; - sctp_spec = (const struct rte_flow_item_sctp *)item->spec; + sctp_spec = item->spec; filter->dst_port = sctp_spec->hdr.dst_port; filter->src_port = sctp_spec->hdr.src_port; } @@ -480,12 +480,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, switch (act->type) { case RTE_FLOW_ACTION_TYPE_COUNT: action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_COUNT; - count = (const struct rte_flow_action_count *)act->conf; + count = act->conf; memcpy(&action.act.counter, count, sizeof(action.act.counter)); break; case RTE_FLOW_ACTION_TYPE_MARK: action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_MARK; - mark_spec = (const struct rte_flow_action_mark *)act->conf; + mark_spec = act->conf; memcpy(&action.act.mark, mark_spec, sizeof(action.act.mark)); break; default: @@ -502,12 +502,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr, switch (act->type) { case RTE_FLOW_ACTION_TYPE_COUNT: action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_COUNT; - count 
= (const struct rte_flow_action_count *)act->conf; + count = act->conf; memcpy(&action.act.counter, count, sizeof(action.act.counter)); break; case RTE_FLOW_ACTION_TYPE_MARK: action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_MARK; - mark_spec = (const struct rte_flow_action_mark *)act->conf; + mark_spec = act->conf; memcpy(&action.act.mark, mark_spec, sizeof(action.act.mark)); break; case RTE_FLOW_ACTION_TYPE_END: diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile index 3648ec09..1fac53a8 100644 --- a/lib/librte_gso/Makefile +++ b/lib/librte_gso/Makefile @@ -19,6 +19,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h index 5ca59745..6cd764ff 100644 --- a/lib/librte_gso/gso_common.h +++ b/lib/librte_gso/gso_common.h @@ -31,6 +31,9 @@ (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ PKT_TX_TUNNEL_GRE)) +#define IS_IPV4_UDP(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4)) == \ + (PKT_TX_UDP_SEG | PKT_TX_IPV4)) + /** * Internal function which updates the UDP header of a packet, following * segmentation. This is required to update the header's datagram length field. diff --git a/lib/librte_gso/gso_udp4.c b/lib/librte_gso/gso_udp4.c new file mode 100644 index 00000000..927dee12 --- /dev/null +++ b/lib/librte_gso/gso_udp4.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include "gso_common.h" +#include "gso_udp4.h" + +#define IPV4_HDR_MF_BIT (1U << 13) + +static inline void +update_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs, + uint16_t nb_segs) +{ + struct ipv4_hdr *ipv4_hdr; + uint16_t frag_offset = 0, is_mf; + uint16_t l2_hdrlen = pkt->l2_len, l3_hdrlen = pkt->l3_len; + uint16_t tail_idx = nb_segs - 1, length, i; + + /* + * Update IP header fields for output segments. Specifically, + * keep the same IP id, update fragment offset and total + * length. + */ + for (i = 0; i < nb_segs; i++) { + ipv4_hdr = rte_pktmbuf_mtod_offset(segs[i], struct ipv4_hdr *, + l2_hdrlen); + length = segs[i]->pkt_len - l2_hdrlen; + ipv4_hdr->total_length = rte_cpu_to_be_16(length); + + is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0; + ipv4_hdr->fragment_offset = + rte_cpu_to_be_16(frag_offset | is_mf); + frag_offset += ((length - l3_hdrlen) >> 3); + } +} + +int +gso_udp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out) +{ + struct ipv4_hdr *ipv4_hdr; + uint16_t pyld_unit_size, hdr_offset; + uint16_t frag_off; + int ret; + + /* Don't process the fragmented packet */ + ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, + pkt->l2_len); + frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); + if (unlikely(IS_FRAGMENTED(frag_off))) { + pkts_out[0] = pkt; + return 1; + } + + /* + * UDP fragmentation is the same as IP fragmentation. + * Except the first one, other output packets just have l2 + * and l3 headers. + */ + hdr_offset = pkt->l2_len + pkt->l3_len; + + /* Don't process the packet without data. 
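/*
 * Worked example of the offset arithmetic in update_ipv4_udp_headers(),
 * using illustrative sizes (14-byte Ethernet header, 20-byte IPv4 header,
 * 1514-byte output segments).
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t l2_len = 14, l3_len = 20, seg_pkt_len = 1514;	/* assumed sizes */
	uint16_t total_length = seg_pkt_len - l2_len;	/* 1500, stored in each IPv4 header */
	uint16_t step = (total_length - l3_len) >> 3;	/* 185: offset advances in 8-byte units */

	/* Every segment except the last also carries IPV4_HDR_MF_BIT (1 << 13). */
	printf("total_length=%u, fragment_offset step=%u\n", total_length, step);
	return 0;
}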
*/ + if (unlikely(hdr_offset + pkt->l4_len >= pkt->pkt_len)) { + pkts_out[0] = pkt; + return 1; + } + + pyld_unit_size = gso_size - hdr_offset; + + /* Segment the payload */ + ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool, + indirect_pool, pkts_out, nb_pkts_out); + if (ret > 1) + update_ipv4_udp_headers(pkt, pkts_out, ret); + + return ret; +} diff --git a/lib/librte_gso/gso_udp4.h b/lib/librte_gso/gso_udp4.h new file mode 100644 index 00000000..b2a2908e --- /dev/null +++ b/lib/librte_gso/gso_udp4.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _GSO_UDP4_H_ +#define _GSO_UDP4_H_ + +#include +#include + +/** + * Segment an UDP/IPv4 packet. This function doesn't check if the input + * packet has correct checksums, and doesn't update checksums for output + * GSO segments. Furthermore, it doesn't process IP fragment packets. + * + * @param pkt + * The packet mbuf to segment. + * @param gso_size + * The max length of a GSO segment, measured in bytes. + * @param direct_pool + * MBUF pool used for allocating direct buffers for output segments. + * @param indirect_pool + * MBUF pool used for allocating indirect buffers for output segments. + * @param pkts_out + * Pointer array used to store the MBUF addresses of output GSO + * segments, when the function succeeds. If the memory space in + * pkts_out is insufficient, it fails and returns -EINVAL. + * @param nb_pkts_out + * The max number of items that 'pkts_out' can keep. + * + * @return + * - The number of GSO segments filled in pkts_out on success. + * - Return -ENOMEM if run out of memory in MBUF pools. + * - Return -EINVAL for invalid parameters. + */ +int gso_udp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#endif diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build index 056534fb..ad8dd858 100644 --- a/lib/librte_gso/meson.build +++ b/lib/librte_gso/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -sources = files('gso_common.c', 'gso_tcp4.c', +sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c', 'gso_tunnel_tcp4.c', 'rte_gso.c') headers = files('rte_gso.h') deps += ['ethdev'] diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c index a44e3d43..751b5b62 100644 --- a/lib/librte_gso/rte_gso.c +++ b/lib/librte_gso/rte_gso.c @@ -11,6 +11,17 @@ #include "gso_common.h" #include "gso_tcp4.h" #include "gso_tunnel_tcp4.h" +#include "gso_udp4.h" + +#define ILLEGAL_UDP_GSO_CTX(ctx) \ + ((((ctx)->gso_types & DEV_TX_OFFLOAD_UDP_TSO) == 0) || \ + (ctx)->gso_size < RTE_GSO_UDP_SEG_SIZE_MIN) + +#define ILLEGAL_TCP_GSO_CTX(ctx) \ + ((((ctx)->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | \ + DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0) || \ + (ctx)->gso_size < RTE_GSO_SEG_SIZE_MIN) int rte_gso_segment(struct rte_mbuf *pkt, @@ -27,14 +38,12 @@ rte_gso_segment(struct rte_mbuf *pkt, if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL || nb_pkts_out < 1 || - gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN || - ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | - DEV_TX_OFFLOAD_VXLAN_TNL_TSO | - DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0)) + (ILLEGAL_UDP_GSO_CTX(gso_ctx) && + ILLEGAL_TCP_GSO_CTX(gso_ctx))) return -EINVAL; if (gso_ctx->gso_size >= pkt->pkt_len) { - pkt->ol_flags &= (~PKT_TX_TCP_SEG); + pkt->ol_flags &= (~(PKT_TX_TCP_SEG | 
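/*
 * Application-side sketch of requesting UDP/IPv4 GSO through the context
 * validated by the macros above. The mempools, the 1500-byte segment size
 * and the helper name are assumptions; the caller must also have filled in
 * pkt->l2_len, pkt->l3_len and pkt->l4_len.
 */
#include <rte_gso.h>
#include <rte_mbuf.h>

static int
udp_gso_prepare(struct rte_mbuf *pkt, struct rte_mempool *direct_pool,
		struct rte_mempool *indirect_pool, struct rte_mbuf **out,
		uint16_t nb_out)
{
	struct rte_gso_ctx gso_ctx = {
		.direct_pool = direct_pool,
		.indirect_pool = indirect_pool,
		.flag = 0,
		.gso_types = DEV_TX_OFFLOAD_UDP_TSO,
		.gso_size = 1500,	/* must be >= RTE_GSO_UDP_SEG_SIZE_MIN */
	};

	pkt->ol_flags |= PKT_TX_IPV4 | PKT_TX_UDP_SEG;

	return rte_gso_segment(pkt, &gso_ctx, out, nb_out);
}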
PKT_TX_UDP_SEG)); pkts_out[0] = pkt; return 1; } @@ -59,6 +68,11 @@ rte_gso_segment(struct rte_mbuf *pkt, ret = gso_tcp4_segment(pkt, gso_size, ipid_delta, direct_pool, indirect_pool, pkts_out, nb_pkts_out); + } else if (IS_IPV4_UDP(pkt->ol_flags) && + (gso_ctx->gso_types & DEV_TX_OFFLOAD_UDP_TSO)) { + pkt->ol_flags &= (~PKT_TX_UDP_SEG); + ret = gso_udp4_segment(pkt, gso_size, direct_pool, + indirect_pool, pkts_out, nb_pkts_out); } else { /* unsupported packet, skip */ pkts_out[0] = pkt; diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h index f4abd61c..a626a11e 100644 --- a/lib/librte_gso/rte_gso.h +++ b/lib/librte_gso/rte_gso.h @@ -17,10 +17,14 @@ extern "C" { #include #include -/* Minimum GSO segment size. */ +/* Minimum GSO segment size for TCP based packets. */ #define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \ sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1) +/* Minimum GSO segment size for UDP based packets. */ +#define RTE_GSO_UDP_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \ + sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr) + 1) + /* GSO flags for rte_gso_ctx. */ #define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0) /**< Use fixed IP ids for output GSO segments. Setting diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index e139e1d7..efc06ede 100644 --- a/lib/librte_hash/meson.build +++ b/lib/librte_hash/meson.build @@ -6,7 +6,6 @@ headers = files('rte_cmp_arm64.h', 'rte_cmp_x86.h', 'rte_crc_arm64.h', 'rte_cuckoo_hash.h', - 'rte_cuckoo_hash_x86.h', 'rte_fbk_hash.h', 'rte_hash_crc.h', 'rte_hash.h', diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 9b1387b5..f7b86c8c 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -31,9 +31,6 @@ #include "rte_hash.h" #include "rte_cuckoo_hash.h" -#if defined(RTE_ARCH_X86) -#include "rte_cuckoo_hash_x86.h" -#endif TAILQ_HEAD(rte_hash_list, rte_tailq_entry); @@ -93,8 +90,10 @@ rte_hash_create(const struct rte_hash_parameters *params) void *buckets = NULL; char ring_name[RTE_RING_NAMESIZE]; unsigned num_key_slots; - unsigned hw_trans_mem_support = 0; unsigned i; + unsigned int hw_trans_mem_support = 0, multi_writer_support = 0; + unsigned int readwrite_concur_support = 0; + rte_hash_function default_hash_func = (rte_hash_function)rte_jhash; hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); @@ -107,7 +106,6 @@ rte_hash_create(const struct rte_hash_parameters *params) /* Check for valid parameters */ if ((params->entries > RTE_HASH_ENTRIES_MAX) || (params->entries < RTE_HASH_BUCKET_ENTRIES) || - !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) || (params->key_len == 0)) { rte_errno = EINVAL; RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n"); @@ -118,21 +116,29 @@ rte_hash_create(const struct rte_hash_parameters *params) if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) hw_trans_mem_support = 1; + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) + multi_writer_support = 1; + + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) { + readwrite_concur_support = 1; + multi_writer_support = 1; + } + /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */ - if (hw_trans_mem_support) + if (multi_writer_support) /* * Increase number of slots by total number of indices * that can be stored in the lcore caches * except for the first cache */ num_key_slots = params->entries + (RTE_MAX_LCORE - 1) * - LCORE_CACHE_SIZE + 1; + (LCORE_CACHE_SIZE - 1) + 1; else 
num_key_slots = params->entries + 1; snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); /* Create ring (Dummy slot index is not enqueued) */ - r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1), + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), params->socket_id, 0); if (r == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); @@ -233,7 +239,7 @@ rte_hash_create(const struct rte_hash_parameters *params) h->cmp_jump_table_idx = KEY_OTHER_BYTES; #endif - if (hw_trans_mem_support) { + if (multi_writer_support) { h->local_free_slots = rte_zmalloc_socket(NULL, sizeof(struct lcore_cache) * RTE_MAX_LCORE, RTE_CACHE_LINE_SIZE, params->socket_id); @@ -261,6 +267,8 @@ rte_hash_create(const struct rte_hash_parameters *params) h->key_store = k; h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; + h->multi_writer_support = multi_writer_support; + h->readwrite_concur_support = readwrite_concur_support; #if defined(RTE_ARCH_X86) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) @@ -271,24 +279,20 @@ rte_hash_create(const struct rte_hash_parameters *params) #endif h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; - /* Turn on multi-writer only with explicit flat from user and TM + /* Turn on multi-writer only with explicit flag from user and TM * support. */ - if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) { - if (h->hw_trans_mem_support) { - h->add_key = ADD_KEY_MULTIWRITER_TM; - } else { - h->add_key = ADD_KEY_MULTIWRITER; - h->multiwriter_lock = rte_malloc(NULL, - sizeof(rte_spinlock_t), - LCORE_CACHE_SIZE); - rte_spinlock_init(h->multiwriter_lock); - } - } else - h->add_key = ADD_KEY_SINGLEWRITER; + if (h->multi_writer_support) { + h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t), + RTE_CACHE_LINE_SIZE); + if (h->readwrite_lock == NULL) + goto err_unlock; + + rte_rwlock_init(h->readwrite_lock); + } /* Populate free slots ring. Entry zero is reserved for key misses. 
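/*
 * Usage sketch of the creation flags handled above; the table name, size and
 * key type are placeholders. RW_CONCURRENCY implicitly enables multi-writer
 * support, matching the flag handling in rte_hash_create().
 */
#include <rte_hash.h>
#include <rte_jhash.h>

static struct rte_hash *
create_concurrent_table(int socket_id)
{
	struct rte_hash_parameters params = {
		.name = "flow_table",
		.entries = 1024,
		.key_len = sizeof(uint32_t),
		.hash_func = rte_jhash,
		.hash_func_init_val = 0,
		.socket_id = socket_id,
		.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
	};

	return rte_hash_create(&params);
}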
*/ - for (i = 1; i < params->entries + 1; i++) + for (i = 1; i < num_key_slots; i++) rte_ring_sp_enqueue(r, (void *)((uintptr_t) i)); te->data = (void *) h; @@ -335,11 +339,10 @@ rte_hash_free(struct rte_hash *h) rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - if (h->hw_trans_mem_support) + if (h->multi_writer_support) { rte_free(h->local_free_slots); - - if (h->add_key == ADD_KEY_MULTIWRITER) - rte_free(h->multiwriter_lock); + rte_free(h->readwrite_lock); + } rte_ring_free(h->free_slots); rte_free(h->key_store); rte_free(h->buckets); @@ -366,15 +369,78 @@ rte_hash_secondary_hash(const hash_sig_t primary_hash) return primary_hash ^ ((tag + 1) * alt_bits_xor); } +int32_t +rte_hash_count(const struct rte_hash *h) +{ + uint32_t tot_ring_cnt, cached_cnt = 0; + uint32_t i, ret; + + if (h == NULL) + return -EINVAL; + + if (h->multi_writer_support) { + tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) * + (LCORE_CACHE_SIZE - 1); + for (i = 0; i < RTE_MAX_LCORE; i++) + cached_cnt += h->local_free_slots[i].len; + + ret = tot_ring_cnt - rte_ring_count(h->free_slots) - + cached_cnt; + } else { + tot_ring_cnt = h->entries; + ret = tot_ring_cnt - rte_ring_count(h->free_slots); + } + return ret; +} + +/* Read write locks implemented using rte_rwlock */ +static inline void +__hash_rw_writer_lock(const struct rte_hash *h) +{ + if (h->multi_writer_support && h->hw_trans_mem_support) + rte_rwlock_write_lock_tm(h->readwrite_lock); + else if (h->multi_writer_support) + rte_rwlock_write_lock(h->readwrite_lock); +} + + +static inline void +__hash_rw_reader_lock(const struct rte_hash *h) +{ + if (h->readwrite_concur_support && h->hw_trans_mem_support) + rte_rwlock_read_lock_tm(h->readwrite_lock); + else if (h->readwrite_concur_support) + rte_rwlock_read_lock(h->readwrite_lock); +} + +static inline void +__hash_rw_writer_unlock(const struct rte_hash *h) +{ + if (h->multi_writer_support && h->hw_trans_mem_support) + rte_rwlock_write_unlock_tm(h->readwrite_lock); + else if (h->multi_writer_support) + rte_rwlock_write_unlock(h->readwrite_lock); +} + +static inline void +__hash_rw_reader_unlock(const struct rte_hash *h) +{ + if (h->readwrite_concur_support && h->hw_trans_mem_support) + rte_rwlock_read_unlock_tm(h->readwrite_lock); + else if (h->readwrite_concur_support) + rte_rwlock_read_unlock(h->readwrite_lock); +} + void rte_hash_reset(struct rte_hash *h) { void *ptr; - unsigned i; + uint32_t tot_ring_cnt, i; if (h == NULL) return; + __hash_rw_writer_lock(h); memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket)); memset(h->key_store, 0, h->key_entry_size * (h->entries + 1)); @@ -383,97 +449,260 @@ rte_hash_reset(struct rte_hash *h) rte_pause(); /* Repopulate the free slots ring. 
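/*
 * Small sketch of the new occupancy query; 'entries' is assumed to be the
 * value passed at table creation and the helper name is invented.
 */
#include <rte_hash.h>

static int
table_used_percent(const struct rte_hash *h, uint32_t entries)
{
	int32_t used = rte_hash_count(h);

	if (used < 0)
		return used;	/* -EINVAL when h is NULL */
	return (int)((int64_t)used * 100 / entries);
}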
Entry zero is reserved for key misses */ - for (i = 1; i < h->entries + 1; i++) + if (h->multi_writer_support) + tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) * + (LCORE_CACHE_SIZE - 1); + else + tot_ring_cnt = h->entries; + + for (i = 1; i < tot_ring_cnt + 1; i++) rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i)); - if (h->hw_trans_mem_support) { + if (h->multi_writer_support) { /* Reset local caches per lcore */ for (i = 0; i < RTE_MAX_LCORE; i++) h->local_free_slots[i].len = 0; } + __hash_rw_writer_unlock(h); } -/* Search for an entry that can be pushed to its alternative location */ -static inline int -make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt, - unsigned int *nr_pushes) +/* + * Function called to enqueue back an index in the cache/ring, + * as slot has not being used and it can be used in the + * next addition attempt. + */ +static inline void +enqueue_slot_back(const struct rte_hash *h, + struct lcore_cache *cached_free_slots, + void *slot_id) { - unsigned i, j; - int ret; - uint32_t next_bucket_idx; - struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES]; + if (h->multi_writer_support) { + cached_free_slots->objs[cached_free_slots->len] = slot_id; + cached_free_slots->len++; + } else + rte_ring_sp_enqueue(h->free_slots, slot_id); +} + +/* Search a key from bucket and update its data */ +static inline int32_t +search_and_update(const struct rte_hash *h, void *data, const void *key, + struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash) +{ + int i; + struct rte_hash_key *k, *keys = h->key_store; - /* - * Push existing item (search for bucket with space in - * alternative locations) to its alternative location - */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - /* Search for space in alternative locations */ - next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask; - next_bkt[i] = &h->buckets[next_bucket_idx]; - for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { - if (next_bkt[i]->key_idx[j] == EMPTY_SLOT) - break; + if (bkt->sig_current[i] == sig && + bkt->sig_alt[i] == alt_hash) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + /* Update data */ + k->pdata = data; + /* + * Return index where key is stored, + * subtracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } } - - if (j != RTE_HASH_BUCKET_ENTRIES) - break; } + return -1; +} - /* Alternative location has spare room (end of recursive function) */ - if (i != RTE_HASH_BUCKET_ENTRIES) { - next_bkt[i]->sig_alt[j] = bkt->sig_current[i]; - next_bkt[i]->sig_current[j] = bkt->sig_alt[i]; - next_bkt[i]->key_idx[j] = bkt->key_idx[i]; - return i; +/* Only tries to insert at one bucket (@prim_bkt) without trying to push + * buckets around. + * return 1 if matching existing key, return 0 if succeeds, return -1 for no + * empty entry. + */ +static inline int32_t +rte_hash_cuckoo_insert_mw(const struct rte_hash *h, + struct rte_hash_bucket *prim_bkt, + struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key *key, void *data, + hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + int32_t *ret_val) +{ + unsigned int i; + struct rte_hash_bucket *cur_bkt = prim_bkt; + int32_t ret; + + __hash_rw_writer_lock(h); + /* Check if key was inserted after last check but before this + * protected region in case of inserting duplicated keys. 
+ */ + ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; + } + ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; } - /* Pick entry that has not been pushed yet */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) - if (bkt->flag[i] == 0) + /* Insert new entry if there is room in the primary + * bucket. + */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Check if slot is available */ + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; + prim_bkt->key_idx[i] = new_idx; break; + } + } + __hash_rw_writer_unlock(h); - /* All entries have been pushed, so entry cannot be added */ - if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES) - return -ENOSPC; + if (i != RTE_HASH_BUCKET_ENTRIES) + return 0; - /* Set flag to indicate that this entry is going to be pushed */ - bkt->flag[i] = 1; + /* no empty entry */ + return -1; +} - /* Need room in alternative bucket to insert the pushed entry */ - ret = make_space_bucket(h, next_bkt[i], nr_pushes); - /* - * After recursive function. - * Clear flags and insert the pushed entry - * in its alternative location if successful, - * or return error +/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill + * the path head with new entry (sig, alt_hash, new_idx) + * return 1 if matched key found, return -1 if cuckoo path invalided and fail, + * return 0 if succeeds. + */ +static inline int +rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, + struct rte_hash_bucket *bkt, + struct rte_hash_bucket *alt_bkt, + const struct rte_hash_key *key, void *data, + struct queue_node *leaf, uint32_t leaf_slot, + hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + int32_t *ret_val) +{ + uint32_t prev_alt_bkt_idx; + struct rte_hash_bucket *cur_bkt = bkt; + struct queue_node *prev_node, *curr_node = leaf; + struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt; + uint32_t prev_slot, curr_slot = leaf_slot; + int32_t ret; + + __hash_rw_writer_lock(h); + + /* In case empty slot was gone before entering protected region */ + if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) { + __hash_rw_writer_unlock(h); + return -1; + } + + /* Check if key was inserted after last check but before this + * protected region. 
*/ - bkt->flag[i] = 0; - if (ret >= 0) { - next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; - next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; - next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; - return i; - } else - return ret; + ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; + } + + ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; + } + + while (likely(curr_node->prev != NULL)) { + prev_node = curr_node->prev; + prev_bkt = prev_node->bkt; + prev_slot = curr_node->prev_slot; + + prev_alt_bkt_idx = + prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; + + if (unlikely(&h->buckets[prev_alt_bkt_idx] + != curr_bkt)) { + /* revert it to empty, otherwise duplicated keys */ + curr_bkt->key_idx[curr_slot] = EMPTY_SLOT; + __hash_rw_writer_unlock(h); + return -1; + } + + /* Need to swap current/alt sig to allow later + * Cuckoo insert to move elements back to its + * primary bucket if available + */ + curr_bkt->sig_alt[curr_slot] = + prev_bkt->sig_current[prev_slot]; + curr_bkt->sig_current[curr_slot] = + prev_bkt->sig_alt[prev_slot]; + curr_bkt->key_idx[curr_slot] = + prev_bkt->key_idx[prev_slot]; + + curr_slot = prev_slot; + curr_node = prev_node; + curr_bkt = curr_node->bkt; + } + + curr_bkt->sig_current[curr_slot] = sig; + curr_bkt->sig_alt[curr_slot] = alt_hash; + curr_bkt->key_idx[curr_slot] = new_idx; + + __hash_rw_writer_unlock(h); + + return 0; } /* - * Function called to enqueue back an index in the cache/ring, - * as slot has not being used and it can be used in the - * next addition attempt. + * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe + * Cuckoo */ -static inline void -enqueue_slot_back(const struct rte_hash *h, - struct lcore_cache *cached_free_slots, - void *slot_id) +static inline int +rte_hash_cuckoo_make_space_mw(const struct rte_hash *h, + struct rte_hash_bucket *bkt, + struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key *key, void *data, + hash_sig_t sig, hash_sig_t alt_hash, + uint32_t new_idx, int32_t *ret_val) { - if (h->hw_trans_mem_support) { - cached_free_slots->objs[cached_free_slots->len] = slot_id; - cached_free_slots->len++; - } else - rte_ring_sp_enqueue(h->free_slots, slot_id); + unsigned int i; + struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN]; + struct queue_node *tail, *head; + struct rte_hash_bucket *curr_bkt, *alt_bkt; + + tail = queue; + head = queue + 1; + tail->bkt = bkt; + tail->prev = NULL; + tail->prev_slot = -1; + + /* Cuckoo bfs Search */ + while (likely(tail != head && head < + queue + RTE_HASH_BFS_QUEUE_MAX_LEN - + RTE_HASH_BUCKET_ENTRIES)) { + curr_bkt = tail->bkt; + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (curr_bkt->key_idx[i] == EMPTY_SLOT) { + int32_t ret = rte_hash_cuckoo_move_insert_mw(h, + bkt, sec_bkt, key, data, + tail, i, sig, alt_hash, + new_idx, ret_val); + if (likely(ret != -1)) + return ret; + } + + /* Enqueue new node and keep prev node info */ + alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] + & h->bucket_bitmask]); + head->bkt = alt_bkt; + head->prev = tail; + head->prev_slot = i; + head++; + } + tail++; + } + + return -ENOSPC; } static inline int32_t @@ -482,19 +711,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, { hash_sig_t alt_hash; uint32_t prim_bucket_idx, sec_bucket_idx; - unsigned i; struct rte_hash_bucket *prim_bkt, *sec_bkt; - struct rte_hash_key *new_k, *k, *keys = 
h->key_store; + struct rte_hash_key *new_k, *keys = h->key_store; void *slot_id = NULL; uint32_t new_idx; int ret; unsigned n_slots; unsigned lcore_id; struct lcore_cache *cached_free_slots = NULL; - unsigned int nr_pushes = 0; - - if (h->add_key == ADD_KEY_MULTIWRITER) - rte_spinlock_lock(h->multiwriter_lock); + int32_t ret_val; prim_bucket_idx = sig & h->bucket_bitmask; prim_bkt = &h->buckets[prim_bucket_idx]; @@ -505,8 +730,24 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, sec_bkt = &h->buckets[sec_bucket_idx]; rte_prefetch0(sec_bkt); - /* Get a new slot for storing the new key */ - if (h->hw_trans_mem_support) { + /* Check if key is already inserted in primary location */ + __hash_rw_writer_lock(h); + ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash); + if (ret != -1) { + __hash_rw_writer_unlock(h); + return ret; + } + + /* Check if key is already inserted in secondary location */ + ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + return ret; + } + __hash_rw_writer_unlock(h); + + /* Did not find a match, so get a new slot for storing the new key */ + if (h->multi_writer_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; /* Try to get a free slot from the local cache */ @@ -516,8 +757,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, cached_free_slots->objs, LCORE_CACHE_SIZE, NULL); if (n_slots == 0) { - ret = -ENOSPC; - goto failure; + return -ENOSPC; } cached_free_slots->len += n_slots; @@ -528,122 +768,50 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, slot_id = cached_free_slots->objs[cached_free_slots->len]; } else { if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) { - ret = -ENOSPC; - goto failure; + return -ENOSPC; } } new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); - rte_prefetch0(new_k); new_idx = (uint32_t)((uintptr_t) slot_id); - - /* Check if key is already inserted in primary location */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (prim_bkt->sig_current[i] == sig && - prim_bkt->sig_alt[i] == alt_hash) { - k = (struct rte_hash_key *) ((char *)keys + - prim_bkt->key_idx[i] * h->key_entry_size); - if (rte_hash_cmp_eq(key, k->key, h) == 0) { - /* Enqueue index of free slot back in the ring. */ - enqueue_slot_back(h, cached_free_slots, slot_id); - /* Update data */ - k->pdata = data; - /* - * Return index where key is stored, - * subtracting the first dummy index - */ - return prim_bkt->key_idx[i] - 1; - } - } - } - - /* Check if key is already inserted in secondary location */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (sec_bkt->sig_alt[i] == sig && - sec_bkt->sig_current[i] == alt_hash) { - k = (struct rte_hash_key *) ((char *)keys + - sec_bkt->key_idx[i] * h->key_entry_size); - if (rte_hash_cmp_eq(key, k->key, h) == 0) { - /* Enqueue index of free slot back in the ring. 
*/ - enqueue_slot_back(h, cached_free_slots, slot_id); - /* Update data */ - k->pdata = data; - /* - * Return index where key is stored, - * subtracting the first dummy index - */ - return sec_bkt->key_idx[i] - 1; - } - } - } - /* Copy key */ rte_memcpy(new_k->key, key, h->key_len); new_k->pdata = data; -#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */ - if (h->add_key == ADD_KEY_MULTIWRITER_TM) { - ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt, - sig, alt_hash, new_idx); - if (ret >= 0) - return new_idx - 1; - /* Primary bucket full, need to make space for new entry */ - ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig, - alt_hash, new_idx); + /* Find an empty slot and insert */ + ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data, + sig, alt_hash, new_idx, &ret_val); + if (ret == 0) + return new_idx - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); + return ret_val; + } - if (ret >= 0) - return new_idx - 1; + /* Primary bucket full, need to make space for new entry */ + ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data, + sig, alt_hash, new_idx, &ret_val); + if (ret == 0) + return new_idx - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); + return ret_val; + } - /* Also search secondary bucket to get better occupancy */ - ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig, - alt_hash, new_idx); + /* Also search secondary bucket to get better occupancy */ + ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data, + alt_hash, sig, new_idx, &ret_val); - if (ret >= 0) - return new_idx - 1; + if (ret == 0) + return new_idx - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); + return ret_val; } else { -#endif - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - /* Check if slot is available */ - if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { - prim_bkt->sig_current[i] = sig; - prim_bkt->sig_alt[i] = alt_hash; - prim_bkt->key_idx[i] = new_idx; - break; - } - } - - if (i != RTE_HASH_BUCKET_ENTRIES) { - if (h->add_key == ADD_KEY_MULTIWRITER) - rte_spinlock_unlock(h->multiwriter_lock); - return new_idx - 1; - } - - /* Primary bucket full, need to make space for new entry - * After recursive function. 
- * Insert the new entry in the position of the pushed entry - * if successful or return error and - * store the new slot back in the ring - */ - ret = make_space_bucket(h, prim_bkt, &nr_pushes); - if (ret >= 0) { - prim_bkt->sig_current[ret] = sig; - prim_bkt->sig_alt[ret] = alt_hash; - prim_bkt->key_idx[ret] = new_idx; - if (h->add_key == ADD_KEY_MULTIWRITER) - rte_spinlock_unlock(h->multiwriter_lock); - return new_idx - 1; - } -#if defined(RTE_ARCH_X86) + enqueue_slot_back(h, cached_free_slots, slot_id); + return ret; } -#endif - /* Error in addition, store new slot back in the ring and return error */ - enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx)); - -failure: - if (h->add_key == ADD_KEY_MULTIWRITER) - rte_spinlock_unlock(h->multiwriter_lock); - return ret; } int32_t @@ -688,20 +856,15 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) else return ret; } + +/* Search one bucket to find the match key */ static inline int32_t -__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, - hash_sig_t sig, void **data) +search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig, + void **data, const struct rte_hash_bucket *bkt) { - uint32_t bucket_idx; - hash_sig_t alt_hash; - unsigned i; - struct rte_hash_bucket *bkt; + int i; struct rte_hash_key *k, *keys = h->key_store; - bucket_idx = sig & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; - - /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (bkt->sig_current[i] == sig && bkt->key_idx[i] != EMPTY_SLOT) { @@ -718,30 +881,41 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, } } } + return -1; +} + +static inline int32_t +__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + struct rte_hash_bucket *bkt; + int ret; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + __hash_rw_reader_lock(h); + + /* Check if key is in primary location */ + ret = search_one_bucket(h, key, sig, data, bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; + } /* Calculate secondary hash */ alt_hash = rte_hash_secondary_hash(sig); bucket_idx = alt_hash & h->bucket_bitmask; bkt = &h->buckets[bucket_idx]; /* Check if key is in secondary location */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == alt_hash && - bkt->sig_alt[i] == sig) { - k = (struct rte_hash_key *) ((char *)keys + - bkt->key_idx[i] * h->key_entry_size); - if (rte_hash_cmp_eq(key, k->key, h) == 0) { - if (data != NULL) - *data = k->pdata; - /* - * Return index where key is stored, - * subtracting the first dummy index - */ - return bkt->key_idx[i] - 1; - } - } + ret = search_one_bucket(h, key, alt_hash, data, bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; } - + __hash_rw_reader_unlock(h); return -ENOENT; } @@ -783,7 +957,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) bkt->sig_current[i] = NULL_SIGNATURE; bkt->sig_alt[i] = NULL_SIGNATURE; - if (h->hw_trans_mem_support) { + if (h->multi_writer_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; /* Cache full, need to free it. 
*/ @@ -804,20 +978,15 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) } } +/* Search one bucket and remove the matched key */ static inline int32_t -__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, - hash_sig_t sig) +search_and_remove(const struct rte_hash *h, const void *key, + struct rte_hash_bucket *bkt, hash_sig_t sig) { - uint32_t bucket_idx; - hash_sig_t alt_hash; - unsigned i; - struct rte_hash_bucket *bkt; struct rte_hash_key *k, *keys = h->key_store; + unsigned int i; int32_t ret; - bucket_idx = sig & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; - /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (bkt->sig_current[i] == sig && @@ -837,32 +1006,42 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, } } } + return -1; +} + +static inline int32_t +__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + struct rte_hash_bucket *bkt; + int32_t ret; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + __hash_rw_writer_lock(h); + /* look for key in primary bucket */ + ret = search_and_remove(h, key, bkt, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + return ret; + } /* Calculate secondary hash */ alt_hash = rte_hash_secondary_hash(sig); bucket_idx = alt_hash & h->bucket_bitmask; bkt = &h->buckets[bucket_idx]; - /* Check if key is in secondary location */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == alt_hash && - bkt->key_idx[i] != EMPTY_SLOT) { - k = (struct rte_hash_key *) ((char *)keys + - bkt->key_idx[i] * h->key_entry_size); - if (rte_hash_cmp_eq(key, k->key, h) == 0) { - remove_entry(h, bkt, i); - - /* - * Return index where key is stored, - * subtracting the first dummy index - */ - ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = EMPTY_SLOT; - return ret; - } - } + /* look for key in secondary bucket */ + ret = search_and_remove(h, key, bkt, alt_hash); + if (ret != -1) { + __hash_rw_writer_unlock(h); + return ret; } + __hash_rw_writer_unlock(h); return -ENOENT; } @@ -1004,6 +1183,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, rte_prefetch0(secondary_bkt[i]); } + __hash_rw_reader_lock(h); /* Compare signatures and prefetch key slot of first hit */ for (i = 0; i < num_keys; i++) { compare_signatures(&prim_hitmask[i], &sec_hitmask[i], @@ -1086,6 +1266,8 @@ next_key: continue; } + __hash_rw_reader_unlock(h); + if (hit_mask != NULL) *hit_mask = hits; } @@ -1144,7 +1326,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; idx = *next % RTE_HASH_BUCKET_ENTRIES; } - + __hash_rw_reader_lock(h); /* Get position of entry in key table */ position = h->buckets[bucket_idx].key_idx[idx]; next_key = (struct rte_hash_key *) ((char *)h->key_store + @@ -1153,6 +1335,8 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 *key = next_key->key; *data = next_key->pdata; + __hash_rw_reader_unlock(h); + /* Increment iterator */ (*next)++; diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 7a54e555..b43f467d 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -88,15 +88,14 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { #endif -enum add_key_case { - ADD_KEY_SINGLEWRITER = 0, - ADD_KEY_MULTIWRITER, - 
ADD_KEY_MULTIWRITER_TM, -}; /** Number of items per bucket. */ #define RTE_HASH_BUCKET_ENTRIES 8 +#if !RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES) +#error RTE_HASH_BUCKET_ENTRIES must be a power of 2 +#endif + #define NULL_SIGNATURE 0 #define EMPTY_SLOT 0 @@ -155,18 +154,20 @@ struct rte_hash { struct rte_ring *free_slots; /**< Ring that stores all indexes of the free slots in the key table */ - uint8_t hw_trans_mem_support; - /**< Hardware transactional memory support */ + struct lcore_cache *local_free_slots; /**< Local cache per lcore, storing some indexes of the free slots */ - enum add_key_case add_key; /**< Multi-writer hash add behavior */ - - rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ /* Fields used in lookup */ uint32_t key_len __rte_cache_aligned; /**< Length of hash key. */ + uint8_t hw_trans_mem_support; + /**< If hardware transactional memory is used. */ + uint8_t multi_writer_support; + /**< If multi-writer support is enabled. */ + uint8_t readwrite_concur_support; + /**< If read-write concurrency support is enabled */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; @@ -184,6 +185,7 @@ struct rte_hash { /**< Table with buckets storing all the hash values and key indexes * to the key table. */ + rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */ } __rte_cache_aligned; struct queue_node { diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h deleted file mode 100644 index 2c5b017e..00000000 --- a/lib/librte_hash/rte_cuckoo_hash_x86.h +++ /dev/null @@ -1,164 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2016 Intel Corporation - */ - -/* rte_cuckoo_hash_x86.h - * This file holds all x86 specific Cuckoo Hash functions - */ - -/* Only tries to insert at one bucket (@prim_bkt) without trying to push - * buckets around - */ -static inline unsigned -rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx) -{ - unsigned i, status; - unsigned try = 0; - - while (try < RTE_HASH_TSX_MAX_RETRY) { - status = rte_xbegin(); - if (likely(status == RTE_XBEGIN_STARTED)) { - /* Insert new entry if there is room in the primary - * bucket. - */ - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - /* Check if slot is available */ - if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { - prim_bkt->sig_current[i] = sig; - prim_bkt->sig_alt[i] = alt_hash; - prim_bkt->key_idx[i] = new_idx; - break; - } - } - rte_xend(); - - if (i != RTE_HASH_BUCKET_ENTRIES) - return 0; - - break; /* break off try loop if transaction commits */ - } else { - /* If we abort we give up this cuckoo path. 
*/ - try++; - rte_pause(); - } - } - - return -1; -} - -/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill - * the path head with new entry (sig, alt_hash, new_idx) - */ -static inline int -rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, - struct queue_node *leaf, uint32_t leaf_slot, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx) -{ - unsigned try = 0; - unsigned status; - uint32_t prev_alt_bkt_idx; - - struct queue_node *prev_node, *curr_node = leaf; - struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt; - uint32_t prev_slot, curr_slot = leaf_slot; - - while (try < RTE_HASH_TSX_MAX_RETRY) { - status = rte_xbegin(); - if (likely(status == RTE_XBEGIN_STARTED)) { - while (likely(curr_node->prev != NULL)) { - prev_node = curr_node->prev; - prev_bkt = prev_node->bkt; - prev_slot = curr_node->prev_slot; - - prev_alt_bkt_idx - = prev_bkt->sig_alt[prev_slot] - & h->bucket_bitmask; - - if (unlikely(&h->buckets[prev_alt_bkt_idx] - != curr_bkt)) { - rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED); - } - - /* Need to swap current/alt sig to allow later - * Cuckoo insert to move elements back to its - * primary bucket if available - */ - curr_bkt->sig_alt[curr_slot] = - prev_bkt->sig_current[prev_slot]; - curr_bkt->sig_current[curr_slot] = - prev_bkt->sig_alt[prev_slot]; - curr_bkt->key_idx[curr_slot] - = prev_bkt->key_idx[prev_slot]; - - curr_slot = prev_slot; - curr_node = prev_node; - curr_bkt = curr_node->bkt; - } - - curr_bkt->sig_current[curr_slot] = sig; - curr_bkt->sig_alt[curr_slot] = alt_hash; - curr_bkt->key_idx[curr_slot] = new_idx; - - rte_xend(); - - return 0; - } - - /* If we abort we give up this cuckoo path, since most likely it's - * no longer valid as TSX detected data conflict - */ - try++; - rte_pause(); - } - - return -1; -} - -/* - * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe - * Cuckoo - */ -static inline int -rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, - struct rte_hash_bucket *bkt, - hash_sig_t sig, hash_sig_t alt_hash, - uint32_t new_idx) -{ - unsigned i; - struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN]; - struct queue_node *tail, *head; - struct rte_hash_bucket *curr_bkt, *alt_bkt; - - tail = queue; - head = queue + 1; - tail->bkt = bkt; - tail->prev = NULL; - tail->prev_slot = -1; - - /* Cuckoo bfs Search */ - while (likely(tail != head && head < - queue + RTE_HASH_BFS_QUEUE_MAX_LEN - - RTE_HASH_BUCKET_ENTRIES)) { - curr_bkt = tail->bkt; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (curr_bkt->key_idx[i] == EMPTY_SLOT) { - if (likely(rte_hash_cuckoo_move_insert_mw_tm(h, - tail, i, sig, - alt_hash, new_idx) == 0)) - return 0; - } - - /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] - & h->bucket_bitmask]); - head->bkt = alt_bkt; - head->prev = tail; - head->prev_slot = i; - head++; - } - tail++; - } - - return -ENOSPC; -} diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index 3beaca71..9e7d9315 100644 --- a/lib/librte_hash/rte_hash.h +++ b/lib/librte_hash/rte_hash.h @@ -34,6 +34,9 @@ extern "C" { /** Default behavior of insertion, single writer/multi writer */ #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02 +/** Flag to support reader writer concurrency */ +#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04 + /** Signature of key that is stored internally. 
*/ typedef uint32_t hash_sig_t; @@ -123,10 +126,23 @@ rte_hash_free(struct rte_hash *h); void rte_hash_reset(struct rte_hash *h); +/** + * Return the number of keys in the hash table + * @param h + * Hash table to query from + * @return + * - -EINVAL if parameters are invalid + * - A value indicating how many keys were inserted in the table. + */ +int32_t +rte_hash_count(const struct rte_hash *h); + /** * Add a key-value pair to an existing hash table. * This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to add the key to. @@ -146,7 +162,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data); * Add a key-value pair with a pre-computed hash value * to an existing hash table. * This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to add the key to. @@ -167,7 +185,9 @@ rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key, /** * Add a key to an existing hash table. This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to add the key to. @@ -185,7 +205,9 @@ rte_hash_add_key(const struct rte_hash *h, const void *key); /** * Add a key to an existing hash table. * This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to add the key to. @@ -205,7 +227,9 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t /** * Remove a key from an existing hash table. * This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to remove the key from. @@ -224,7 +248,9 @@ rte_hash_del_key(const struct rte_hash *h, const void *key); /** * Remove a key from an existing hash table. * This operation is not multi-thread safe - * and should only be called from one thread. + * and should only be called from one thread by default. + * Thread safety can be enabled by setting flag during + * table creation. * * @param h * Hash table to remove the key from. @@ -244,7 +270,9 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t /** * Find a key in the hash table given the position. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to get the key from. @@ -254,8 +282,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t * Output containing a pointer to the key * @return * - 0 if retrieved successfully - * - EINVAL if the parameters are invalid. - * - ENOENT if no valid key is found in the given position. + * - -EINVAL if the parameters are invalid. 
+ * - -ENOENT if no valid key is found in the given position. */ int rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, @@ -263,7 +291,9 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, /** * Find a key-value pair in the hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. @@ -272,9 +302,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, * @param data * Output with pointer to data returned from the hash table. * @return - * 0 if successful lookup - * - EINVAL if the parameters are invalid. - * - ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. */ int rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data); @@ -282,7 +314,9 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data); /** * Find a key-value pair with a pre-computed hash value * to an existing hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. @@ -293,9 +327,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data); * @param data * Output with pointer to data returned from the hash table. * @return - * 0 if successful lookup - * - EINVAL if the parameters are invalid. - * - ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. */ int rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key, @@ -303,7 +339,9 @@ rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key, /** * Find a key in the hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. @@ -321,14 +359,16 @@ rte_hash_lookup(const struct rte_hash *h, const void *key); /** * Find a key in the hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. * @param key * Key to find. * @param sig - * Hash value to remove from the hash table. + * Precomputed hash value for 'key'. * @return * - -EINVAL if the parameters are invalid. * - -ENOENT if the key is not found. @@ -356,7 +396,9 @@ rte_hash_hash(const struct rte_hash *h, const void *key); /** * Find multiple keys in the hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. 
+ * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. @@ -377,7 +419,9 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys, /** * Find multiple keys in the hash table. - * This operation is multi-thread safe. + * This operation is multi-thread safe with regarding to other lookup threads. + * Read-write concurrency can be enabled by setting flag during + * table creation. * * @param h * Hash table to look in. diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index 479f84b1..cf28031b 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -338,14 +338,13 @@ crc32c_1word(uint32_t data, uint32_t init_val) static inline uint32_t crc32c_2words(uint64_t data, uint32_t init_val) { + uint32_t crc, term1, term2; union { uint64_t u64; uint32_t u32[2]; } d; d.u64 = data; - uint32_t crc, term1, term2; - crc = init_val; crc ^= d.u32[0]; @@ -399,9 +398,9 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) } d; d.u64 = data; - init_val = crc32c_sse42_u32(d.u32[0], init_val); - init_val = crc32c_sse42_u32(d.u32[1], init_val); - return init_val; + init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val); + init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val); + return (uint32_t)init_val; } #endif @@ -413,7 +412,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) "crc32q %[data], %[init_val];" : [init_val] "+r" (init_val) : [data] "rm" (data)); - return init_val; + return (uint32_t)init_val; } #endif diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map index 52a2576f..e216ac8e 100644 --- a/lib/librte_hash/rte_hash_version.map +++ b/lib/librte_hash/rte_hash_version.map @@ -45,3 +45,11 @@ DPDK_16.07 { rte_hash_get_key_with_position; } DPDK_2.2; + + +DPDK_18.08 { + global: + + rte_hash_count; + +} DPDK_16.07; diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c index 7397aa69..2560c771 100644 --- a/lib/librte_ip_frag/ip_frag_internal.c +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -152,7 +152,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, fp->frags[IP_LAST_FRAG_IDX].len); else IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " "total_size: %u, frag_size: %u, last_idx: %u\n" "first fragment: ofs: %u, len: %u\n" "last fragment: ofs: %u, len: %u\n\n", @@ -210,7 +210,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, fp->frags[IP_LAST_FRAG_IDX].len); else IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " "total_size: %u, frag_size: %u, last_idx: %u\n" "first fragment: ofs: %u, len: %u\n" "last fragment: ofs: %u, len: %u\n\n", @@ -331,7 +331,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl, if (p1->key.key_len == IPV4_KEYLEN) IP_FRAG_LOG(DEBUG, "%s:%d:\n" "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv6_frag_pkt line0: %p, index: %u from %u\n" + "ipv4_frag_pkt line0: %p, index: %u from %u\n" "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", __func__, __LINE__, tbl, tbl->max_entries, tbl->use_entries, @@ -357,7 +357,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl, if (p2->key.key_len == IPV4_KEYLEN) IP_FRAG_LOG(DEBUG, "%s:%d:\n" "tbl: %p, max_entries: %u, use_entries: %u\n" - 
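For context, a minimal usage sketch of the reworked rte_hash thread-safety model introduced above: the new extra flags are passed at table creation time and rte_hash_count() reports how many keys are currently stored. The table name, sizes and key type below are illustrative assumptions, not taken from the patch.

    #include <stdio.h>
    #include <stdint.h>
    #include <rte_hash.h>
    #include <rte_jhash.h>

    /* Sketch: create a table that allows concurrent writers and
     * concurrent readers/writers, then query the key count. */
    static struct rte_hash *
    example_create_rw_hash(void)
    {
            struct rte_hash_parameters params = {
                    .name = "example_rw_hash",   /* hypothetical name */
                    .entries = 1024,
                    .key_len = sizeof(uint32_t),
                    .hash_func = rte_jhash,
                    .hash_func_init_val = 0,
                    .socket_id = 0,
                    .extra_flag = RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD |
                                  RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
            };
            struct rte_hash *h = rte_hash_create(&params);
            uint32_t key = 42;

            if (h == NULL)
                    return NULL;

            /* adds are thread-safe with the flags above; lookups may
             * proceed concurrently with writers */
            rte_hash_add_key(h, &key);
            printf("keys stored: %d\n", (int)rte_hash_count(h));
            return h;
    }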
"ipv6_frag_pkt line1: %p, index: %u from %u\n" + "ipv4_frag_pkt line1: %p, index: %u from %u\n" "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", __func__, __LINE__, tbl, tbl->max_entries, tbl->use_entries, diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index 82e831ca..4956b99e 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -59,7 +59,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index 3479fabb..db249fe6 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -82,7 +82,9 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build index c4b21961..a738a033 100644 --- a/lib/librte_kni/meson.build +++ b/lib/librte_kni/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -if host_machine.system() != 'linux' +if host_machine.system() != 'linux' or cc.sizeof('void *') == 4 build = false endif version = 2 diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index 28674115..65f6a2b0 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -510,7 +510,7 @@ kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]) { int ret = 0; - if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) { + if (!rte_eth_dev_is_valid_port(port_id)) { RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id); return -EINVAL; } @@ -530,7 +530,7 @@ kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]) static int kni_config_promiscusity(uint16_t port_id, uint8_t to_on) { - if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) { + if (!rte_eth_dev_is_valid_port(port_id)) { RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id); return -EINVAL; } @@ -715,6 +715,9 @@ rte_kni_get(const char *name) struct rte_kni_memzone_slot *it; struct rte_kni *kni; + if (name == NULL || name[0] == '\0') + return NULL; + /* Note: could be improved perf-wise if necessary */ for (i = 0; i < kni_memzone_pool.max_ifaces; i++) { it = &kni_memzone_pool.slots[i]; diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile index 4eaa9334..87593954 100644 --- a/lib/librte_kvargs/Makefile +++ b/lib/librte_kvargs/Makefile @@ -1,35 +1,5 @@ -# BSD LICENSE -# +# SPDX-License-Identifier: BSD-3-Clause # Copyright 2014 6WIND S.A. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of 6WIND S.A. nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. include $(RTE_SDK)/mk/rte.vars.mk @@ -37,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_kvargs.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -LDLIBS += -lrte_eal +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include EXPORT_MAP := rte_kvargs_version.map diff --git a/lib/librte_kvargs/meson.build b/lib/librte_kvargs/meson.build index 0c5b9cb2..acd3e543 100644 --- a/lib/librte_kvargs/meson.build +++ b/lib/librte_kvargs/meson.build @@ -1,6 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +includes = [global_inc] +includes += include_directories('../librte_eal/common/include') + version = 1 sources = files('rte_kvargs.c') headers = files('rte_kvargs.h') + +deps += 'compat' diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index 9662375e..a28f7694 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -6,14 +6,13 @@ #include #include -#include #include #include "rte_kvargs.h" /* * Receive a string with a list of arguments following the pattern - * key=value;key=value;... and insert them into the list. + * key=value,key=value,... and insert them into the list. * strtok() is used so the params string will be copied to be modified. 
*/ static int @@ -28,29 +27,22 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) * to pass to rte_strsplit */ kvlist->str = strdup(params); - if (kvlist->str == NULL) { - RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n"); + if (kvlist->str == NULL) return -1; - } /* browse each key/value pair and add it in kvlist */ str = kvlist->str; while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) { i = kvlist->count; - if (i >= RTE_KVARGS_MAX) { - RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n"); + if (i >= RTE_KVARGS_MAX) return -1; - } kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2); kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2); - if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) { - RTE_LOG(ERR, PMD, - "Cannot parse arguments: wrong key or value\n" - "params=<%s>\n", params); + if (kvlist->pairs[i].key == NULL || + kvlist->pairs[i].value == NULL) return -1; - } kvlist->count++; str = NULL; @@ -89,12 +81,8 @@ check_for_valid_keys(struct rte_kvargs *kvlist, for (i = 0; i < kvlist->count; i++) { pair = &kvlist->pairs[i]; ret = is_valid_key(valid, pair->key); - if (!ret) { - RTE_LOG(ERR, PMD, - "Error parsing device, invalid key <%s>\n", - pair->key); + if (!ret) return -1; - } } return 0; } @@ -154,7 +142,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist) } /* - * Parse the arguments "key=value;key=value;..." string and return + * Parse the arguments "key=value,key=value,..." string and return * an allocated structure that contains a key/value list. Also * check if only valid keys were used. */ @@ -180,3 +168,38 @@ rte_kvargs_parse(const char *args, const char * const valid_keys[]) return kvlist; } + +__rte_experimental +struct rte_kvargs * +rte_kvargs_parse_delim(const char *args, const char * const valid_keys[], + const char *valid_ends) +{ + struct rte_kvargs *kvlist = NULL; + char *copy; + size_t len; + + if (valid_ends == NULL) + return rte_kvargs_parse(args, valid_keys); + + copy = strdup(args); + if (copy == NULL) + return NULL; + + len = strcspn(copy, valid_ends); + copy[len] = '\0'; + + kvlist = rte_kvargs_parse(copy, valid_keys); + + free(copy); + return kvlist; +} + +__rte_experimental +int +rte_kvargs_strcmp(const char *key __rte_unused, + const char *value, void *opaque) +{ + const char *str = opaque; + + return -abs(strcmp(str, value)); +} diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h index 51b8120b..fc041956 100644 --- a/lib/librte_kvargs/rte_kvargs.h +++ b/lib/librte_kvargs/rte_kvargs.h @@ -25,6 +25,8 @@ extern "C" { #endif +#include + /** Maximum number of key/value associations */ #define RTE_KVARGS_MAX 32 @@ -71,6 +73,36 @@ struct rte_kvargs { struct rte_kvargs *rte_kvargs_parse(const char *args, const char *const valid_keys[]); +/** + * Allocate a rte_kvargs and store key/value associations from a string. + * This version will consider any byte from valid_ends as a possible + * terminating character, and will not parse beyond any of their occurrence. + * + * The function allocates and fills an rte_kvargs structure from a given + * string whose format is key1=value1,key2=value2,... + * + * The structure can be freed with rte_kvargs_free(). + * + * @param args + * The input string containing the key/value associations + * + * @param valid_keys + * A list of valid keys (table of const char *, the last must be NULL). + * This argument is ignored if NULL + * + * @param valid_ends + * Acceptable terminating characters. 
+ * If NULL, the behavior is the same as ``rte_kvargs_parse``. + * + * @return + * - A pointer to an allocated rte_kvargs structure on success + * - NULL on error + */ +__rte_experimental +struct rte_kvargs *rte_kvargs_parse_delim(const char *args, + const char *const valid_keys[], + const char *valid_ends); + /** * Free a rte_kvargs structure * @@ -121,6 +153,32 @@ int rte_kvargs_process(const struct rte_kvargs *kvlist, unsigned rte_kvargs_count(const struct rte_kvargs *kvlist, const char *key_match); +/** + * Generic kvarg handler for string comparison. + * + * This function can be used for a generic string comparison processing + * on a list of kvargs. + * + * @param key + * kvarg pair key. + * + * @param value + * kvarg pair value. + * + * @param opaque + * Opaque pointer to a string. + * + * @return + * 0 if the strings match. + * !0 otherwise or on error. + * + * Unless strcmp, comparison ordering is not kept. + * In order for rte_kvargs_process to stop processing on match error, + * a negative value is returned even if strcmp had returned a positive one. + */ +__rte_experimental +int rte_kvargs_strcmp(const char *key, const char *value, void *opaque); + #ifdef __cplusplus } #endif diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map index 2030ec46..8f4b4e3f 100644 --- a/lib/librte_kvargs/rte_kvargs_version.map +++ b/lib/librte_kvargs/rte_kvargs_version.map @@ -8,3 +8,11 @@ DPDK_2.0 { local: *; }; + +EXPERIMENTAL { + global: + + rte_kvargs_parse_delim; + rte_kvargs_strcmp; + +} DPDK_2.0; diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c index 66330203..1fdec68e 100644 --- a/lib/librte_latencystats/rte_latencystats.c +++ b/lib/librte_latencystats/rte_latencystats.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2017 Intel Corporation + * Copyright(c) 2018 Intel Corporation */ #include @@ -46,7 +46,7 @@ struct rte_latency_stats { static struct rte_latency_stats *glob_stats; struct rxtx_cbs { - struct rte_eth_rxtx_callback *cb; + const struct rte_eth_rxtx_callback *cb; }; static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; @@ -201,7 +201,6 @@ rte_latencystats_init(uint64_t app_samp_intvl, uint16_t pid; uint16_t qid; struct rxtx_cbs *cbs = NULL; - const uint16_t nb_ports = rte_eth_dev_count(); const char *ptr_strings[NUM_LATENCY_STATS] = {0}; const struct rte_memzone *mz = NULL; const unsigned int flags = 0; @@ -234,7 +233,7 @@ rte_latencystats_init(uint64_t app_samp_intvl, } /** Register Rx/Tx callbacks */ - for (pid = 0; pid < nb_ports; pid++) { + RTE_ETH_FOREACH_DEV(pid) { struct rte_eth_dev_info dev_info; rte_eth_dev_info_get(pid, &dev_info); for (qid = 0; qid < dev_info.nb_rx_queues; qid++) { @@ -266,10 +265,10 @@ rte_latencystats_uninit(void) uint16_t qid; int ret = 0; struct rxtx_cbs *cbs = NULL; - const uint16_t nb_ports = rte_eth_dev_count(); + const struct rte_memzone *mz = NULL; /** De register Rx/Tx callbacks */ - for (pid = 0; pid < nb_ports; pid++) { + RTE_ETH_FOREACH_DEV(pid) { struct rte_eth_dev_info dev_info; rte_eth_dev_info_get(pid, &dev_info); for (qid = 0; qid < dev_info.nb_rx_queues; qid++) { @@ -290,6 +289,11 @@ rte_latencystats_uninit(void) } } + /* free up the memzone */ + mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS); + if (mz) + rte_memzone_free(mz); + return 0; } diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index 367568ae..e2b98a25 100644 --- a/lib/librte_mbuf/Makefile +++ b/lib/librte_mbuf/Makefile 
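As a rough illustration of the two experimental kvargs helpers added above (both require ALLOW_EXPERIMENTAL_API), the sketch below parses an argument string up to a caller-chosen terminator and then uses rte_kvargs_strcmp() as an rte_kvargs_process() handler; the argument keys, delimiter and matched value are invented for the example.

    #include <rte_kvargs.h>

    /* Sketch: accept args such as "class=eth,key=val/ignored-tail" and
     * check that the "class" key equals "eth". */
    static int
    example_check_class(const char *args)
    {
            static const char * const valid_keys[] = { "class", "key", NULL };
            char match[] = "eth";
            struct rte_kvargs *kvlist;
            int ret;

            /* stop parsing at the first '/', as permitted by valid_ends */
            kvlist = rte_kvargs_parse_delim(args, valid_keys, "/");
            if (kvlist == NULL)
                    return -1;

            /* rte_kvargs_strcmp() returns 0 only when the value equals
             * the string passed as the opaque argument */
            ret = rte_kvargs_process(kvlist, "class", rte_kvargs_strcmp, match);
            rte_kvargs_free(kvlist);
            return ret;
    }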
@@ -6,13 +6,12 @@ include $(RTE_SDK)/mk/rte.vars.mk # library name LIB = librte_mbuf.a -CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 LDLIBS += -lrte_eal -lrte_mempool EXPORT_MAP := rte_mbuf_version.map -LIBABIVER := 3 +LIBABIVER := 4 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build index 869c17c1..45ffb0db 100644 --- a/lib/librte_mbuf/meson.build +++ b/lib/librte_mbuf/meson.build @@ -2,7 +2,6 @@ # Copyright(c) 2017 Intel Corporation version = 3 -allow_experimental_apis = true sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c') headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h') deps += ['mempool'] diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index 091d388d..e714c5a5 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -32,21 +32,6 @@ #include #include -/* - * ctrlmbuf constructor, given as a callback function to - * rte_mempool_obj_iter() or rte_mempool_create() - */ -void -rte_ctrlmbuf_init(struct rte_mempool *mp, - __attribute__((unused)) void *opaque_arg, - void *_m, - __attribute__((unused)) unsigned i) -{ - struct rte_mbuf *m = _m; - rte_pktmbuf_init(mp, opaque_arg, _m, i); - m->ol_flags |= CTRL_MBUF_FLAG; -} - /* * pktmbuf pool constructor, given as a callback function to * rte_mempool_create(), or called directly if using @@ -122,7 +107,7 @@ rte_pktmbuf_init(struct rte_mempool *mp, } /* Helper to create a mbuf pool with given mempool ops name*/ -struct rte_mempool * __rte_experimental +struct rte_mempool * rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n, unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size, int socket_id, const char *ops_name) @@ -405,6 +390,9 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP"; + case PKT_TX_TUNNEL_VXLAN_GPE: return "PKT_TX_TUNNEL_VXLAN_GPE"; + case PKT_TX_TUNNEL_IP: return "PKT_TX_TUNNEL_IP"; + case PKT_TX_TUNNEL_UDP: return "PKT_TX_TUNNEL_UDP"; case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD"; default: return NULL; @@ -439,6 +427,12 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) "PKT_TX_TUNNEL_NONE" }, { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL }, }; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 62740254..9ce5d76d 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -201,15 +201,44 @@ extern "C" { /** * Bits 45:48 used for the tunnel type. - * When doing Tx offload like TSO or checksum, the HW needs to configure the - * tunnel type into the HW descriptors. + * The tunnel type must be specified for TSO or checksum on the inner part + * of tunnel packets. + * These flags can be used with PKT_TX_TCP_SEG for TSO, or PKT_TX_xxx_CKSUM. 
+ * The mbuf fields for inner and outer header lengths are required: + * outer_l2_len, outer_l3_len, l2_len, l3_len, l4_len and tso_segsz for TSO. */ #define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) #define PKT_TX_TUNNEL_GRE (0x2ULL << 45) #define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) #define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) -/**< TX packet with MPLS-in-UDP RFC 7510 header. */ +/** TX packet with MPLS-in-UDP RFC 7510 header. */ #define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45) +#define PKT_TX_TUNNEL_VXLAN_GPE (0x6ULL << 45) +/** + * Generic IP encapsulated tunnel type, used for TSO and checksum offload. + * It can be used for tunnels which are not standards or listed above. + * It is preferred to use specific tunnel flags like PKT_TX_TUNNEL_GRE + * or PKT_TX_TUNNEL_IPIP if possible. + * The ethdev must be configured with DEV_TX_OFFLOAD_IP_TNL_TSO. + * Outer and inner checksums are done according to the existing flags like + * PKT_TX_xxx_CKSUM. + * Specific tunnel headers that contain payload length, sequence id + * or checksum are not expected to be updated. + */ +#define PKT_TX_TUNNEL_IP (0xDULL << 45) +/** + * Generic UDP encapsulated tunnel type, used for TSO and checksum offload. + * UDP tunnel type implies outer IP layer. + * It can be used for tunnels which are not standards or listed above. + * It is preferred to use specific tunnel flags like PKT_TX_TUNNEL_VXLAN + * if possible. + * The ethdev must be configured with DEV_TX_OFFLOAD_UDP_TNL_TSO. + * Outer and inner checksums are done according to the existing flags like + * PKT_TX_xxx_CKSUM. + * Specific tunnel headers that contain payload length, sequence id + * or checksum are not expected to be updated. + */ +#define PKT_TX_TUNNEL_UDP (0xEULL << 45) /* add new TX TUNNEL type here */ #define PKT_TX_TUNNEL_MASK (0xFULL << 45) @@ -226,12 +255,8 @@ extern "C" { * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies * PKT_TX_TCP_CKSUM) * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum - * to 0 in the packet + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz - * - calculate the pseudo header checksum without taking ip_len in account, - * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and - * rte_ipv6_phdr_cksum() that can be used as helpers. */ #define PKT_TX_TCP_SEG (1ULL << 50) @@ -244,9 +269,6 @@ extern "C" { * - fill l2_len and l3_len in mbuf * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - calculate the pseudo header checksum and set it in the L4 header (only - * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). - * For SCTP, set the crc field to 0. */ #define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */ #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */ @@ -258,7 +280,6 @@ extern "C" { * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should * also be set by the application, although a PMD will only check * PKT_TX_IP_CKSUM. - * - set the IP checksum field in the packet to 0 * - fill the mbuf offload information: l2_len, l3_len */ #define PKT_TX_IP_CKSUM (1ULL << 54) @@ -288,10 +309,8 @@ extern "C" { /** * Offload the IP checksum of an external header in the hardware. The - * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh - * a PMD will only check PKT_TX_IP_CKSUM. 
The IP checksum field in the - * packet must be set to 0. - * - set the outer IP checksum field in the packet to 0 + * flag PKT_TX_OUTER_IPV4 should also be set by the application, although + * a PMD will only check PKT_TX_OUTER_IP_CKSUM. * - fill the mbuf offload information: outer_l2_len, outer_l3_len */ #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58) @@ -326,13 +345,13 @@ extern "C" { PKT_TX_MACSEC | \ PKT_TX_SEC_OFFLOAD) -#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ +/** + * Mbuf having an external buffer attached. shinfo in mbuf must be filled. + */ +#define EXT_ATTACHED_MBUF (1ULL << 61) #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ -/* Use final bit of flags to indicate a control mbuf */ -#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ - /** Alignment constraint of mbuf private area. */ #define RTE_MBUF_PRIV_ALIGN 8 @@ -569,8 +588,27 @@ struct rte_mbuf { /** Sequence number. See also rte_reorder_insert(). */ uint32_t seqn; + /** Shared data for external buffer attached to mbuf. See + * rte_pktmbuf_attach_extbuf(). + */ + struct rte_mbuf_ext_shared_info *shinfo; + } __rte_cache_aligned; +/** + * Function typedef of callback to free externally attached buffer. + */ +typedef void (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque); + +/** + * Shared data at the end of an external buffer. + */ +struct rte_mbuf_ext_shared_info { + rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback function */ + void *fcb_opaque; /**< Free callback argument */ + rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ +}; + /**< Maximum number of nb_segs allowed. */ #define RTE_MBUF_MAX_NB_SEGS UINT16_MAX @@ -691,14 +729,53 @@ rte_mbuf_to_baddr(struct rte_mbuf *md) } /** - * Returns TRUE if given mbuf is indirect, or FALSE otherwise. + * Return the starting address of the private data area embedded in + * the given mbuf. + * + * Note that no check is made to ensure that a private data area + * actually exists in the supplied mbuf. + * + * @param m + * The pointer to the mbuf. + * @return + * The starting address of the private data area of the given mbuf. + */ +static inline void * __rte_experimental +rte_mbuf_to_priv(struct rte_mbuf *m) +{ + return RTE_PTR_ADD(m, sizeof(struct rte_mbuf)); +} + +/** + * Returns TRUE if given mbuf is cloned by mbuf indirection, or FALSE + * otherwise. + * + * If a mbuf has its data in another mbuf and references it by mbuf + * indirection, this mbuf can be defined as a cloned mbuf. + */ +#define RTE_MBUF_CLONED(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF) + +/** + * Deprecated. + * Use RTE_MBUF_CLONED(). + */ +#define RTE_MBUF_INDIRECT(mb) RTE_MBUF_CLONED(mb) + +/** + * Returns TRUE if given mbuf has an external buffer, or FALSE otherwise. + * + * External buffer is a user-provided anonymous buffer. */ -#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF) +#define RTE_MBUF_HAS_EXTBUF(mb) ((mb)->ol_flags & EXT_ATTACHED_MBUF) /** * Returns TRUE if given mbuf is direct, or FALSE otherwise. + * + * If a mbuf embeds its own data after the rte_mbuf structure, this mbuf + * can be defined as a direct mbuf. */ -#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb)) +#define RTE_MBUF_DIRECT(mb) \ + (!((mb)->ol_flags & (IND_ATTACHED_MBUF | EXT_ATTACHED_MBUF))) /** * Private data in case of pktmbuf pool. 
@@ -748,7 +825,7 @@ rte_mbuf_refcnt_read(const struct rte_mbuf *m) static inline void rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) { - rte_atomic16_set(&m->refcnt_atomic, new_value); + rte_atomic16_set(&m->refcnt_atomic, (int16_t)new_value); } /* internal */ @@ -778,8 +855,9 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) * reference counter can occur. */ if (likely(rte_mbuf_refcnt_read(m) == 1)) { - rte_mbuf_refcnt_set(m, 1 + value); - return 1 + value; + ++value; + rte_mbuf_refcnt_set(m, (uint16_t)value); + return (uint16_t)value; } return __rte_mbuf_refcnt_update(m, value); @@ -824,6 +902,59 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) #endif /* RTE_MBUF_REFCNT_ATOMIC */ +/** + * Reads the refcnt of an external buffer. + * + * @param shinfo + * Shared data of the external buffer. + * @return + * Reference count number. + */ +static inline uint16_t +rte_mbuf_ext_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo) +{ + return (uint16_t)(rte_atomic16_read(&shinfo->refcnt_atomic)); +} + +/** + * Set refcnt of an external buffer. + * + * @param shinfo + * Shared data of the external buffer. + * @param new_value + * Value set + */ +static inline void +rte_mbuf_ext_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo, + uint16_t new_value) +{ + rte_atomic16_set(&shinfo->refcnt_atomic, (int16_t)new_value); +} + +/** + * Add given value to refcnt of an external buffer and return its new + * value. + * + * @param shinfo + * Shared data of the external buffer. + * @param value + * Value to add/subtract + * @return + * Updated value + */ +static inline uint16_t +rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo, + int16_t value) +{ + if (likely(rte_mbuf_ext_refcnt_read(shinfo) == 1)) { + ++value; + rte_mbuf_ext_refcnt_set(shinfo, (uint16_t)value); + return (uint16_t)value; + } + + return (uint16_t)rte_atomic16_add_return(&shinfo->refcnt_atomic, value); +} + /** Mbuf prefetch */ #define RTE_MBUF_PREFETCH_TO_FREE(m) do { \ if ((m) != NULL) \ @@ -915,89 +1046,6 @@ __rte_mbuf_raw_free(struct rte_mbuf *m) rte_mbuf_raw_free(m); } -/* Operations on ctrl mbuf */ - -/** - * The control mbuf constructor. - * - * This function initializes some fields in an mbuf structure that are - * not modified by the user once created (mbuf type, origin pool, buffer - * start address, and so on). This function is given as a callback function - * to rte_mempool_obj_iter() or rte_mempool_create() at pool creation time. - * - * @param mp - * The mempool from which the mbuf is allocated. - * @param opaque_arg - * A pointer that can be used by the user to retrieve useful information - * for mbuf initialization. This pointer is the opaque argument passed to - * rte_mempool_obj_iter() or rte_mempool_create(). - * @param m - * The mbuf to initialize. - * @param i - * The index of the mbuf in the pool table. - */ -void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg, - void *m, unsigned i); - -/** - * Allocate a new mbuf (type is ctrl) from mempool *mp*. - * - * This new mbuf is initialized with data pointing to the beginning of - * buffer, and with a length of zero. - * - * @param mp - * The mempool from which the mbuf is allocated. - * @return - * - The pointer to the new mbuf on success. - * - NULL if allocation failed. - */ -#define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp) - -/** - * Free a control mbuf back into its original mempool. - * - * @param m - * The control mbuf to be freed. 
- */ -#define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m) - -/** - * A macro that returns the pointer to the carried data. - * - * The value that can be read or assigned. - * - * @param m - * The control mbuf. - */ -#define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off) - -/** - * A macro that returns the length of the carried data. - * - * The value that can be read or assigned. - * - * @param m - * The control mbuf. - */ -#define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m) - -/** - * Tests if an mbuf is a control mbuf - * - * @param m - * The mbuf to be tested - * @return - * - True (1) if the mbuf is a control mbuf - * - False(0) otherwise - */ -static inline int -rte_is_ctrlmbuf(struct rte_mbuf *m) -{ - return !!(m->ol_flags & CTRL_MBUF_FLAG); -} - -/* Operations on pkt mbuf */ - /** * The packet mbuf constructor. * @@ -1116,7 +1164,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n, * - EEXIST - a memzone with the same name already exists * - ENOMEM - no appropriate memory area found in which to create memzone */ -struct rte_mempool * __rte_experimental +struct rte_mempool * rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n, unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size, int socket_id, const char *ops_name); @@ -1172,7 +1220,8 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp) */ static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) { - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + m->data_off = (uint16_t)RTE_MIN((uint16_t)RTE_PKTMBUF_HEADROOM, + (uint16_t)m->buf_len); } /** @@ -1280,12 +1329,162 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool, return 0; } +/** + * Initialize shared data at the end of an external buffer before attaching + * to a mbuf by ``rte_pktmbuf_attach_extbuf()``. This is not a mandatory + * initialization but a helper function to simply spare a few bytes at the + * end of the buffer for shared data. If shared data is allocated + * separately, this should not be called but application has to properly + * initialize the shared data according to its need. + * + * Free callback and its argument is saved and the refcnt is set to 1. + * + * @warning + * The value of buf_len will be reduced to RTE_PTR_DIFF(shinfo, buf_addr) + * after this initialization. This shall be used for + * ``rte_pktmbuf_attach_extbuf()`` + * + * @param buf_addr + * The pointer to the external buffer. + * @param [in,out] buf_len + * The pointer to length of the external buffer. Input value must be + * larger than the size of ``struct rte_mbuf_ext_shared_info`` and + * padding for alignment. If not enough, this function will return NULL. + * Adjusted buffer length will be returned through this pointer. + * @param free_cb + * Free callback function to call when the external buffer needs to be + * freed. + * @param fcb_opaque + * Argument for the free callback function. + * + * @return + * A pointer to the initialized shared data on success, return NULL + * otherwise. 
+ */ +static inline struct rte_mbuf_ext_shared_info * +rte_pktmbuf_ext_shinfo_init_helper(void *buf_addr, uint16_t *buf_len, + rte_mbuf_extbuf_free_callback_t free_cb, void *fcb_opaque) +{ + struct rte_mbuf_ext_shared_info *shinfo; + void *buf_end = RTE_PTR_ADD(buf_addr, *buf_len); + void *addr; + + addr = RTE_PTR_ALIGN_FLOOR(RTE_PTR_SUB(buf_end, sizeof(*shinfo)), + sizeof(uintptr_t)); + if (addr <= buf_addr) + return NULL; + + shinfo = (struct rte_mbuf_ext_shared_info *)addr; + shinfo->free_cb = free_cb; + shinfo->fcb_opaque = fcb_opaque; + rte_mbuf_ext_refcnt_set(shinfo, 1); + + *buf_len = (uint16_t)RTE_PTR_DIFF(shinfo, buf_addr); + return shinfo; +} + +/** + * Attach an external buffer to a mbuf. + * + * User-managed anonymous buffer can be attached to an mbuf. When attaching + * it, corresponding free callback function and its argument should be + * provided via shinfo. This callback function will be called once all the + * mbufs are detached from the buffer (refcnt becomes zero). + * + * The headroom for the attaching mbuf will be set to zero and this can be + * properly adjusted after attachment. For example, ``rte_pktmbuf_adj()`` + * or ``rte_pktmbuf_reset_headroom()`` might be used. + * + * More mbufs can be attached to the same external buffer by + * ``rte_pktmbuf_attach()`` once the external buffer has been attached by + * this API. + * + * Detachment can be done by either ``rte_pktmbuf_detach_extbuf()`` or + * ``rte_pktmbuf_detach()``. + * + * Memory for shared data must be provided and user must initialize all of + * the content properly, escpecially free callback and refcnt. The pointer + * of shared data will be stored in m->shinfo. + * ``rte_pktmbuf_ext_shinfo_init_helper`` can help to simply spare a few + * bytes at the end of buffer for the shared data, store free callback and + * its argument and set the refcnt to 1. The following is an example: + * + * struct rte_mbuf_ext_shared_info *shinfo = + * rte_pktmbuf_ext_shinfo_init_helper(buf_addr, &buf_len, + * free_cb, fcb_arg); + * rte_pktmbuf_attach_extbuf(m, buf_addr, buf_iova, buf_len, shinfo); + * rte_pktmbuf_reset_headroom(m); + * rte_pktmbuf_adj(m, data_len); + * + * Attaching an external buffer is quite similar to mbuf indirection in + * replacing buffer addresses and length of a mbuf, but a few differences: + * - When an indirect mbuf is attached, refcnt of the direct mbuf would be + * 2 as long as the direct mbuf itself isn't freed after the attachment. + * In such cases, the buffer area of a direct mbuf must be read-only. But + * external buffer has its own refcnt and it starts from 1. Unless + * multiple mbufs are attached to a mbuf having an external buffer, the + * external buffer is writable. + * - There's no need to allocate buffer from a mempool. Any buffer can be + * attached with appropriate free callback and its IO address. + * - Smaller metadata is required to maintain shared data such as refcnt. + * + * @warning + * @b EXPERIMENTAL: This API may change without prior notice. + * Once external buffer is enabled by allowing experimental API, + * ``RTE_MBUF_DIRECT()`` and ``RTE_MBUF_INDIRECT()`` are no longer + * exclusive. A mbuf can be considered direct if it is neither indirect nor + * having external buffer. + * + * @param m + * The pointer to the mbuf. + * @param buf_addr + * The pointer to the external buffer. + * @param buf_iova + * IO address of the external buffer. + * @param buf_len + * The size of the external buffer. 
+ * @param shinfo + * User-provided memory for shared data of the external buffer. + */ +static inline void __rte_experimental +rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr, + rte_iova_t buf_iova, uint16_t buf_len, + struct rte_mbuf_ext_shared_info *shinfo) +{ + /* mbuf should not be read-only */ + RTE_ASSERT(RTE_MBUF_DIRECT(m) && rte_mbuf_refcnt_read(m) == 1); + RTE_ASSERT(shinfo->free_cb != NULL); + + m->buf_addr = buf_addr; + m->buf_iova = buf_iova; + m->buf_len = buf_len; + + m->data_len = 0; + m->data_off = 0; + + m->ol_flags |= EXT_ATTACHED_MBUF; + m->shinfo = shinfo; +} + +/** + * Detach the external buffer attached to a mbuf, same as + * ``rte_pktmbuf_detach()`` + * + * @param m + * The mbuf having external buffer. + */ +#define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m) + /** * Attach packet mbuf to another packet mbuf. * - * After attachment we refer the mbuf we attached as 'indirect', - * while mbuf we attached to as 'direct'. - * The direct mbuf's reference counter is incremented. + * If the mbuf we are attaching to isn't a direct buffer and is attached to + * an external buffer, the mbuf being attached will be attached to the + * external buffer instead of mbuf indirection. + * + * Otherwise, the mbuf will be indirectly attached. After attachment we + * refer the mbuf we attached as 'indirect', while mbuf we attached to as + * 'direct'. The direct mbuf's reference counter is incremented. * * Right now, not supported: * - attachment for already indirect mbuf (e.g. - mi has to be direct). @@ -1299,19 +1498,20 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool, */ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) { - struct rte_mbuf *md; - RTE_ASSERT(RTE_MBUF_DIRECT(mi) && rte_mbuf_refcnt_read(mi) == 1); - /* if m is not direct, get the mbuf that embeds the data */ - if (RTE_MBUF_DIRECT(m)) - md = m; - else - md = rte_mbuf_from_indirect(m); + if (RTE_MBUF_HAS_EXTBUF(m)) { + rte_mbuf_ext_refcnt_update(m->shinfo, 1); + mi->ol_flags = m->ol_flags; + mi->shinfo = m->shinfo; + } else { + /* if m is not direct, get the mbuf that embeds the data */ + rte_mbuf_refcnt_update(rte_mbuf_from_indirect(m), 1); + mi->priv_size = m->priv_size; + mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF; + } - rte_mbuf_refcnt_update(md, 1); - mi->priv_size = m->priv_size; mi->buf_iova = m->buf_iova; mi->buf_addr = m->buf_addr; mi->buf_len = m->buf_len; @@ -1327,7 +1527,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) mi->next = NULL; mi->pkt_len = mi->data_len; mi->nb_segs = 1; - mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF; mi->packet_type = m->packet_type; mi->timestamp = m->timestamp; @@ -1336,12 +1535,52 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) } /** - * Detach an indirect packet mbuf. + * @internal used by rte_pktmbuf_detach(). + * + * Decrement the reference counter of the external buffer. When the + * reference counter becomes 0, the buffer is freed by pre-registered + * callback. + */ +static inline void +__rte_pktmbuf_free_extbuf(struct rte_mbuf *m) +{ + RTE_ASSERT(RTE_MBUF_HAS_EXTBUF(m)); + RTE_ASSERT(m->shinfo != NULL); + + if (rte_mbuf_ext_refcnt_update(m->shinfo, -1) == 0) + m->shinfo->free_cb(m->buf_addr, m->shinfo->fcb_opaque); +} + +/** + * @internal used by rte_pktmbuf_detach(). + * + * Decrement the direct mbuf's reference counter. When the reference + * counter becomes 0, the direct mbuf is freed. 
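The external-buffer attach/detach flow documented above can be hard to see end to end, so here is a hedged sketch that allocates an anonymous buffer, reserves its tail for the shared info via the helper, and attaches it to an existing direct mbuf. The allocation source and callback are assumptions (any buffer with a valid IO address and free callback works), and the attach API is experimental.

    #include <rte_mbuf.h>
    #include <rte_malloc.h>

    static void
    example_extbuf_free_cb(void *addr, void *opaque __rte_unused)
    {
            rte_free(addr); /* buffer came from rte_malloc() below */
    }

    /* Sketch: attach a user buffer to an already-allocated direct mbuf 'm'
     * whose refcnt is 1. */
    static int
    example_attach_extbuf(struct rte_mbuf *m, uint16_t buf_len)
    {
            void *buf = rte_malloc(NULL, buf_len, 0);
            struct rte_mbuf_ext_shared_info *shinfo;

            if (buf == NULL)
                    return -1;

            /* carve the shared info (free_cb, refcnt) out of the buffer
             * tail; buf_len is reduced accordingly */
            shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
                            example_extbuf_free_cb, NULL);
            if (shinfo == NULL) {
                    rte_free(buf);
                    return -1;
            }

            rte_pktmbuf_attach_extbuf(m, buf, rte_malloc_virt2iova(buf),
                            buf_len, shinfo);
            rte_pktmbuf_reset_headroom(m);
            /* later, rte_pktmbuf_detach() or freeing the mbuf drops the
             * refcnt and eventually invokes example_extbuf_free_cb() */
            return 0;
    }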
+ */ +static inline void +__rte_pktmbuf_free_direct(struct rte_mbuf *m) +{ + struct rte_mbuf *md; + + RTE_ASSERT(RTE_MBUF_INDIRECT(m)); + + md = rte_mbuf_from_indirect(m); + + if (rte_mbuf_refcnt_update(md, -1) == 0) { + md->next = NULL; + md->nb_segs = 1; + rte_mbuf_refcnt_set(md, 1); + rte_mbuf_raw_free(md); + } +} + +/** + * Detach a packet mbuf from external buffer or direct buffer. * + * - decrement refcnt and free the external/direct buffer if refcnt + * becomes zero. * - restore original mbuf address and length values. * - reset pktmbuf data and data_len to their default values. - * - decrement the direct mbuf's reference counter. When the - * reference counter becomes 0, the direct mbuf is freed. * * All other fields of the given packet mbuf will be left intact. * @@ -1350,12 +1589,17 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) */ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) { - struct rte_mbuf *md = rte_mbuf_from_indirect(m); struct rte_mempool *mp = m->pool; - uint32_t mbuf_size, buf_len, priv_size; + uint32_t mbuf_size, buf_len; + uint16_t priv_size; + + if (RTE_MBUF_HAS_EXTBUF(m)) + __rte_pktmbuf_free_extbuf(m); + else + __rte_pktmbuf_free_direct(m); priv_size = rte_pktmbuf_priv_size(mp); - mbuf_size = sizeof(struct rte_mbuf) + priv_size; + mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size); buf_len = rte_pktmbuf_data_room_size(mp); m->priv_size = priv_size; @@ -1365,13 +1609,6 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) rte_pktmbuf_reset_headroom(m); m->data_len = 0; m->ol_flags = 0; - - if (rte_mbuf_refcnt_update(md, -1) == 0) { - md->next = NULL; - md->nb_segs = 1; - rte_mbuf_refcnt_set(md, 1); - rte_mbuf_raw_free(md); - } } /** @@ -1395,7 +1632,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) if (likely(rte_mbuf_refcnt_read(m) == 1)) { - if (RTE_MBUF_INDIRECT(m)) + if (!RTE_MBUF_DIRECT(m)) rte_pktmbuf_detach(m); if (m->next != NULL) { @@ -1407,7 +1644,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) } else if (__rte_mbuf_refcnt_update(m, -1) == 0) { - if (RTE_MBUF_INDIRECT(m)) + if (!RTE_MBUF_DIRECT(m)) rte_pktmbuf_detach(m); if (m->next != NULL) { @@ -1690,7 +1927,10 @@ static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m, if (unlikely(len > rte_pktmbuf_headroom(m))) return NULL; - m->data_off -= len; + /* NB: elaborating the subtraction like this instead of using + * -= allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ + m->data_off = (uint16_t)(m->data_off - len); m->data_len = (uint16_t)(m->data_len + len); m->pkt_len = (m->pkt_len + len); @@ -1750,8 +1990,11 @@ static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len) if (unlikely(len > m->data_len)) return NULL; + /* NB: elaborating the addition like this instead of using + * += allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ m->data_len = (uint16_t)(m->data_len - len); - m->data_off += len; + m->data_off = (uint16_t)(m->data_off + len); m->pkt_len = (m->pkt_len - len); return (char *)m->buf_addr + m->data_off; } @@ -1863,8 +2106,11 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail cur_tail = rte_pktmbuf_lastseg(head); cur_tail->next = tail; - /* accumulate number of segments and total length. */ - head->nb_segs += tail->nb_segs; + /* accumulate number of segments and total length. 
+ * NB: elaborating the addition like this instead of using + * -= allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ + head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs); head->pkt_len += tail->pkt_len; /* pkt_len is only set in the head */ @@ -1894,7 +2140,11 @@ rte_validate_tx_offload(const struct rte_mbuf *m) return 0; if (ol_flags & PKT_TX_OUTER_IP_CKSUM) - inner_l3_offset += m->outer_l2_len + m->outer_l3_len; + /* NB: elaborating the addition like this instead of using + * += gives the result uint64_t type instead of int, + * avoiding compiler warnings on gcc 8.1 at least */ + inner_l3_offset = inner_l3_offset + m->outer_l2_len + + m->outer_l3_len; /* Headers are fragmented */ if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) @@ -1939,7 +2189,7 @@ rte_validate_tx_offload(const struct rte_mbuf *m) static inline int rte_pktmbuf_linearize(struct rte_mbuf *mbuf) { - int seg_len, copy_len; + size_t seg_len, copy_len; struct rte_mbuf *m; struct rte_mbuf *m_next; char *buffer; diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c b/lib/librte_mbuf/rte_mbuf_pool_ops.c index 48cc3420..5722976f 100644 --- a/lib/librte_mbuf/rte_mbuf_pool_ops.c +++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c @@ -9,7 +9,7 @@ #include #include -int __rte_experimental +int rte_mbuf_set_platform_mempool_ops(const char *ops_name) { const struct rte_memzone *mz; @@ -23,7 +23,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name) RTE_MEMPOOL_OPS_NAMESIZE, SOCKET_ID_ANY, 0); if (mz == NULL) return -rte_errno; - strncpy(mz->addr, ops_name, strlen(ops_name)); + strcpy(mz->addr, ops_name); return 0; } else if (strcmp(mz->addr, ops_name) == 0) { return 0; @@ -35,7 +35,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name) return -EEXIST; } -const char * __rte_experimental +const char * rte_mbuf_platform_mempool_ops(void) { const struct rte_memzone *mz; @@ -46,7 +46,7 @@ rte_mbuf_platform_mempool_ops(void) return mz->addr; } -int __rte_experimental +int rte_mbuf_set_user_mempool_ops(const char *ops_name) { const struct rte_memzone *mz; @@ -62,12 +62,12 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name) return -rte_errno; } - strncpy(mz->addr, ops_name, strlen(ops_name)); + strcpy(mz->addr, ops_name); return 0; } -const char * __rte_experimental +const char * rte_mbuf_user_mempool_ops(void) { const struct rte_memzone *mz; @@ -79,7 +79,7 @@ rte_mbuf_user_mempool_ops(void) } /* Return mbuf pool ops name */ -const char * __rte_experimental +const char * rte_mbuf_best_mempool_ops(void) { /* User defined mempool ops takes the priority */ diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.h b/lib/librte_mbuf/rte_mbuf_pool_ops.h index ebf5bf0f..7ed95a49 100644 --- a/lib/librte_mbuf/rte_mbuf_pool_ops.h +++ b/lib/librte_mbuf/rte_mbuf_pool_ops.h @@ -12,9 +12,6 @@ * These APIs are for configuring the mbuf pool ops names to be largely used by * rte_pktmbuf_pool_create(). However, this can also be used to set and inquire * the best mempool ops available. - * - * @warning - * @b EXPERIMENTAL: this API may change without prior notice */ #include @@ -34,7 +31,7 @@ extern "C" { * - On success, zero. * - On failure, a negative value. */ -int __rte_experimental +int rte_mbuf_set_platform_mempool_ops(const char *ops_name); /** @@ -46,7 +43,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name); * - On success, platform pool ops name. * - On failure, NULL. 
*/ -const char * __rte_experimental +const char * rte_mbuf_platform_mempool_ops(void); /** @@ -60,7 +57,7 @@ rte_mbuf_platform_mempool_ops(void); * - On success, zero. * - On failure, a negative value. */ -int __rte_experimental +int rte_mbuf_set_user_mempool_ops(const char *ops_name); /** @@ -72,7 +69,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name); * - On success, user pool ops name.. * - On failure, NULL. */ -const char * __rte_experimental +const char * rte_mbuf_user_mempool_ops(void); /** @@ -87,7 +84,7 @@ rte_mbuf_user_mempool_ops(void); * @return * returns preferred mbuf pool ops name */ -const char * __rte_experimental +const char * rte_mbuf_best_mempool_ops(void); diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c index 1feefacc..d7835e28 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.c +++ b/lib/librte_mbuf/rte_mbuf_ptype.c @@ -65,6 +65,9 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype) case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU"; case RTE_PTYPE_TUNNEL_ESP: return "TUNNEL_ESP"; case RTE_PTYPE_TUNNEL_L2TP: return "TUNNEL_L2TP"; + case RTE_PTYPE_TUNNEL_VXLAN_GPE: return "TUNNEL_VXLAN_GPE"; + case RTE_PTYPE_TUNNEL_MPLS_IN_UDP: return "TUNNEL_MPLS_IN_UDP"; + case RTE_PTYPE_TUNNEL_MPLS_IN_GRE: return "TUNNEL_MPLS_IN_GRE"; default: return "TUNNEL_UNKNOWN"; } } diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index b9a33811..01acc66e 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -422,6 +422,53 @@ extern "C" { * | 'version'=6, 'protocol'=115> */ #define RTE_PTYPE_TUNNEL_L2TP 0x0000a000 +/** + * VXLAN-GPE (VXLAN Generic Protocol Extension) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=4790> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=4790> + */ +#define RTE_PTYPE_TUNNEL_VXLAN_GPE 0x0000b000 +/** + * MPLS-in-GRE tunneling packet type (RFC 4023). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol'=0x8847> + * or, + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol'=0x8848> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'protocol'=47 + * | 'protocol'=0x8847> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47 + * | 'protocol'=0x8848> + */ +#define RTE_PTYPE_TUNNEL_MPLS_IN_GRE 0x0000c000 +/** + * MPLS-in-UDP tunneling packet type (RFC 7510). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=6635> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=6635> + */ +#define RTE_PTYPE_TUNNEL_MPLS_IN_UDP 0x0000d000 /** * Mask of tunneling packet types. */ @@ -606,9 +653,9 @@ extern "C" { #define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) /** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can - * determine if it is an IPV4 packet. + * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by + * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can + * determine if it is an IPV6 packet. 
*/ #define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map index d418dcb8..cae68db8 100644 --- a/lib/librte_mbuf/rte_mbuf_version.map +++ b/lib/librte_mbuf/rte_mbuf_version.map @@ -1,7 +1,6 @@ DPDK_2.0 { global: - rte_ctrlmbuf_init; rte_get_rx_ol_flag_name; rte_get_tx_ol_flag_name; rte_mbuf_sanity_check; @@ -36,7 +35,7 @@ DPDK_16.11 { } DPDK_2.1; -EXPERIMENTAL { +DPDK_18.08 { global: rte_mbuf_best_mempool_ops; @@ -45,5 +44,4 @@ EXPERIMENTAL { rte_mbuf_set_user_mempool_ops; rte_mbuf_user_mempool_ops; rte_pktmbuf_pool_create_by_ops; - } DPDK_16.11; diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c index e147dd1f..702c01d3 100644 --- a/lib/librte_member/rte_member.c +++ b/lib/librte_member/rte_member.c @@ -297,10 +297,7 @@ rte_member_reset(const struct rte_member_setsum *setsum) } } -RTE_INIT(librte_member_init_log); - -static void -librte_member_init_log(void) +RTE_INIT(librte_member_init_log) { librte_member_logtype = rte_log_register("lib.member"); if (librte_member_logtype >= 0) diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile index 24e735a3..20bf63fb 100644 --- a/lib/librte_mempool/Makefile +++ b/lib/librte_mempool/Makefile @@ -7,15 +7,20 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_mempool.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_ring EXPORT_MAP := rte_mempool_version.map -LIBABIVER := 3 +LIBABIVER := 5 + +# memseg walk is not yet part of stable API +CFLAGS += -DALLOW_EXPERIMENTAL_API # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c +SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build index 7a4f3dae..38d7ae89 100644 --- a/lib/librte_mempool/meson.build +++ b/lib/librte_mempool/meson.build @@ -1,7 +1,21 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 2 -sources = files('rte_mempool.c', 'rte_mempool_ops.c') +allow_experimental_apis = true + +extra_flags = [] + +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +version = 5 +sources = files('rte_mempool.c', 'rte_mempool_ops.c', + 'rte_mempool_ops_default.c') headers = files('rte_mempool.h') deps += ['ring'] + +# memseg walk is not yet part of stable API +allow_experimental_apis = true diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 54f7f4ba..03e6b5f7 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -3,6 +3,7 @@ * Copyright(c) 2016 6WIND S.A. */ +#include #include #include #include @@ -98,8 +99,31 @@ static unsigned optimize_object_size(unsigned obj_size) return new_obj_size * RTE_MEMPOOL_ALIGN; } +static int +find_min_pagesz(const struct rte_memseg_list *msl, void *arg) +{ + size_t *min = arg; + + if (msl->page_sz < *min) + *min = msl->page_sz; + + return 0; +} + +static size_t +get_min_page_size(void) +{ + size_t min_pagesz = SIZE_MAX; + + rte_memseg_list_walk(find_min_pagesz, &min_pagesz); + + return min_pagesz == SIZE_MAX ? 
(size_t) getpagesize() : min_pagesz; +} + + static void -mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova) +mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque, + void *obj, rte_iova_t iova) { struct rte_mempool_objhdr *hdr; struct rte_mempool_objtlr *tlr __rte_unused; @@ -116,9 +140,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova) tlr = __mempool_get_trailer(obj); tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE; #endif - - /* enqueue in ring */ - rte_mempool_ops_enqueue_bulk(mp, &obj, 1); } /* call obj_cb() for each mempool element */ @@ -204,92 +225,6 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, return sz->total_size; } - -/* - * Calculate maximum amount of memory required to store given number of objects. - */ -size_t -rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift, - unsigned int flags) -{ - size_t obj_per_page, pg_num, pg_sz; - unsigned int mask; - - mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG; - if ((flags & mask) == mask) - /* alignment need one additional object */ - elt_num += 1; - - if (total_elt_sz == 0) - return 0; - - if (pg_shift == 0) - return total_elt_sz * elt_num; - - pg_sz = (size_t)1 << pg_shift; - obj_per_page = pg_sz / total_elt_sz; - if (obj_per_page == 0) - return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num; - - pg_num = (elt_num + obj_per_page - 1) / obj_per_page; - return pg_num << pg_shift; -} - -/* - * Calculate how much memory would be actually required with the - * given memory footprint to store required number of elements. - */ -ssize_t -rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num, - size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num, - uint32_t pg_shift, unsigned int flags) -{ - uint32_t elt_cnt = 0; - rte_iova_t start, end; - uint32_t iova_idx; - size_t pg_sz = (size_t)1 << pg_shift; - unsigned int mask; - - mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG; - if ((flags & mask) == mask) - /* alignment need one additional object */ - elt_num += 1; - - /* if iova is NULL, assume contiguous memory */ - if (iova == NULL) { - start = 0; - end = pg_sz * pg_num; - iova_idx = pg_num; - } else { - start = iova[0]; - end = iova[0] + pg_sz; - iova_idx = 1; - } - while (elt_cnt < elt_num) { - - if (end - start >= total_elt_sz) { - /* enough contiguous memory, add an object */ - start += total_elt_sz; - elt_cnt++; - } else if (iova_idx < pg_num) { - /* no room to store one obj, add a page */ - if (end == iova[iova_idx]) { - end += pg_sz; - } else { - start = iova[iova_idx]; - end = iova[iova_idx] + pg_sz; - } - iova_idx++; - - } else { - /* no more page, return how many elements fit */ - return -(size_t)elt_cnt; - } - } - - return (size_t)iova_idx << pg_shift; -} - /* free a memchunk allocated with rte_memzone_reserve() */ static void rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr, @@ -323,6 +258,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp) } } +static int +mempool_ops_alloc_once(struct rte_mempool *mp) +{ + int ret; + + /* create the internal ring if not already done */ + if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) { + ret = rte_mempool_ops_alloc(mp); + if (ret != 0) + return ret; + mp->flags |= MEMPOOL_F_POOL_CREATED; + } + return 0; +} + /* Add objects in the pool, using a physically contiguous memory * zone. Return the number of objects added, or a negative value * on error. 
@@ -332,51 +282,19 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) { - unsigned total_elt_sz; - unsigned int mp_capa_flags; unsigned i = 0; size_t off; struct rte_mempool_memhdr *memhdr; int ret; - /* create the internal ring if not already done */ - if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) { - ret = rte_mempool_ops_alloc(mp); - if (ret != 0) - return ret; - mp->flags |= MEMPOOL_F_POOL_CREATED; - } - - /* Notify memory area to mempool */ - ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len); - if (ret != -ENOTSUP && ret < 0) + ret = mempool_ops_alloc_once(mp); + if (ret != 0) return ret; /* mempool is already populated */ if (mp->populated_size >= mp->size) return -ENOSPC; - total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; - - /* Get mempool capabilities */ - mp_capa_flags = 0; - ret = rte_mempool_ops_get_capabilities(mp, &mp_capa_flags); - if ((ret < 0) && (ret != -ENOTSUP)) - return ret; - - /* update mempool capabilities */ - mp->flags |= mp_capa_flags; - - /* Detect pool area has sufficient space for elements */ - if (mp_capa_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) { - if (len < total_elt_sz * mp->size) { - RTE_LOG(ERR, MEMPOOL, - "pool area %" PRIx64 " not enough\n", - (uint64_t)len); - return -ENOSPC; - } - } - memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0); if (memhdr == NULL) return -ENOMEM; @@ -388,89 +306,34 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, memhdr->free_cb = free_cb; memhdr->opaque = opaque; - if (mp_capa_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS) - /* align object start address to a multiple of total_elt_sz */ - off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); - else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN) + if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN) off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr; else off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr; - while (off + total_elt_sz <= len && mp->populated_size < mp->size) { - off += mp->header_size; - if (iova == RTE_BAD_IOVA) - mempool_add_elem(mp, (char *)vaddr + off, - RTE_BAD_IOVA); - else - mempool_add_elem(mp, (char *)vaddr + off, iova + off); - off += mp->elt_size + mp->trailer_size; - i++; + if (off > len) { + ret = -EINVAL; + goto fail; } + i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size, + (char *)vaddr + off, + (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off), + len - off, mempool_add_elem, NULL); + /* not enough room to store one object */ - if (i == 0) - return -EINVAL; + if (i == 0) { + ret = -EINVAL; + goto fail; + } STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next); mp->nb_mem_chunks++; return i; -} - -int -rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, - phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, - void *opaque) -{ - return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque); -} - -/* Add objects in the pool, using a table of physical pages. Return the - * number of objects added, or a negative value on error. 
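Editorial sketch (not part of the patch): with the capability flags and the xmem helpers removed, externally reserved memory reaches a pool only through rte_mempool_populate_iova(). A minimal caller-side illustration of that path, using invented names, placeholder sizes and simplified error unwinding, and assuming the reserved memzone is IOVA-contiguous:

#include <rte_mempool.h>
#include <rte_memzone.h>

/* free_cb invoked when the pool is destroyed; releases the memzone */
static void
example_chunk_free(struct rte_mempool_memhdr *memhdr, void *opaque)
{
        (void)memhdr;
        rte_memzone_free(opaque);
}

static struct rte_mempool *
example_pool_from_memzone(void)
{
        struct rte_mempool *mp;
        const struct rte_memzone *mz;

        /* placeholder geometry: 4096 objects of 2048 bytes, 256-deep cache */
        mp = rte_mempool_create_empty("example_pool", 4096, 2048, 256, 0,
                        SOCKET_ID_ANY, 0);
        if (mp == NULL)
                return NULL;

        /* attach the default ring-based handler before populating */
        if (rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL) != 0)
                goto fail;

        /* ask for IOVA-contiguous memory, as rte_mempool_populate_default()
         * now does on its first attempt */
        mz = rte_memzone_reserve_aligned("example_chunk", 16 << 20,
                        SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG,
                        RTE_CACHE_LINE_SIZE);
        if (mz == NULL)
                goto fail;

        /* returns the number of objects actually added, or < 0 on error */
        if (rte_mempool_populate_iova(mp, mz->addr, mz->iova, mz->len,
                        example_chunk_free, (void *)(uintptr_t)mz) < 0)
                goto fail;

        return mp;
fail:
        rte_mempool_free(mp);
        return NULL;
}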
- */ -int -rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr, - const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift, - rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) -{ - uint32_t i, n; - int ret, cnt = 0; - size_t pg_sz = (size_t)1 << pg_shift; - - /* mempool must not be populated */ - if (mp->nb_mem_chunks != 0) - return -EEXIST; - - if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) - return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA, - pg_num * pg_sz, free_cb, opaque); - - for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) { - - /* populate with the largest group of contiguous pages */ - for (n = 1; (i + n) < pg_num && - iova[i + n - 1] + pg_sz == iova[i + n]; n++) - ; - - ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz, - iova[i], n * pg_sz, free_cb, opaque); - if (ret < 0) { - rte_mempool_free_memchunks(mp); - return ret; - } - /* no need to call the free callback for next chunks */ - free_cb = NULL; - cnt += ret; - } - return cnt; -} -int -rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, - const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, - rte_mempool_memchunk_free_cb_t *free_cb, void *opaque) -{ - return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift, - free_cb, opaque); +fail: + rte_free(memhdr); + return ret; } /* Populate the mempool with a virtual area. Return the number of @@ -485,16 +348,13 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, size_t off, phys_len; int ret, cnt = 0; - /* mempool must not be populated */ - if (mp->nb_mem_chunks != 0) - return -EEXIST; /* address and len must be page-aligned */ if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr) return -EINVAL; if (RTE_ALIGN_CEIL(len, pg_sz) != len) return -EINVAL; - if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) + if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG) return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA, len, free_cb, opaque); @@ -544,39 +404,94 @@ rte_mempool_populate_default(struct rte_mempool *mp) unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; char mz_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; - size_t size, total_elt_sz, align, pg_sz, pg_shift; + ssize_t mem_size; + size_t align, pg_sz, pg_shift; rte_iova_t iova; unsigned mz_id, n; - unsigned int mp_flags; int ret; + bool no_contig, try_contig, no_pageshift; + + ret = mempool_ops_alloc_once(mp); + if (ret != 0) + return ret; /* mempool must not be populated */ if (mp->nb_mem_chunks != 0) return -EEXIST; - /* Get mempool capabilities */ - mp_flags = 0; - ret = rte_mempool_ops_get_capabilities(mp, &mp_flags); - if ((ret < 0) && (ret != -ENOTSUP)) - return ret; + no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG; - /* update mempool capabilities */ - mp->flags |= mp_flags; + /* + * the following section calculates page shift and page size values. + * + * these values impact the result of calc_mem_size operation, which + * returns the amount of memory that should be allocated to store the + * desired number of objects. when not zero, it allocates more memory + * for the padding between objects, to ensure that an object does not + * cross a page boundary. in other words, page size/shift are to be set + * to zero if mempool elements won't care about page boundaries. + * there are several considerations for page size and page shift here. 
+ * + * if we don't need our mempools to have physically contiguous objects, + * then just set page shift and page size to 0, because the user has + * indicated that there's no need to care about anything. + * + * if we do need contiguous objects, there is also an option to reserve + * the entire mempool memory as one contiguous block of memory, in + * which case the page shift and alignment wouldn't matter as well. + * + * if we require contiguous objects, but not necessarily the entire + * mempool reserved space to be contiguous, then there are two options. + * + * if our IO addresses are virtual, not actual physical (IOVA as VA + * case), then no page shift needed - our memory allocation will give us + * contiguous IO memory as far as the hardware is concerned, so + * act as if we're getting contiguous memory. + * + * if our IO addresses are physical, we may get memory from bigger + * pages, or we might get memory from smaller pages, and how much of it + * we require depends on whether we want bigger or smaller pages. + * However, requesting each and every memory size is too much work, so + * what we'll do instead is walk through the page sizes available, pick + * the smallest one and set up page shift to match that one. We will be + * wasting some space this way, but it's much nicer than looping around + * trying to reserve each and every page size. + * + * However, since size calculation will produce page-aligned sizes, it + * makes sense to first try and see if we can reserve the entire memzone + * in one contiguous chunk as well (otherwise we might end up wasting a + * 1G page on a 10MB memzone). If we fail to get enough contiguous + * memory, then we'll go and reserve space page-by-page. + */ + no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA; + try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages(); - if (rte_eal_has_hugepages()) { - pg_shift = 0; /* not needed, zone is physically contiguous */ + if (no_pageshift) { pg_sz = 0; - align = RTE_CACHE_LINE_SIZE; + pg_shift = 0; + } else if (try_contig) { + pg_sz = get_min_page_size(); + pg_shift = rte_bsf32(pg_sz); } else { pg_sz = getpagesize(); pg_shift = rte_bsf32(pg_sz); - align = pg_sz; } - total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) { - size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift, - mp->flags); + size_t min_chunk_size; + unsigned int flags; + + if (try_contig || no_pageshift) + mem_size = rte_mempool_ops_calc_mem_size(mp, n, + 0, &min_chunk_size, &align); + else + mem_size = rte_mempool_ops_calc_mem_size(mp, n, + pg_shift, &min_chunk_size, &align); + + if (mem_size < 0) { + ret = mem_size; + goto fail; + } ret = snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id); @@ -585,30 +500,70 @@ rte_mempool_populate_default(struct rte_mempool *mp) goto fail; } - mz = rte_memzone_reserve_aligned(mz_name, size, - mp->socket_id, mz_flags, align); - /* not enough memory, retry with the biggest zone we have */ - if (mz == NULL) + flags = mz_flags; + + /* if we're trying to reserve contiguous memory, add appropriate + * memzone flag. + */ + if (try_contig) + flags |= RTE_MEMZONE_IOVA_CONTIG; + + mz = rte_memzone_reserve_aligned(mz_name, mem_size, + mp->socket_id, flags, align); + + /* if we were trying to allocate contiguous memory, failed and + * minimum required contiguous chunk fits minimum page, adjust + * memzone size to the page size, and try again. 
+ */ + if (mz == NULL && try_contig && min_chunk_size <= pg_sz) { + try_contig = false; + flags &= ~RTE_MEMZONE_IOVA_CONTIG; + + mem_size = rte_mempool_ops_calc_mem_size(mp, n, + pg_shift, &min_chunk_size, &align); + if (mem_size < 0) { + ret = mem_size; + goto fail; + } + + mz = rte_memzone_reserve_aligned(mz_name, mem_size, + mp->socket_id, flags, align); + } + /* don't try reserving with 0 size if we were asked to reserve + * IOVA-contiguous memory. + */ + if (min_chunk_size < (size_t)mem_size && mz == NULL) { + /* not enough memory, retry with the biggest zone we + * have + */ mz = rte_memzone_reserve_aligned(mz_name, 0, - mp->socket_id, mz_flags, align); + mp->socket_id, flags, + RTE_MAX(pg_sz, align)); + } if (mz == NULL) { ret = -rte_errno; goto fail; } - if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG) + if (mz->len < min_chunk_size) { + rte_memzone_free(mz); + ret = -ENOMEM; + goto fail; + } + + if (no_contig) iova = RTE_BAD_IOVA; else iova = mz->iova; - if (rte_eal_has_hugepages()) + if (no_pageshift || try_contig) ret = rte_mempool_populate_iova(mp, mz->addr, iova, mz->len, rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz); else ret = rte_mempool_populate_virt(mp, mz->addr, - mz->len, pg_sz, + RTE_ALIGN_FLOOR(mz->len, pg_sz), pg_sz, rte_mempool_memchunk_mz_free, (void *)(uintptr_t)mz); if (ret < 0) { @@ -625,16 +580,18 @@ rte_mempool_populate_default(struct rte_mempool *mp) } /* return the memory size required for mempool objects in anonymous mem */ -static size_t +static ssize_t get_anon_size(const struct rte_mempool *mp) { - size_t size, total_elt_sz, pg_sz, pg_shift; + ssize_t size; + size_t pg_sz, pg_shift; + size_t min_chunk_size; + size_t align; pg_sz = getpagesize(); pg_shift = rte_bsf32(pg_sz); - total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; - size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift, - mp->flags); + size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift, + &min_chunk_size, &align); return size; } @@ -644,25 +601,45 @@ static void rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr, void *opaque) { - munmap(opaque, get_anon_size(memhdr->mp)); + ssize_t size; + + /* + * Calculate size since memhdr->len has contiguous chunk length + * which may be smaller if anon map is split into many contiguous + * chunks. Result must be the same as we calculated on populate. 
+ */ + size = get_anon_size(memhdr->mp); + if (size < 0) + return; + + munmap(opaque, size); } /* populate the mempool with an anonymous mapping */ int rte_mempool_populate_anon(struct rte_mempool *mp) { - size_t size; + ssize_t size; int ret; char *addr; /* mempool is already populated, error */ - if (!STAILQ_EMPTY(&mp->mem_list)) { + if ((!STAILQ_EMPTY(&mp->mem_list)) || mp->nb_mem_chunks != 0) { rte_errno = EINVAL; return 0; } - /* get chunk of virtually continuous memory */ + ret = mempool_ops_alloc_once(mp); + if (ret != 0) + return ret; + size = get_anon_size(mp); + if (size < 0) { + rte_errno = -size; + return 0; + } + + /* get chunk of virtually continuous memory */ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { @@ -795,6 +772,12 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + /* asked for zero items */ + if (n == 0) { + rte_errno = EINVAL; + return NULL; + } + /* asked cache too big */ if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE || CALC_CACHE_FLUSHTHRESH(cache_size) > n) { @@ -944,66 +927,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, return NULL; } -/* - * Create the mempool over already allocated chunk of memory. - * That external memory buffer can consists of physically disjoint pages. - * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create() - * behavior. - */ -struct rte_mempool * -rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, - unsigned cache_size, unsigned private_data_size, - rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, - int socket_id, unsigned flags, void *vaddr, - const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift) -{ - struct rte_mempool *mp = NULL; - int ret; - - /* no virtual address supplied, use rte_mempool_create() */ - if (vaddr == NULL) - return rte_mempool_create(name, n, elt_size, cache_size, - private_data_size, mp_init, mp_init_arg, - obj_init, obj_init_arg, socket_id, flags); - - /* check that we have both VA and PA */ - if (iova == NULL) { - rte_errno = EINVAL; - return NULL; - } - - /* Check that pg_shift parameter is valid. 
*/ - if (pg_shift > MEMPOOL_PG_SHIFT_MAX) { - rte_errno = EINVAL; - return NULL; - } - - mp = rte_mempool_create_empty(name, n, elt_size, cache_size, - private_data_size, socket_id, flags); - if (mp == NULL) - return NULL; - - /* call the mempool priv initializer */ - if (mp_init) - mp_init(mp, mp_init_arg); - - ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift, - NULL, NULL); - if (ret < 0 || ret != (int)mp->size) - goto fail; - - /* call the object initializers */ - if (obj_init) - rte_mempool_obj_iter(mp, obj_init, obj_init_arg); - - return mp; - - fail: - rte_mempool_free(mp); - return NULL; -} - /* Return the number of entries in the mempool */ unsigned int rte_mempool_avail_count(const struct rte_mempool *mp) @@ -1132,6 +1055,36 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, #endif } +void +rte_mempool_contig_blocks_check_cookies(const struct rte_mempool *mp, + void * const *first_obj_table_const, unsigned int n, int free) +{ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + struct rte_mempool_info info; + const size_t total_elt_sz = + mp->header_size + mp->elt_size + mp->trailer_size; + unsigned int i, j; + + rte_mempool_ops_get_info(mp, &info); + + for (i = 0; i < n; ++i) { + void *first_obj = first_obj_table_const[i]; + + for (j = 0; j < info.contig_block_size; ++j) { + void *obj; + + obj = (void *)((uintptr_t)first_obj + j * total_elt_sz); + rte_mempool_check_cookies(mp, &obj, 1, free); + } + } +#else + RTE_SET_USED(mp); + RTE_SET_USED(first_obj_table_const); + RTE_SET_USED(n); + RTE_SET_USED(free); +#endif +} + #ifdef RTE_LIBRTE_MEMPOOL_DEBUG static void mempool_obj_audit(struct rte_mempool *mp, __rte_unused void *opaque, @@ -1197,6 +1150,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp) { #ifdef RTE_LIBRTE_MEMPOOL_DEBUG + struct rte_mempool_info info; struct rte_mempool_debug_stats sum; unsigned lcore_id; #endif @@ -1238,6 +1192,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) /* sum and dump statistics */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG + rte_mempool_ops_get_info(mp, &info); memset(&sum, 0, sizeof(sum)); for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { sum.put_bulk += mp->stats[lcore_id].put_bulk; @@ -1246,6 +1201,8 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) sum.get_success_objs += mp->stats[lcore_id].get_success_objs; sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs; + sum.get_success_blks += mp->stats[lcore_id].get_success_blks; + sum.get_fail_blks += mp->stats[lcore_id].get_fail_blks; } fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); @@ -1254,6 +1211,11 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs); + if (info.contig_block_size > 0) { + fprintf(f, " get_success_blks=%"PRIu64"\n", + sum.get_success_blks); + fprintf(f, " get_fail_blks=%"PRIu64"\n", sum.get_fail_blks); + } #else fprintf(f, " no statistics available\n"); #endif diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 8b1b7f7e..7c9cd9a2 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -70,6 +70,10 @@ struct rte_mempool_debug_stats { uint64_t get_success_objs; /**< Objects successfully allocated. */ uint64_t get_fail_bulk; /**< Failed allocation number. 
*/ uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ + /** Successful allocation number of contiguous blocks. */ + uint64_t get_success_blks; + /** Failed allocation number of contiguous blocks. */ + uint64_t get_fail_blks; } __rte_cache_aligned; #endif @@ -189,6 +193,20 @@ struct rte_mempool_memhdr { void *opaque; /**< Argument passed to the free callback */ }; +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Additional information about the mempool + * + * The structure is cache-line aligned to avoid ABI breakages in + * a number of cases when something small is added. + */ +struct rte_mempool_info { + /** Number of objects in the contiguous block */ + unsigned int contig_block_size; +} __rte_cache_aligned; + /** * The RTE mempool structure. */ @@ -244,25 +262,8 @@ struct rte_mempool { #define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/ #define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/ #define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */ -#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */ -/** - * This capability flag is advertised by a mempool handler, if the whole - * memory area containing the objects must be physically contiguous. - * Note: This flag should not be passed by application. - */ -#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040 -/** - * This capability flag is advertised by a mempool handler. Used for a case - * where mempool driver wants object start address(vaddr) aligned to block - * size(/ total element size). - * - * Note: - * - This flag should not be passed by application. - * Flag used for mempool driver only. - * - Mempool driver must also set MEMPOOL_F_CAPA_PHYS_CONTIG flag along with - * MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS. - */ -#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080 +#define MEMPOOL_F_NO_IOVA_CONTIG 0x0020 /**< Don't need IOVA contiguous objs. */ +#define MEMPOOL_F_NO_PHYS_CONTIG MEMPOOL_F_NO_IOVA_CONTIG /* deprecated */ /** * @internal When debug is enabled, store some statistics. @@ -282,8 +283,16 @@ struct rte_mempool { mp->stats[__lcore_id].name##_bulk += 1; \ } \ } while(0) +#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ + unsigned int __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ + mp->stats[__lcore_id].name##_blks += n; \ + mp->stats[__lcore_id].name##_bulk += 1; \ + } \ + } while (0) #else #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) +#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) #endif /** @@ -351,6 +360,38 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp, #define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0) #endif /* RTE_LIBRTE_MEMPOOL_DEBUG */ +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @internal Check contiguous object blocks and update cookies or panic. + * + * @param mp + * Pointer to the memory pool. + * @param first_obj_table_const + * Pointer to a table of void * pointers (first object of the contiguous + * object blocks). + * @param n + * Number of contiguous object blocks. 
+ * @param free + * - 0: object is supposed to be allocated, mark it as free + * - 1: object is supposed to be free, mark it as allocated + * - 2: just check that cookie is valid (free or allocated) + */ +void rte_mempool_contig_blocks_check_cookies(const struct rte_mempool *mp, + void * const *first_obj_table_const, unsigned int n, int free); + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#define __mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \ + free) \ + rte_mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \ + free) +#else +#define __mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \ + free) \ + do {} while (0) +#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */ + #define RTE_MEMPOOL_OPS_NAMESIZE 32 /**< Max length of ops struct name. */ /** @@ -382,22 +423,136 @@ typedef int (*rte_mempool_enqueue_t)(struct rte_mempool *mp, typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp, void **obj_table, unsigned int n); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Dequeue a number of contiquous object blocks from the external pool. + */ +typedef int (*rte_mempool_dequeue_contig_blocks_t)(struct rte_mempool *mp, + void **first_obj_table, unsigned int n); + /** * Return the number of available objects in the external pool. */ typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp); /** - * Get the mempool capabilities. + * Calculate memory size required to store given number of objects. + * + * If mempool objects are not required to be IOVA-contiguous + * (the flag MEMPOOL_F_NO_IOVA_CONTIG is set), min_chunk_size defines + * virtually contiguous chunk size. Otherwise, if mempool objects must + * be IOVA-contiguous (the flag MEMPOOL_F_NO_IOVA_CONTIG is clear), + * min_chunk_size defines IOVA-contiguous chunk size. + * + * @param[in] mp + * Pointer to the memory pool. + * @param[in] obj_num + * Number of objects. + * @param[in] pg_shift + * LOG2 of the physical pages size. If set to 0, ignore page boundaries. + * @param[out] min_chunk_size + * Location for minimum size of the memory chunk which may be used to + * store memory pool objects. + * @param[out] align + * Location for required memory chunk alignment. + * @return + * Required memory size aligned at page boundary. + */ +typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp, + uint32_t obj_num, uint32_t pg_shift, + size_t *min_chunk_size, size_t *align); + +/** + * Default way to calculate memory size required to store given number of + * objects. + * + * If page boundaries may be ignored, it is just a product of total + * object size including header and trailer and number of objects. + * Otherwise, it is a number of pages required to store given number of + * objects without crossing page boundary. + * + * Note that if object size is bigger than page size, then it assumes + * that pages are grouped in subsets of physically continuous pages big + * enough to store at least one object. + * + * Minimum size of memory chunk is a maximum of the page size and total + * element size. + * + * Required memory chunk alignment is a maximum of page size and cache + * line size. */ -typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp, - unsigned int *flags); +ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp, + uint32_t obj_num, uint32_t pg_shift, + size_t *min_chunk_size, size_t *align); /** - * Notify new memory area to mempool. + * Function to be called for each populated object. 
+ * + * @param[in] mp + * A pointer to the mempool structure. + * @param[in] opaque + * An opaque pointer passed to iterator. + * @param[in] vaddr + * Object virtual address. + * @param[in] iova + * Input/output virtual address of the object or RTE_BAD_IOVA. */ -typedef int (*rte_mempool_ops_register_memory_area_t) -(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len); +typedef void (rte_mempool_populate_obj_cb_t)(struct rte_mempool *mp, + void *opaque, void *vaddr, rte_iova_t iova); + +/** + * Populate memory pool objects using provided memory chunk. + * + * Populated objects should be enqueued to the pool, e.g. using + * rte_mempool_ops_enqueue_bulk(). + * + * If the given IO address is unknown (iova = RTE_BAD_IOVA), + * the chunk doesn't need to be physically contiguous (only virtually), + * and allocated objects may span two pages. + * + * @param[in] mp + * A pointer to the mempool structure. + * @param[in] max_objs + * Maximum number of objects to be populated. + * @param[in] vaddr + * The virtual address of memory that should be used to store objects. + * @param[in] iova + * The IO address + * @param[in] len + * The length of memory in bytes. + * @param[in] obj_cb + * Callback function to be executed for each populated object. + * @param[in] obj_cb_arg + * An opaque pointer passed to the callback function. + * @return + * The number of objects added on success. + * On error, no objects are populated and a negative errno is returned. + */ +typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp, + unsigned int max_objs, + void *vaddr, rte_iova_t iova, size_t len, + rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg); + +/** + * Default way to populate memory pool object using provided memory + * chunk: just slice objects one by one. + */ +int rte_mempool_op_populate_default(struct rte_mempool *mp, + unsigned int max_objs, + void *vaddr, rte_iova_t iova, size_t len, + rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get some additional information about a mempool. + */ +typedef int (*rte_mempool_get_info_t)(const struct rte_mempool *mp, + struct rte_mempool_info *info); + /** Structure defining mempool operations structure */ struct rte_mempool_ops { @@ -408,13 +563,23 @@ struct rte_mempool_ops { rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */ rte_mempool_get_count get_count; /**< Get qty of available objs. */ /** - * Get the mempool capabilities + * Optional callback to calculate memory size required to + * store specified number of objects. + */ + rte_mempool_calc_mem_size_t calc_mem_size; + /** + * Optional callback to populate mempool objects using + * provided memory chunk. + */ + rte_mempool_populate_t populate; + /** + * Get mempool info */ - rte_mempool_get_capabilities_t get_capabilities; + rte_mempool_get_info_t get_info; /** - * Notify new memory area to mempool + * Dequeue a number of contiguous object blocks. */ - rte_mempool_ops_register_memory_area_t register_memory_area; + rte_mempool_dequeue_contig_blocks_t dequeue_contig_blocks; } __rte_cache_aligned; #define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */ @@ -492,6 +657,30 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, return ops->dequeue(mp, obj_table, n); } +/** + * @internal Wrapper for mempool_ops dequeue_contig_blocks callback. + * + * @param[in] mp + * Pointer to the memory pool. 
+ * @param[out] first_obj_table + * Pointer to a table of void * pointers (first objects). + * @param[in] n + * Number of blocks to get. + * @return + * - 0: Success; got n objects. + * - <0: Error; code of dequeue function. + */ +static inline int +rte_mempool_ops_dequeue_contig_blocks(struct rte_mempool *mp, + void **first_obj_table, unsigned int n) +{ + struct rte_mempool_ops *ops; + + ops = rte_mempool_get_ops(mp->ops_index); + RTE_ASSERT(ops->dequeue_contig_blocks != NULL); + return ops->dequeue_contig_blocks(mp, first_obj_table, n); +} + /** * @internal wrapper for mempool_ops enqueue callback. * @@ -527,41 +716,74 @@ unsigned rte_mempool_ops_get_count(const struct rte_mempool *mp); /** - * @internal wrapper for mempool_ops get_capabilities callback. + * @internal wrapper for mempool_ops calc_mem_size callback. + * API to calculate size of memory required to store specified number of + * object. * - * @param mp [in] + * @param[in] mp * Pointer to the memory pool. - * @param flags [out] - * Pointer to the mempool flags. + * @param[in] obj_num + * Number of objects. + * @param[in] pg_shift + * LOG2 of the physical pages size. If set to 0, ignore page boundaries. + * @param[out] min_chunk_size + * Location for minimum size of the memory chunk which may be used to + * store memory pool objects. + * @param[out] align + * Location for required memory chunk alignment. * @return - * - 0: Success; The mempool driver has advertised his pool capabilities in - * flags param. - * - -ENOTSUP - doesn't support get_capabilities ops (valid case). - * - Otherwise, pool create fails. + * Required memory size aligned at page boundary. */ -int -rte_mempool_ops_get_capabilities(const struct rte_mempool *mp, - unsigned int *flags); +ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp, + uint32_t obj_num, uint32_t pg_shift, + size_t *min_chunk_size, size_t *align); + /** - * @internal wrapper for mempool_ops register_memory_area callback. - * API to notify the mempool handler when a new memory area is added to pool. + * @internal wrapper for mempool_ops populate callback. * - * @param mp + * Populate memory pool objects using provided memory chunk. + * + * @param[in] mp + * A pointer to the mempool structure. + * @param[in] max_objs + * Maximum number of objects to be populated. + * @param[in] vaddr + * The virtual address of memory that should be used to store objects. + * @param[in] iova + * The IO address + * @param[in] len + * The length of memory in bytes. + * @param[in] obj_cb + * Callback function to be executed for each populated object. + * @param[in] obj_cb_arg + * An opaque pointer passed to the callback function. + * @return + * The number of objects added on success. + * On error, no objects are populated and a negative errno is returned. + */ +int rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs, + void *vaddr, rte_iova_t iova, size_t len, + rte_mempool_populate_obj_cb_t *obj_cb, + void *obj_cb_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Wrapper for mempool_ops get_info callback. + * + * @param[in] mp * Pointer to the memory pool. - * @param vaddr - * Pointer to the buffer virtual address. - * @param iova - * Pointer to the buffer IO address. - * @param len - * Pool size. + * @param[out] info + * Pointer to the rte_mempool_info structure * @return - * - 0: Success; - * - -ENOTSUP - doesn't support register_memory_area ops (valid error case). 
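Editorial sketch (not from the patch): the calc_mem_size and populate hooks are optional, since the rte_mempool_ops_calc_mem_size() and rte_mempool_ops_populate() wrappers fall back to the new default helpers when a driver leaves them NULL, while an unset get_info reports -ENOTSUP. How a hypothetical driver might fill the optional callbacks (names and block size are invented; they would be wired into the driver's struct rte_mempool_ops next to its existing alloc/enqueue/dequeue handlers):

#include <stdint.h>
#include <sys/types.h>
#include <rte_mempool.h>

#define EXAMPLE_BLOCK_SIZE 32 /* invented: objects per contiguous block */

/* get_info callback: advertise the block size used by the (hypothetical)
 * dequeue_contig_blocks implementation */
static int
example_get_info(const struct rte_mempool *mp, struct rte_mempool_info *info)
{
        (void)mp;
        info->contig_block_size = EXAMPLE_BLOCK_SIZE;
        return 0;
}

/* calc_mem_size callback: defer to the default helper; a driver with
 * special placement rules would adjust min_chunk_size/align here instead */
static ssize_t
example_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
                uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
        return rte_mempool_op_calc_mem_size_default(mp, obj_num, pg_shift,
                        min_chunk_size, align);
}

/* wired in as .calc_mem_size = example_calc_mem_size and
 * .get_info = example_get_info in the driver's rte_mempool_ops */

For a feel of the default sizing helper with hypothetical numbers: 4096 objects of 2176 bytes total element size on 2 MB pages (pg_shift = 21) give 963 objects per page, hence 5 pages, a 10 MB (10485760 byte) return value, and a min_chunk_size and alignment of 2 MB.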
- * - Otherwise, rte_mempool_populate_phys fails thus pool create fails. + * - 0: Success; The mempool driver supports retrieving supplementary + * mempool information + * - -ENOTSUP - doesn't support get_info ops (valid case). */ -int -rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, - char *vaddr, rte_iova_t iova, size_t len); +__rte_experimental +int rte_mempool_ops_get_info(const struct rte_mempool *mp, + struct rte_mempool_info *info); /** * @internal wrapper for mempool_ops free callback. @@ -710,8 +932,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior * when using rte_mempool_get() or rte_mempool_get_bulk() is * "single-consumer". Otherwise, it is "multi-consumers". - * - MEMPOOL_F_NO_PHYS_CONTIG: If set, allocated objects won't - * necessarily be contiguous in physical memory. + * - MEMPOOL_F_NO_IOVA_CONTIG: If set, allocated objects won't + * necessarily be contiguous in IO memory. * @return * The pointer to the new allocated mempool, on success. NULL on error * with rte_errno set appropriately. Possible rte_errno values include: @@ -729,72 +951,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags); -/** - * Create a new mempool named *name* in memory. - * - * The pool contains n elements of elt_size. Its size is set to n. - * This function uses ``memzone_reserve()`` to allocate the mempool header - * (and the objects if vaddr is NULL). - * Depending on the input parameters, mempool elements can be either allocated - * together with the mempool header, or an externally provided memory buffer - * could be used to store mempool objects. In later case, that external - * memory buffer can consist of set of disjoint physical pages. - * - * @param name - * The name of the mempool. - * @param n - * The number of elements in the mempool. The optimum size (in terms of - * memory usage) for a mempool is when n is a power of two minus one: - * n = (2^q - 1). - * @param elt_size - * The size of each element. - * @param cache_size - * Size of the cache. See rte_mempool_create() for details. - * @param private_data_size - * The size of the private data appended after the mempool - * structure. This is useful for storing some private data after the - * mempool structure, as is done for rte_mbuf_pool for example. - * @param mp_init - * A function pointer that is called for initialization of the pool, - * before object initialization. The user can initialize the private - * data in this function if needed. This parameter can be NULL if - * not needed. - * @param mp_init_arg - * An opaque pointer to data that can be used in the mempool - * constructor function. - * @param obj_init - * A function called for each object at initialization of the pool. - * See rte_mempool_create() for details. - * @param obj_init_arg - * An opaque pointer passed to the object constructor function. - * @param socket_id - * The *socket_id* argument is the socket identifier in the case of - * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA - * constraint for the reserved zone. - * @param flags - * Flags controlling the behavior of the mempool. See - * rte_mempool_create() for details. - * @param vaddr - * Virtual address of the externally allocated memory buffer. - * Will be used to store mempool objects. - * @param iova - * Array of IO addresses of the pages that comprises given memory buffer. 
- * @param pg_num - * Number of elements in the iova array. - * @param pg_shift - * LOG2 of the physical pages size. - * @return - * The pointer to the new allocated mempool, on success. NULL on error - * with rte_errno set appropriately. See rte_mempool_create() for details. - */ -struct rte_mempool * -rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, - unsigned cache_size, unsigned private_data_size, - rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, - int socket_id, unsigned flags, void *vaddr, - const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift); - /** * Create an empty mempool * @@ -877,46 +1033,6 @@ int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); -__rte_deprecated -int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, - phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, - void *opaque); - -/** - * Add physical memory for objects in the pool at init - * - * Add a virtually contiguous memory chunk in the pool where objects can - * be instantiated. The IO addresses corresponding to the virtual - * area are described in iova[], pg_num, pg_shift. - * - * @param mp - * A pointer to the mempool structure. - * @param vaddr - * The virtual address of memory that should be used to store objects. - * @param iova - * An array of IO addresses of each page composing the virtual area. - * @param pg_num - * Number of elements in the iova array. - * @param pg_shift - * LOG2 of the physical pages size. - * @param free_cb - * The callback used to free this chunk when destroying the mempool. - * @param opaque - * An opaque argument passed to free_cb. - * @return - * The number of objects added on success. - * On error, the chunks are not added in the memory list of the - * mempool and a negative errno is returned. - */ -int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr, - const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift, - rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); - -__rte_deprecated -int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, - const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, - rte_mempool_memchunk_free_cb_t *free_cb, void *opaque); - /** * Add virtually contiguous memory for objects in the pool at init * @@ -1048,22 +1164,6 @@ rte_mempool_cache_create(uint32_t size, int socket_id); void rte_mempool_cache_free(struct rte_mempool_cache *cache); -/** - * Flush a user-owned mempool cache to the specified mempool. - * - * @param cache - * A pointer to the mempool cache. - * @param mp - * A pointer to the mempool. - */ -static __rte_always_inline void -rte_mempool_cache_flush(struct rte_mempool_cache *cache, - struct rte_mempool *mp) -{ - rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len); - cache->len = 0; -} - /** * Get a pointer to the per-lcore default mempool cache. * @@ -1086,6 +1186,26 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id) return &mp->local_cache[lcore_id]; } +/** + * Flush a user-owned mempool cache to the specified mempool. + * + * @param cache + * A pointer to the mempool cache. + * @param mp + * A pointer to the mempool. 
+ */ +static __rte_always_inline void +rte_mempool_cache_flush(struct rte_mempool_cache *cache, + struct rte_mempool *mp) +{ + if (cache == NULL) + cache = rte_mempool_default_cache(mp, rte_lcore_id()); + if (cache == NULL || cache->len == 0) + return; + rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len); + cache->len = 0; +} + /** * @internal Put several objects back in the mempool; used internally. * @param mp @@ -1365,6 +1485,49 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p) return rte_mempool_get_bulk(mp, obj_p, 1); } +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get a contiguous blocks of objects from the mempool. + * + * If cache is enabled, consider to flush it first, to reuse objects + * as soon as possible. + * + * The application should check that the driver supports the operation + * by calling rte_mempool_ops_get_info() and checking that `contig_block_size` + * is not zero. + * + * @param mp + * A pointer to the mempool structure. + * @param first_obj_table + * A pointer to a pointer to the first object in each block. + * @param n + * The number of blocks to get from mempool. + * @return + * - 0: Success; blocks taken. + * - -ENOBUFS: Not enough entries in the mempool; no object is retrieved. + * - -EOPNOTSUPP: The mempool driver does not support block dequeue + */ +static __rte_always_inline int +__rte_experimental +rte_mempool_get_contig_blocks(struct rte_mempool *mp, + void **first_obj_table, unsigned int n) +{ + int ret; + + ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n); + if (ret == 0) { + __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_success, n); + __mempool_contig_blocks_check_cookies(mp, first_obj_table, n, + 1); + } else { + __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_fail, n); + } + + return ret; +} + /** * Return the number of entries in the mempool. * @@ -1439,7 +1602,7 @@ rte_mempool_empty(const struct rte_mempool *mp) * A pointer (virtual address) to the element of the pool. * @return * The IO address of the elt element. - * If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the + * If the mempool was created with MEMPOOL_F_NO_IOVA_CONTIG, the * returned value is RTE_BAD_IOVA. */ static inline rte_iova_t @@ -1451,13 +1614,6 @@ rte_mempool_virt2iova(const void *elt) return hdr->iova; } -__rte_deprecated -static inline phys_addr_t -rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt) -{ - return rte_mempool_virt2iova(elt); -} - /** * Check the consistency of mempool objects. * @@ -1526,64 +1682,6 @@ struct rte_mempool *rte_mempool_lookup(const char *name); uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, struct rte_mempool_objsz *sz); -/** - * Get the size of memory required to store mempool elements. - * - * Calculate the maximum amount of memory required to store given number - * of objects. Assume that the memory buffer will be aligned at page - * boundary. - * - * Note that if object size is bigger then page size, then it assumes - * that pages are grouped in subsets of physically continuous pages big - * enough to store at least one object. - * - * @param elt_num - * Number of elements. - * @param total_elt_sz - * The size of each element, including header and trailer, as returned - * by rte_mempool_calc_obj_size(). - * @param pg_shift - * LOG2 of the physical pages size. If set to 0, ignore page boundaries. - * @param flags - * The mempool flags. - * @return - * Required memory size aligned at page boundary. 
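On the consumer side, the block-dequeue usage documented above reduces to two calls: query the driver once for contig_block_size, then fetch blocks. An illustrative sketch (not part of the patch; both entry points are experimental, so it assumes a build with ALLOW_EXPERIMENTAL_API):

#include <errno.h>
#include <rte_mempool.h>

/* fetch n_blocks contiguous blocks; each entry of first_objs receives the
 * first object of one block of info.contig_block_size objects */
static int
example_get_blocks(struct rte_mempool *mp, void **first_objs,
                unsigned int n_blocks)
{
        struct rte_mempool_info info;

        /* drivers without a dequeue_contig_blocks op report no block size */
        if (rte_mempool_ops_get_info(mp, &info) != 0 ||
                        info.contig_block_size == 0)
                return -EOPNOTSUPP;

        /* per the API note, consider flushing the cache first to reuse
         * objects as soon as possible; NULL now selects the default
         * per-lcore cache and is a safe no-op for cache-less pools */
        rte_mempool_cache_flush(NULL, mp);

        /* 0 on success, -ENOBUFS or -EOPNOTSUPP otherwise */
        return rte_mempool_get_contig_blocks(mp, first_objs, n_blocks);
}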
- */ -size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, - uint32_t pg_shift, unsigned int flags); - -/** - * Get the size of memory required to store mempool elements. - * - * Calculate how much memory would be actually required with the given - * memory footprint to store required number of objects. - * - * @param vaddr - * Virtual address of the externally allocated memory buffer. - * Will be used to store mempool objects. - * @param elt_num - * Number of elements. - * @param total_elt_sz - * The size of each element, including header and trailer, as returned - * by rte_mempool_calc_obj_size(). - * @param iova - * Array of IO addresses of the pages that comprises given memory buffer. - * @param pg_num - * Number of elements in the iova array. - * @param pg_shift - * LOG2 of the physical pages size. - * @param flags - * The mempool flags. - * @return - * On success, the number of bytes needed to store given number of - * objects, aligned to the given page size. If the provided memory - * buffer is too small, return a negative value whose absolute value - * is the actual number of elements that can be stored in that buffer. - */ -ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, - size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num, - uint32_t pg_shift, unsigned int flags); - /** * Walk list of all memory pools * diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c index 0732255c..a27e1fa5 100644 --- a/lib/librte_mempool/rte_mempool_ops.c +++ b/lib/librte_mempool/rte_mempool_ops.c @@ -57,8 +57,10 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h) ops->enqueue = h->enqueue; ops->dequeue = h->dequeue; ops->get_count = h->get_count; - ops->get_capabilities = h->get_capabilities; - ops->register_memory_area = h->register_memory_area; + ops->calc_mem_size = h->calc_mem_size; + ops->populate = h->populate; + ops->get_info = h->get_info; + ops->dequeue_contig_blocks = h->dequeue_contig_blocks; rte_spinlock_unlock(&rte_mempool_ops_table.sl); @@ -97,32 +99,57 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp) return ops->get_count(mp); } -/* wrapper to get external mempool capabilities. 
*/ +/* wrapper to notify new memory area to external mempool */ +ssize_t +rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp, + uint32_t obj_num, uint32_t pg_shift, + size_t *min_chunk_size, size_t *align) +{ + struct rte_mempool_ops *ops; + + ops = rte_mempool_get_ops(mp->ops_index); + + if (ops->calc_mem_size == NULL) + return rte_mempool_op_calc_mem_size_default(mp, obj_num, + pg_shift, min_chunk_size, align); + + return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align); +} + +/* wrapper to populate memory pool objects using provided memory chunk */ int -rte_mempool_ops_get_capabilities(const struct rte_mempool *mp, - unsigned int *flags) +rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs, + void *vaddr, rte_iova_t iova, size_t len, + rte_mempool_populate_obj_cb_t *obj_cb, + void *obj_cb_arg) { struct rte_mempool_ops *ops; ops = rte_mempool_get_ops(mp->ops_index); - RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP); - return ops->get_capabilities(mp, flags); + if (ops->populate == NULL) + return rte_mempool_op_populate_default(mp, max_objs, vaddr, + iova, len, obj_cb, + obj_cb_arg); + + return ops->populate(mp, max_objs, vaddr, iova, len, obj_cb, + obj_cb_arg); } -/* wrapper to notify new memory area to external mempool */ +/* wrapper to get additional mempool info */ int -rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr, - rte_iova_t iova, size_t len) +rte_mempool_ops_get_info(const struct rte_mempool *mp, + struct rte_mempool_info *info) { struct rte_mempool_ops *ops; ops = rte_mempool_get_ops(mp->ops_index); - RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP); - return ops->register_memory_area(mp, vaddr, iova, len); + RTE_FUNC_PTR_OR_ERR_RET(ops->get_info, -ENOTSUP); + return ops->get_info(mp, info); } + /* sets mempool ops previously registered by rte_mempool_register_ops. */ int rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name, diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c new file mode 100644 index 00000000..4e2bfc82 --- /dev/null +++ b/lib/librte_mempool/rte_mempool_ops_default.c @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 6WIND S.A. + * Copyright(c) 2018 Solarflare Communications Inc. + */ + +#include + +ssize_t +rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp, + uint32_t obj_num, uint32_t pg_shift, + size_t *min_chunk_size, size_t *align) +{ + size_t total_elt_sz; + size_t obj_per_page, pg_num, pg_sz; + size_t mem_size; + + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + if (total_elt_sz == 0) { + mem_size = 0; + } else if (pg_shift == 0) { + mem_size = total_elt_sz * obj_num; + } else { + pg_sz = (size_t)1 << pg_shift; + obj_per_page = pg_sz / total_elt_sz; + if (obj_per_page == 0) { + /* + * Note that if object size is bigger than page size, + * then it is assumed that pages are grouped in subsets + * of physically continuous pages big enough to store + * at least one object. 
+ */ + mem_size = + RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num; + } else { + pg_num = (obj_num + obj_per_page - 1) / obj_per_page; + mem_size = pg_num << pg_shift; + } + } + + *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz); + + *align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift); + + return mem_size; +} + +int +rte_mempool_op_populate_default(struct rte_mempool *mp, unsigned int max_objs, + void *vaddr, rte_iova_t iova, size_t len, + rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg) +{ + size_t total_elt_sz; + size_t off; + unsigned int i; + void *obj; + + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + + for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) { + off += mp->header_size; + obj = (char *)vaddr + off; + obj_cb(mp, obj_cb_arg, obj, + (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off)); + rte_mempool_ops_enqueue_bulk(mp, &obj, 1); + off += mp->elt_size + mp->trailer_size; + } + + return i; +} diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map index 62b76f91..17cbca46 100644 --- a/lib/librte_mempool/rte_mempool_version.map +++ b/lib/librte_mempool/rte_mempool_version.map @@ -8,9 +8,6 @@ DPDK_2.0 { rte_mempool_list_dump; rte_mempool_lookup; rte_mempool_walk; - rte_mempool_xmem_create; - rte_mempool_xmem_size; - rte_mempool_xmem_usage; local: *; }; @@ -34,8 +31,6 @@ DPDK_16.07 { rte_mempool_ops_table; rte_mempool_populate_anon; rte_mempool_populate_default; - rte_mempool_populate_phys; - rte_mempool_populate_phys_tab; rte_mempool_populate_virt; rte_mempool_register_ops; rte_mempool_set_ops_byname; @@ -45,9 +40,21 @@ DPDK_16.07 { DPDK_17.11 { global: - rte_mempool_ops_get_capabilities; - rte_mempool_ops_register_memory_area; rte_mempool_populate_iova; - rte_mempool_populate_iova_tab; } DPDK_16.07; + +DPDK_18.05 { + global: + + rte_mempool_contig_blocks_check_cookies; + rte_mempool_op_calc_mem_size_default; + rte_mempool_op_populate_default; + +} DPDK_17.11; + +EXPERIMENTAL { + global: + + rte_mempool_ops_get_info; +}; diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile index 3b80c6ac..2dc071e8 100644 --- a/lib/librte_meter/Makefile +++ b/lib/librte_meter/Makefile @@ -16,7 +16,7 @@ LDLIBS += -lrte_eal EXPORT_MAP := rte_meter_version.map -LIBABIVER := 1 +LIBABIVER := 2 # # all source are stored in SRCS-y diff --git a/lib/librte_meter/meson.build b/lib/librte_meter/meson.build index 646fd4d4..947bc19e 100644 --- a/lib/librte_meter/meson.build +++ b/lib/librte_meter/meson.build @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 2 sources = files('rte_meter.c') headers = files('rte_meter.h') diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c index 332c5190..473f69ab 100644 --- a/lib/librte_meter/rte_meter.c +++ b/lib/librte_meter/rte_meter.c @@ -31,61 +31,82 @@ rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_ } int -rte_meter_srtcm_config(struct rte_meter_srtcm *m, struct rte_meter_srtcm_params *params) +rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p, + struct rte_meter_srtcm_params *params) { - uint64_t hz; + uint64_t hz = rte_get_tsc_hz(); /* Check input parameters */ - if ((m == NULL) || (params == NULL)) { - return -1; - } + if ((p == NULL) || + (params == NULL) || + (params->cir == 0) || + ((params->cbs == 0) && (params->ebs == 0))) + return -EINVAL; - if ((params->cir == 0) || ((params->cbs == 0) && (params->ebs == 
0))) { - return -2; - } + /* Initialize srTCM run-time structure */ + p->cbs = params->cbs; + p->ebs = params->ebs; + rte_meter_get_tb_params(hz, params->cir, &p->cir_period, + &p->cir_bytes_per_period); + + return 0; +} + +int +rte_meter_srtcm_config(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_profile *p) +{ + /* Check input parameters */ + if ((m == NULL) || (p == NULL)) + return -EINVAL; /* Initialize srTCM run-time structure */ - hz = rte_get_tsc_hz(); m->time = rte_get_tsc_cycles(); - m->tc = m->cbs = params->cbs; - m->te = m->ebs = params->ebs; - rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period); - - RTE_LOG(INFO, METER, "Low level srTCM config: \n" - "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n", - m->cir_period, m->cir_bytes_per_period); + m->tc = p->cbs; + m->te = p->ebs; return 0; } int -rte_meter_trtcm_config(struct rte_meter_trtcm *m, struct rte_meter_trtcm_params *params) +rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p, + struct rte_meter_trtcm_params *params) { - uint64_t hz; + uint64_t hz = rte_get_tsc_hz(); /* Check input parameters */ - if ((m == NULL) || (params == NULL)) { - return -1; - } + if ((p == NULL) || + (params == NULL) || + (params->cir == 0) || + (params->pir == 0) || + (params->pir < params->cir) || + (params->cbs == 0) || + (params->pbs == 0)) + return -EINVAL; - if ((params->cir == 0) || (params->pir == 0) || (params->pir < params->cir) || - (params->cbs == 0) || (params->pbs == 0)) { - return -2; - } + /* Initialize trTCM run-time structure */ + p->cbs = params->cbs; + p->pbs = params->pbs; + rte_meter_get_tb_params(hz, params->cir, &p->cir_period, + &p->cir_bytes_per_period); + rte_meter_get_tb_params(hz, params->pir, &p->pir_period, + &p->pir_bytes_per_period); + + return 0; +} + +int +rte_meter_trtcm_config(struct rte_meter_trtcm *m, + struct rte_meter_trtcm_profile *p) +{ + /* Check input parameters */ + if ((m == NULL) || (p == NULL)) + return -EINVAL; /* Initialize trTCM run-time structure */ - hz = rte_get_tsc_hz(); m->time_tc = m->time_tp = rte_get_tsc_cycles(); - m->tc = m->cbs = params->cbs; - m->tp = m->pbs = params->pbs; - rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period); - rte_meter_get_tb_params(hz, params->pir, &m->pir_period, &m->pir_bytes_per_period); - - RTE_LOG(INFO, METER, "Low level trTCM config: \n" - "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n" - "\tPIR period = %" PRIu64 ", PIR bytes per period = %" PRIu64 "\n", - m->cir_period, m->cir_bytes_per_period, - m->pir_period, m->pir_bytes_per_period); + m->tc = p->cbs; + m->tp = p->pbs; return 0; } diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h index ebdc453f..58a05158 100644 --- a/lib/librte_meter/rte_meter.h +++ b/lib/librte_meter/rte_meter.h @@ -53,45 +53,87 @@ struct rte_meter_trtcm_params { uint64_t pbs; /**< Peak Burst Size (PBS). Measured in bytes. */ }; +/** + * Internal data structure storing the srTCM configuration profile. Typically + * shared by multiple srTCM objects. + */ +struct rte_meter_srtcm_profile; + +/** + * Internal data structure storing the trTCM configuration profile. Typically + * shared by multiple trTCM objects. + */ +struct rte_meter_trtcm_profile; + /** Internal data structure storing the srTCM run-time context per metered traffic flow. */ struct rte_meter_srtcm; /** Internal data structure storing the trTCM run-time context per metered traffic flow. 
*/ struct rte_meter_trtcm; +/** + * srTCM profile configuration + * + * @param p + * Pointer to pre-allocated srTCM profile data structure + * @param params + * srTCM profile parameters + * @return + * 0 upon success, error code otherwise + */ +int +rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p, + struct rte_meter_srtcm_params *params); + +/** + * trTCM profile configuration + * + * @param p + * Pointer to pre-allocated trTCM profile data structure + * @param params + * trTCM profile parameters + * @return + * 0 upon success, error code otherwise + */ +int +rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p, + struct rte_meter_trtcm_params *params); + /** * srTCM configuration per metered traffic flow * * @param m * Pointer to pre-allocated srTCM data structure - * @param params - * User parameters per srTCM metered traffic flow + * @param p + * srTCM profile. Needs to be valid. * @return * 0 upon success, error code otherwise */ int rte_meter_srtcm_config(struct rte_meter_srtcm *m, - struct rte_meter_srtcm_params *params); + struct rte_meter_srtcm_profile *p); /** * trTCM configuration per metered traffic flow * * @param m * Pointer to pre-allocated trTCM data structure - * @param params - * User parameters per trTCM metered traffic flow + * @param p + * trTCM profile. Needs to be valid. * @return * 0 upon success, error code otherwise */ int rte_meter_trtcm_config(struct rte_meter_trtcm *m, - struct rte_meter_trtcm_params *params); + struct rte_meter_trtcm_profile *p); /** * srTCM color blind traffic metering * * @param m * Handle to srTCM instance + * @param p + * srTCM profile specified at srTCM object creation time * @param time * Current CPU time stamp (measured in CPU cycles) * @param pkt_len @@ -101,6 +143,7 @@ rte_meter_trtcm_config(struct rte_meter_trtcm *m, */ static inline enum rte_meter_color rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_profile *p, uint64_t time, uint32_t pkt_len); @@ -109,6 +152,8 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, * * @param m * Handle to srTCM instance + * @param p + * srTCM profile specified at srTCM object creation time * @param time * Current CPU time stamp (measured in CPU cycles) * @param pkt_len @@ -120,6 +165,7 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, */ static inline enum rte_meter_color rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_profile *p, uint64_t time, uint32_t pkt_len, enum rte_meter_color pkt_color); @@ -129,6 +175,8 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, * * @param m * Handle to trTCM instance + * @param p + * trTCM profile specified at trTCM object creation time * @param time * Current CPU time stamp (measured in CPU cycles) * @param pkt_len @@ -138,6 +186,7 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, */ static inline enum rte_meter_color rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, + struct rte_meter_trtcm_profile *p, uint64_t time, uint32_t pkt_len); @@ -146,6 +195,8 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, * * @param m * Handle to trTCM instance + * @param p + * trTCM profile specified at trTCM object creation time * @param time * Current CPU time stamp (measured in CPU cycles) * @param pkt_len @@ -157,6 +208,7 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, */ static inline enum rte_meter_color rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, + struct 
rte_meter_trtcm_profile *p, uint64_t time, uint32_t pkt_len, enum rte_meter_color pkt_color); @@ -166,33 +218,57 @@ rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, * ***/ +struct rte_meter_srtcm_profile { + uint64_t cbs; + /**< Upper limit for C token bucket */ + uint64_t ebs; + /**< Upper limit for E token bucket */ + uint64_t cir_period; + /**< Number of CPU cycles for each update of C and E token buckets */ + uint64_t cir_bytes_per_period; + /**< Number of bytes to add to C and E token buckets on each update */ +}; + /* Internal data structure storing the srTCM run-time context per metered traffic flow. */ struct rte_meter_srtcm { uint64_t time; /* Time of latest update of C and E token buckets */ uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */ uint64_t te; /* Number of bytes currently available in the excess (E) token bucket */ - uint64_t cbs; /* Upper limit for C token bucket */ - uint64_t ebs; /* Upper limit for E token bucket */ - uint64_t cir_period; /* Number of CPU cycles for one update of C and E token buckets */ - uint64_t cir_bytes_per_period; /* Number of bytes to add to C and E token buckets on each update */ }; -/* Internal data structure storing the trTCM run-time context per metered traffic flow. */ +struct rte_meter_trtcm_profile { + uint64_t cbs; + /**< Upper limit for C token bucket */ + uint64_t pbs; + /**< Upper limit for P token bucket */ + uint64_t cir_period; + /**< Number of CPU cycles for one update of C token bucket */ + uint64_t cir_bytes_per_period; + /**< Number of bytes to add to C token bucket on each update */ + uint64_t pir_period; + /**< Number of CPU cycles for one update of P token bucket */ + uint64_t pir_bytes_per_period; + /**< Number of bytes to add to P token bucket on each update */ +}; + +/** + * Internal data structure storing the trTCM run-time context per metered + * traffic flow. 
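To make the new split between the shared profile and the per-flow run-time state concrete, here is a minimal usage sketch (editorial illustration, not part of this patch); the traffic parameters and the meter_setup()/meter_pkt() helpers are hypothetical examples, not DPDK symbols.

#include <rte_cycles.h>
#include <rte_meter.h>

/* Example numbers only: 1 Mbyte/s committed rate, 2 KB / 4 KB burst sizes. */
static struct rte_meter_srtcm_profile profile;
static struct rte_meter_srtcm flow_meter;

static int
meter_setup(void)
{
	struct rte_meter_srtcm_params params = {
		.cir = 1000000, .cbs = 2048, .ebs = 4096,
	};
	int ret;

	/* The profile is configured once and may be shared by many flows. */
	ret = rte_meter_srtcm_profile_config(&profile, &params);
	if (ret)
		return ret;

	/* Per-flow state now only tracks time and token bucket occupancy. */
	return rte_meter_srtcm_config(&flow_meter, &profile);
}

static enum rte_meter_color
meter_pkt(uint32_t pkt_len)
{
	/* The color check functions take both the flow state and the profile. */
	return rte_meter_srtcm_color_blind_check(&flow_meter, &profile,
			rte_get_tsc_cycles(), pkt_len);
}
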
+ */ struct rte_meter_trtcm { - uint64_t time_tc; /* Time of latest update of C token bucket */ - uint64_t time_tp; /* Time of latest update of E token bucket */ - uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */ - uint64_t tp; /* Number of bytes currently available in the peak (P) token bucket */ - uint64_t cbs; /* Upper limit for C token bucket */ - uint64_t pbs; /* Upper limit for P token bucket */ - uint64_t cir_period; /* Number of CPU cycles for one update of C token bucket */ - uint64_t cir_bytes_per_period; /* Number of bytes to add to C token bucket on each update */ - uint64_t pir_period; /* Number of CPU cycles for one update of P token bucket */ - uint64_t pir_bytes_per_period; /* Number of bytes to add to P token bucket on each update */ + uint64_t time_tc; + /**< Time of latest update of C token bucket */ + uint64_t time_tp; + /**< Time of latest update of E token bucket */ + uint64_t tc; + /**< Number of bytes currently available in committed(C) token bucket */ + uint64_t tp; + /**< Number of bytes currently available in the peak(P) token bucket */ }; static inline enum rte_meter_color rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_profile *p, uint64_t time, uint32_t pkt_len) { @@ -200,17 +276,17 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, /* Bucket update */ time_diff = time - m->time; - n_periods = time_diff / m->cir_period; - m->time += n_periods * m->cir_period; + n_periods = time_diff / p->cir_period; + m->time += n_periods * p->cir_period; /* Put the tokens overflowing from tc into te bucket */ - tc = m->tc + n_periods * m->cir_bytes_per_period; + tc = m->tc + n_periods * p->cir_bytes_per_period; te = m->te; - if (tc > m->cbs) { - te += (tc - m->cbs); - if (te > m->ebs) - te = m->ebs; - tc = m->cbs; + if (tc > p->cbs) { + te += (tc - p->cbs); + if (te > p->ebs) + te = p->ebs; + tc = p->cbs; } /* Color logic */ @@ -233,6 +309,7 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, static inline enum rte_meter_color rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_profile *p, uint64_t time, uint32_t pkt_len, enum rte_meter_color pkt_color) @@ -241,17 +318,17 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, /* Bucket update */ time_diff = time - m->time; - n_periods = time_diff / m->cir_period; - m->time += n_periods * m->cir_period; + n_periods = time_diff / p->cir_period; + m->time += n_periods * p->cir_period; /* Put the tokens overflowing from tc into te bucket */ - tc = m->tc + n_periods * m->cir_bytes_per_period; + tc = m->tc + n_periods * p->cir_bytes_per_period; te = m->te; - if (tc > m->cbs) { - te += (tc - m->cbs); - if (te > m->ebs) - te = m->ebs; - tc = m->cbs; + if (tc > p->cbs) { + te += (tc - p->cbs); + if (te > p->ebs) + te = p->ebs; + tc = p->cbs; } /* Color logic */ @@ -274,6 +351,7 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, static inline enum rte_meter_color rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, + struct rte_meter_trtcm_profile *p, uint64_t time, uint32_t pkt_len) { @@ -282,18 +360,18 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, /* Bucket update */ time_diff_tc = time - m->time_tc; time_diff_tp = time - m->time_tp; - n_periods_tc = time_diff_tc / m->cir_period; - n_periods_tp = time_diff_tp / m->pir_period; - m->time_tc += n_periods_tc * m->cir_period; - m->time_tp += n_periods_tp * m->pir_period; + n_periods_tc = time_diff_tc / 
p->cir_period; + n_periods_tp = time_diff_tp / p->pir_period; + m->time_tc += n_periods_tc * p->cir_period; + m->time_tp += n_periods_tp * p->pir_period; - tc = m->tc + n_periods_tc * m->cir_bytes_per_period; - if (tc > m->cbs) - tc = m->cbs; + tc = m->tc + n_periods_tc * p->cir_bytes_per_period; + if (tc > p->cbs) + tc = p->cbs; - tp = m->tp + n_periods_tp * m->pir_bytes_per_period; - if (tp > m->pbs) - tp = m->pbs; + tp = m->tp + n_periods_tp * p->pir_bytes_per_period; + if (tp > p->pbs) + tp = p->pbs; /* Color logic */ if (tp < pkt_len) { @@ -315,6 +393,7 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, static inline enum rte_meter_color rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, + struct rte_meter_trtcm_profile *p, uint64_t time, uint32_t pkt_len, enum rte_meter_color pkt_color) @@ -324,18 +403,18 @@ rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, /* Bucket update */ time_diff_tc = time - m->time_tc; time_diff_tp = time - m->time_tp; - n_periods_tc = time_diff_tc / m->cir_period; - n_periods_tp = time_diff_tp / m->pir_period; - m->time_tc += n_periods_tc * m->cir_period; - m->time_tp += n_periods_tp * m->pir_period; - - tc = m->tc + n_periods_tc * m->cir_bytes_per_period; - if (tc > m->cbs) - tc = m->cbs; - - tp = m->tp + n_periods_tp * m->pir_bytes_per_period; - if (tp > m->pbs) - tp = m->pbs; + n_periods_tc = time_diff_tc / p->cir_period; + n_periods_tp = time_diff_tp / p->pir_period; + m->time_tc += n_periods_tc * p->cir_period; + m->time_tp += n_periods_tp * p->pir_period; + + tc = m->tc + n_periods_tc * p->cir_bytes_per_period; + if (tc > p->cbs) + tc = p->cbs; + + tp = m->tp + n_periods_tp * p->pir_bytes_per_period; + if (tp > p->pbs) + tp = p->pbs; /* Color logic */ if ((pkt_color == e_RTE_METER_RED) || (tp < pkt_len)) { diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map index 2fd647c6..cb79f0c2 100644 --- a/lib/librte_meter/rte_meter_version.map +++ b/lib/librte_meter/rte_meter_version.map @@ -10,3 +10,10 @@ DPDK_2.0 { local: *; }; + +DPDK_18.08 { + global: + + rte_meter_srtcm_profile_config; + rte_meter_trtcm_profile_config; +}; diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c index 556ae1ba..99a96b65 100644 --- a/lib/librte_metrics/rte_metrics.c +++ b/lib/librte_metrics/rte_metrics.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -95,6 +96,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names) /* Some sanity checks */ if (cnt_names < 1 || names == NULL) return -EINVAL; + for (idx_name = 0; idx_name < cnt_names; idx_name++) + if (names[idx_name] == NULL) + return -EINVAL; memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); if (memzone == NULL) @@ -113,10 +117,7 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names) for (idx_name = 0; idx_name < cnt_names; idx_name++) { entry = &stats->metadata[idx_name + stats->cnt_stats]; - strncpy(entry->name, names[idx_name], - RTE_METRICS_MAX_NAME_LEN); - /* Enforce NULL-termination */ - entry->name[RTE_METRICS_MAX_NAME_LEN - 1] = '\0'; + strlcpy(entry->name, names[idx_name], RTE_METRICS_MAX_NAME_LEN); memset(entry->value, 0, sizeof(entry->value)); entry->idx_next_stat = idx_name + stats->cnt_stats + 1; } @@ -161,6 +162,11 @@ rte_metrics_update_values(int port_id, stats = memzone->addr; rte_spinlock_lock(&stats->lock); + + if (key >= stats->cnt_stats) { + rte_spinlock_unlock(&stats->lock); + return -EINVAL; + } idx_metric = key; cnt_setsize = 1; while 
(idx_metric < stats->cnt_stats) { @@ -202,9 +208,8 @@ rte_metrics_get_names(struct rte_metric_name *names, int return_value; memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); - /* If not allocated, fail silently */ if (memzone == NULL) - return 0; + return -EIO; stats = memzone->addr; rte_spinlock_lock(&stats->lock); @@ -215,7 +220,7 @@ rte_metrics_get_names(struct rte_metric_name *names, return return_value; } for (idx_name = 0; idx_name < stats->cnt_stats; idx_name++) - strncpy(names[idx_name].name, + strlcpy(names[idx_name].name, stats->metadata[idx_name].name, RTE_METRICS_MAX_NAME_LEN); } @@ -240,9 +245,9 @@ rte_metrics_get_values(int port_id, return -EINVAL; memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME); - /* If not allocated, fail silently */ if (memzone == NULL) - return 0; + return -EIO; + stats = memzone->addr; rte_spinlock_lock(&stats->lock); diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index 95ff5490..85e403f4 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -5,6 +5,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_net.a +CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 LDLIBS += -lrte_mbuf -lrte_eal -lrte_mempool diff --git a/lib/librte_net/meson.build b/lib/librte_net/meson.build index 78c0f03e..d3ea1feb 100644 --- a/lib/librte_net/meson.build +++ b/lib/librte_net/meson.build @@ -2,6 +2,7 @@ # Copyright(c) 2017 Intel Corporation version = 1 +allow_experimental_apis = true headers = files('rte_ip.h', 'rte_tcp.h', 'rte_udp.h', diff --git a/lib/librte_net/rte_esp.h b/lib/librte_net/rte_esp.h index 0fc99ac6..f77ec2eb 100644 --- a/lib/librte_net/rte_esp.h +++ b/lib/librte_net/rte_esp.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2017, Mellanox Technologies. + * Copyright 2016 Mellanox Technologies, Ltd */ #ifndef _RTE_ESP_H_ diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index 45daa911..bee2b34f 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -210,7 +210,7 @@ static inline void eth_random_addr(uint8_t *addr) uint8_t *p = (uint8_t *)&rand; rte_memcpy(addr, p, ETHER_ADDR_LEN); - addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] &= (uint8_t)~ETHER_GROUP_ADDR; /* clear multicast bit */ addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ } @@ -301,6 +301,7 @@ struct vxlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */ +#define ETHER_TYPE_ETAG 0x893F /**< IEEE 802.1BR E-Tag. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ @@ -309,6 +310,31 @@ struct vxlan_hdr { #define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) /**< VXLAN tunnel header length. */ +/** + * VXLAN-GPE protocol header (draft-ietf-nvo3-vxlan-gpe-05). + * Contains the 8-bit flag, 8-bit next-protocol, 24-bit VXLAN Network + * Identifier and Reserved fields (16 bits and 8 bits). + */ +struct vxlan_gpe_hdr { + uint8_t vx_flags; /**< flag (8). */ + uint8_t reserved[2]; /**< Reserved (16). */ + uint8_t proto; /**< next-protocol (8). */ + uint32_t vx_vni; /**< VNI (24) + Reserved (8). 
*/ +} __attribute__((__packed__)); + +/* VXLAN-GPE next protocol types */ +#define VXLAN_GPE_TYPE_IPV4 1 /**< IPv4 Protocol. */ +#define VXLAN_GPE_TYPE_IPV6 2 /**< IPv6 Protocol. */ +#define VXLAN_GPE_TYPE_ETH 3 /**< Ethernet Protocol. */ +#define VXLAN_GPE_TYPE_NSH 4 /**< NSH Protocol. */ +#define VXLAN_GPE_TYPE_MPLS 5 /**< MPLS Protocol. */ +#define VXLAN_GPE_TYPE_GBP 6 /**< GBP Protocol. */ +#define VXLAN_GPE_TYPE_VBNG 7 /**< vBNG Protocol. */ + +#define ETHER_VXLAN_GPE_HLEN (sizeof(struct udp_hdr) + \ + sizeof(struct vxlan_gpe_hdr)) +/**< VXLAN-GPE tunnel header length. */ + /** * Extract VLAN tag information into mbuf * @@ -324,11 +350,12 @@ static inline int rte_vlan_strip(struct rte_mbuf *m) { struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct vlan_hdr *vh; if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) return -1; - struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); + vh = (struct vlan_hdr *)(eh + 1); m->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h index f32684c6..f2a8904a 100644 --- a/lib/librte_net/rte_ip.h +++ b/lib/librte_net/rte_ip.h @@ -108,25 +108,25 @@ __rte_raw_cksum(const void *buf, size_t len, uint32_t sum) /* workaround gcc strict-aliasing warning */ uintptr_t ptr = (uintptr_t)buf; typedef uint16_t __attribute__((__may_alias__)) u16_p; - const u16_p *u16 = (const u16_p *)ptr; - - while (len >= (sizeof(*u16) * 4)) { - sum += u16[0]; - sum += u16[1]; - sum += u16[2]; - sum += u16[3]; - len -= sizeof(*u16) * 4; - u16 += 4; + const u16_p *u16_buf = (const u16_p *)ptr; + + while (len >= (sizeof(*u16_buf) * 4)) { + sum += u16_buf[0]; + sum += u16_buf[1]; + sum += u16_buf[2]; + sum += u16_buf[3]; + len -= sizeof(*u16_buf) * 4; + u16_buf += 4; } - while (len >= sizeof(*u16)) { - sum += *u16; - len -= sizeof(*u16); - u16 += 1; + while (len >= sizeof(*u16_buf)) { + sum += *u16_buf; + len -= sizeof(*u16_buf); + u16_buf += 1; } /* if length is in odd bytes */ if (len == 1) - sum += *((const uint8_t *)u16); + sum += *((const uint8_t *)u16_buf); return sum; } @@ -222,7 +222,7 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len, for (;;) { tmp = __rte_raw_cksum(buf, seglen, 0); if (done & 1) - tmp = rte_bswap16(tmp); + tmp = rte_bswap16((uint16_t)tmp); sum += tmp; done += seglen; if (done == len) @@ -253,7 +253,7 @@ rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) { uint16_t cksum; cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); - return (cksum == 0xffff) ? cksum : ~cksum; + return (cksum == 0xffff) ? 
cksum : (uint16_t)~cksum; } /** @@ -318,8 +318,8 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) uint32_t cksum; uint32_t l4_len; - l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - - sizeof(struct ipv4_hdr); + l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) - + sizeof(struct ipv4_hdr)); cksum = rte_raw_cksum(l4_hdr, l4_len); cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); @@ -329,7 +329,7 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) if (cksum == 0) cksum = 0xffff; - return cksum; + return (uint16_t)cksum; } /** @@ -375,7 +375,7 @@ rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags) uint32_t proto; /* L4 protocol - top 3 bytes must be zero */ } psd_hdr; - psd_hdr.proto = (ipv6_hdr->proto << 24); + psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24); if (ol_flags & PKT_TX_TCP_SEG) { psd_hdr.len = 0; } else { @@ -418,7 +418,7 @@ rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr) if (cksum == 0) cksum = 0xffff; - return cksum; + return (uint16_t)cksum; } #ifdef __cplusplus diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c index 56a13e3c..9eb7c743 100644 --- a/lib/librte_net/rte_net.c +++ b/lib/librte_net/rte_net.c @@ -178,8 +178,8 @@ ip4_hlen(const struct ipv4_hdr *hdr) } /* parse ipv6 extended headers, update offset and return next proto */ -static uint16_t -skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, +int __rte_experimental +rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, int *frag) { struct ext_hdr { @@ -201,7 +201,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, xh = rte_pktmbuf_read(m, *off, sizeof(*xh), &xh_copy); if (xh == NULL) - return 0; + return -1; *off += (xh->len + 1) * 8; proto = xh->next_hdr; break; @@ -209,7 +209,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, xh = rte_pktmbuf_read(m, *off, sizeof(*xh), &xh_copy); if (xh == NULL) - return 0; + return -1; *off += 8; proto = xh->next_hdr; *frag = 1; @@ -220,7 +220,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, return proto; } } - return 0; + return -1; } /* parse mbuf data to get packet type */ @@ -233,6 +233,7 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m, uint32_t pkt_type = RTE_PTYPE_L2_ETHER; uint32_t off = 0; uint16_t proto; + int ret; if (hdr_lens == NULL) hdr_lens = &local_hdr_lens; @@ -316,7 +317,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m, off += hdr_lens->l3_len; pkt_type |= ptype_l3_ip6(proto); if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) { - proto = skip_ip6_ext(proto, m, &off, &frag); + ret = rte_net_skip_ip6_ext(proto, m, &off, &frag); + if (ret < 0) + return pkt_type; + proto = ret; hdr_lens->l3_len = off - hdr_lens->l2_len; } if (proto == 0) @@ -449,7 +453,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m, uint32_t prev_off; prev_off = off; - proto = skip_ip6_ext(proto, m, &off, &frag); + ret = rte_net_skip_ip6_ext(proto, m, &off, &frag); + if (ret < 0) + return pkt_type; + proto = ret; hdr_lens->inner_l3_len += off - prev_off; } if (proto == 0) diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h index 0e97901f..b6ab6e1d 100644 --- a/lib/librte_net/rte_net.h +++ b/lib/librte_net/rte_net.h @@ -28,6 +28,33 @@ struct rte_net_hdr_lens { uint8_t inner_l4_len; }; +/** + * Skip IPv6 header extensions. 
+ * + * This function skips all IPv6 extensions, returning size of + * complete header including options and final protocol value. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * @param proto + * Protocol field of IPv6 header. + * @param m + * The packet mbuf to be parsed. + * @param off + * On input, must contain the offset to the first byte following + * IPv6 header, on output, contains offset to the first byte + * of next layer (after any IPv6 extension header) + * @param frag + * Contains 1 in output if packet is an IPv6 fragment. + * @return + * Protocol that follows IPv6 header. + * -1 if an error occurs during mbuf parsing. + */ +int __rte_experimental +rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, + int *frag); + /** * Parse an Ethernet packet to get its packet type. * diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map index 213e6fd3..26c06e7c 100644 --- a/lib/librte_net/rte_net_version.map +++ b/lib/librte_net/rte_net_version.map @@ -17,4 +17,5 @@ EXPERIMENTAL { global: rte_net_make_rarp_packet; -} DPDK_17.05; + rte_net_skip_ip6_ext; +}; diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile index fe213ea6..94a63267 100644 --- a/lib/librte_pci/Makefile +++ b/lib/librte_pci/Makefile @@ -1,33 +1,5 @@ -# BSD LICENSE -# -# Copyright(c) 2017 6WIND S.A. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of 6WIND nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 6WIND S.A. 
include $(RTE_SDK)/mk/rte.vars.mk diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c index 1a6d7485..530738db 100644 --- a/lib/librte_pci/rte_pci.c +++ b/lib/librte_pci/rte_pci.c @@ -155,9 +155,10 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | additional_flags, fd, offset); if (mapaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", - __func__, fd, requested_addr, - (unsigned long)size, (unsigned long)offset, + RTE_LOG(ERR, EAL, + "%s(): cannot mmap(%d, %p, 0x%zx, 0x%llx): %s (%p)\n", + __func__, fd, requested_addr, size, + (unsigned long long)offset, strerror(errno), mapaddr); } else RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); @@ -174,8 +175,8 @@ pci_unmap_resource(void *requested_addr, size_t size) /* Unmap the PCI memory resource of device */ if (munmap(requested_addr, size)) { - RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", - __func__, requested_addr, (unsigned long)size, + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n", + __func__, requested_addr, size, strerror(errno)); } else RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", diff --git a/lib/librte_pci/rte_pci_version.map b/lib/librte_pci/rte_pci_version.map index 15d93d95..c0280277 100644 --- a/lib/librte_pci/rte_pci_version.map +++ b/lib/librte_pci/rte_pci_version.map @@ -3,12 +3,11 @@ DPDK_17.11 { eal_parse_pci_BDF; eal_parse_pci_DomBDF; - rte_pci_addr_cmp; - rte_pci_addr_parse; - rte_pci_device_name; pci_map_resource; pci_unmap_resource; rte_eal_compare_pci_addr; + rte_pci_addr_cmp; + rte_pci_addr_parse; rte_pci_device_name; local: *; diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile index 98fa752e..0ee0fa1a 100644 --- a/lib/librte_pdump/Makefile +++ b/lib/librte_pdump/Makefile @@ -1,11 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2016 Intel Corporation +# Copyright(c) 2016-2018 Intel Corporation include $(RTE_SDK)/mk/rte.vars.mk # library name LIB = librte_pdump.a +CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 CFLAGS += -D_GNU_SOURCE LDLIBS += -lpthread diff --git a/lib/librte_pdump/meson.build b/lib/librte_pdump/meson.build index 3a95eabd..be80904b 100644 --- a/lib/librte_pdump/meson.build +++ b/lib/librte_pdump/meson.build @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 2 sources = files('rte_pdump.c') headers = files('rte_pdump.h') +allow_experimental_apis = true deps += ['ethdev'] diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index ec8a5d84..6c3a8858 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -1,35 +1,24 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2016 Intel Corporation + * Copyright(c) 2016-2018 Intel Corporation */ -#include -#include -#include -#include -#include -#include -#include -#include - #include #include #include #include #include #include +#include #include "rte_pdump.h" -#define SOCKET_PATH_VAR_RUN "/var/run" -#define SOCKET_PATH_HOME "HOME" -#define DPDK_DIR "/.dpdk" -#define SOCKET_DIR "/pdump_sockets" -#define SERVER_SOCKET "%s/pdump_server_socket" -#define CLIENT_SOCKET "%s/pdump_client_socket_%d_%u" #define DEVICE_ID_SIZE 64 /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_PDUMP RTE_LOGTYPE_USER1 +/* Used for the multi-process communication */ +#define PDUMP_MP "mp_pdump" + enum pdump_operation { 
DISABLE = 1, ENABLE = 2 @@ -39,11 +28,6 @@ enum pdump_version { V1 = 1 }; -static pthread_t pdump_thread; -static int pdump_socket_fd; -static char server_socket_dir[PATH_MAX]; -static char client_socket_dir[PATH_MAX]; - struct pdump_request { uint16_t ver; uint16_t op; @@ -75,7 +59,7 @@ struct pdump_response { static struct pdump_rxtx_cbs { struct rte_ring *ring; struct rte_mempool *mp; - struct rte_eth_rxtx_callback *cb; + const struct rte_eth_rxtx_callback *cb; void *filter; } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; @@ -307,7 +291,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, } static int -set_pdump_rxtx_cbs(struct pdump_request *p) +set_pdump_rxtx_cbs(const struct pdump_request *p) { uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue; uint16_t port; @@ -391,312 +375,50 @@ set_pdump_rxtx_cbs(struct pdump_request *p) return ret; } -/* get socket path (/var/run if root, $HOME otherwise) */ static int -pdump_get_socket_path(char *buffer, int bufsz, enum rte_pdump_socktype type) +pdump_server(const struct rte_mp_msg *mp_msg, const void *peer) { - char dpdk_dir[PATH_MAX] = {0}; - char dir[PATH_MAX] = {0}; - char *dir_home = NULL; - int ret = 0; - - if (type == RTE_PDUMP_SOCKET_SERVER && server_socket_dir[0] != 0) - snprintf(dir, sizeof(dir), "%s", server_socket_dir); - else if (type == RTE_PDUMP_SOCKET_CLIENT && client_socket_dir[0] != 0) - snprintf(dir, sizeof(dir), "%s", client_socket_dir); - else { - if (getuid() != 0) { - dir_home = getenv(SOCKET_PATH_HOME); - if (!dir_home) { - RTE_LOG(ERR, PDUMP, - "Failed to get environment variable" - " value for %s, %s:%d\n", - SOCKET_PATH_HOME, __func__, __LINE__); - return -1; - } - snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s", - dir_home, DPDK_DIR); - } else - snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s", - SOCKET_PATH_VAR_RUN, DPDK_DIR); - - mkdir(dpdk_dir, 0700); - snprintf(dir, sizeof(dir), "%s%s", - dpdk_dir, SOCKET_DIR); - } - - ret = mkdir(dir, 0700); - /* if user passed socket path is invalid, return immediately */ - if (ret < 0 && errno != EEXIST) { - RTE_LOG(ERR, PDUMP, - "Failed to create dir:%s:%s\n", dir, - strerror(errno)); - rte_errno = errno; - return -1; - } - - if (type == RTE_PDUMP_SOCKET_SERVER) - snprintf(buffer, bufsz, SERVER_SOCKET, dir); - else - snprintf(buffer, bufsz, CLIENT_SOCKET, dir, getpid(), - rte_sys_gettid()); - - return 0; -} - -static int -pdump_create_server_socket(void) -{ - int ret, socket_fd; - struct sockaddr_un addr; - socklen_t addr_len; - - ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path), - RTE_PDUMP_SOCKET_SERVER); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to get server socket path: %s:%d\n", - __func__, __LINE__); - return -1; - } - addr.sun_family = AF_UNIX; - - /* remove if file already exists */ - unlink(addr.sun_path); - - /* set up a server socket */ - socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, PDUMP, - "Failed to create server socket: %s, %s:%d\n", - strerror(errno), __func__, __LINE__); - return -1; - } - - addr_len = sizeof(struct sockaddr_un); - ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len); - if (ret) { - RTE_LOG(ERR, PDUMP, - "Failed to bind to server socket: %s, %s:%d\n", - strerror(errno), __func__, __LINE__); - close(socket_fd); + struct rte_mp_msg mp_resp; + const struct pdump_request *cli_req; + struct pdump_response *resp = (struct pdump_response *)&mp_resp.param; + + /* recv client requests */ + if (mp_msg->len_param != 
sizeof(*cli_req)) { + RTE_LOG(ERR, PDUMP, "failed to recv from client\n"); + resp->err_value = -EINVAL; + } else { + cli_req = (const struct pdump_request *)mp_msg->param; + resp->ver = cli_req->ver; + resp->res_op = cli_req->op; + resp->err_value = set_pdump_rxtx_cbs(cli_req); + } + + strlcpy(mp_resp.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN); + mp_resp.len_param = sizeof(*resp); + mp_resp.num_fds = 0; + if (rte_mp_reply(&mp_resp, peer) < 0) { + RTE_LOG(ERR, PDUMP, "failed to send to client:%s, %s:%d\n", + strerror(rte_errno), __func__, __LINE__); return -1; } - /* save the socket in local configuration */ - pdump_socket_fd = socket_fd; - return 0; } -static __attribute__((noreturn)) void * -pdump_thread_main(__rte_unused void *arg) -{ - struct sockaddr_un cli_addr; - socklen_t cli_len; - struct pdump_request cli_req; - struct pdump_response resp; - int n; - int ret = 0; - - /* host thread, never break out */ - for (;;) { - /* recv client requests */ - cli_len = sizeof(cli_addr); - n = recvfrom(pdump_socket_fd, &cli_req, - sizeof(struct pdump_request), 0, - (struct sockaddr *)&cli_addr, &cli_len); - if (n < 0) { - RTE_LOG(ERR, PDUMP, - "failed to recv from client:%s, %s:%d\n", - strerror(errno), __func__, __LINE__); - continue; - } - - ret = set_pdump_rxtx_cbs(&cli_req); - - resp.ver = cli_req.ver; - resp.res_op = cli_req.op; - resp.err_value = ret; - n = sendto(pdump_socket_fd, &resp, - sizeof(struct pdump_response), - 0, (struct sockaddr *)&cli_addr, cli_len); - if (n < 0) { - RTE_LOG(ERR, PDUMP, - "failed to send to client:%s, %s:%d\n", - strerror(errno), __func__, __LINE__); - } - } -} - int -rte_pdump_init(const char *path) +rte_pdump_init(const char *path __rte_unused) { - int ret = 0; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - ret = rte_pdump_set_socket_dir(path, RTE_PDUMP_SOCKET_SERVER); - if (ret != 0) - return -1; - - ret = pdump_create_server_socket(); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to create server socket:%s:%d\n", - __func__, __LINE__); - return -1; - } - - /* create the host thread to wait/handle pdump requests */ - ret = pthread_create(&pdump_thread, NULL, pdump_thread_main, NULL); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to create the pdump thread:%s, %s:%d\n", - strerror(ret), __func__, __LINE__); - return -1; - } - /* Set thread_name for aid in debugging. 
*/ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pdump-thread"); - ret = rte_thread_setname(pdump_thread, thread_name); - if (ret != 0) { - RTE_LOG(DEBUG, PDUMP, - "Failed to set thread name for pdump handling\n"); - } - - return 0; + return rte_mp_action_register(PDUMP_MP, pdump_server); } int rte_pdump_uninit(void) { - int ret; - - ret = pthread_cancel(pdump_thread); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to cancel the pdump thread:%s, %s:%d\n", - strerror(ret), __func__, __LINE__); - return -1; - } - - ret = close(pdump_socket_fd); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to close server socket: %s, %s:%d\n", - strerror(errno), __func__, __LINE__); - return -1; - } - - struct sockaddr_un addr; - - ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path), - RTE_PDUMP_SOCKET_SERVER); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to get server socket path: %s:%d\n", - __func__, __LINE__); - return -1; - } - ret = unlink(addr.sun_path); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to remove server socket addr: %s, %s:%d\n", - strerror(errno), __func__, __LINE__); - return -1; - } + rte_mp_action_unregister(PDUMP_MP); return 0; } -static int -pdump_create_client_socket(struct pdump_request *p) -{ - int ret, socket_fd; - int pid; - int n; - struct pdump_response server_resp; - struct sockaddr_un addr, serv_addr, from; - socklen_t addr_len, serv_len; - - pid = getpid(); - - socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, PDUMP, - "client socket(): %s:pid(%d):tid(%u), %s:%d\n", - strerror(errno), pid, rte_sys_gettid(), - __func__, __LINE__); - rte_errno = errno; - return -1; - } - - ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path), - RTE_PDUMP_SOCKET_CLIENT); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to get client socket path: %s:%d\n", - __func__, __LINE__); - rte_errno = errno; - goto exit; - } - addr.sun_family = AF_UNIX; - addr_len = sizeof(struct sockaddr_un); - - do { - ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len); - if (ret) { - RTE_LOG(ERR, PDUMP, - "client bind(): %s, %s:%d\n", - strerror(errno), __func__, __LINE__); - rte_errno = errno; - break; - } - - serv_len = sizeof(struct sockaddr_un); - memset(&serv_addr, 0, sizeof(serv_addr)); - ret = pdump_get_socket_path(serv_addr.sun_path, - sizeof(serv_addr.sun_path), - RTE_PDUMP_SOCKET_SERVER); - if (ret != 0) { - RTE_LOG(ERR, PDUMP, - "Failed to get server socket path: %s:%d\n", - __func__, __LINE__); - rte_errno = errno; - break; - } - serv_addr.sun_family = AF_UNIX; - - n = sendto(socket_fd, p, sizeof(struct pdump_request), 0, - (struct sockaddr *)&serv_addr, serv_len); - if (n < 0) { - RTE_LOG(ERR, PDUMP, - "failed to send to server:%s, %s:%d\n", - strerror(errno), __func__, __LINE__); - rte_errno = errno; - ret = -1; - break; - } - - n = recvfrom(socket_fd, &server_resp, - sizeof(struct pdump_response), 0, - (struct sockaddr *)&from, &serv_len); - if (n < 0) { - RTE_LOG(ERR, PDUMP, - "failed to recv from server:%s, %s:%d\n", - strerror(errno), __func__, __LINE__); - rte_errno = errno; - ret = -1; - break; - } - ret = server_resp.err_value; - } while (0); - -exit: - close(socket_fd); - unlink(addr.sun_path); - return ret; -} - static int pdump_validate_ring_mp(struct rte_ring *ring, struct rte_mempool *mp) { @@ -768,36 +490,48 @@ pdump_prepare_client_request(char *device, uint16_t queue, struct rte_mempool *mp, void *filter) { - int ret; - struct pdump_request req = {.ver = 1,}; - - req.flags = flags; - req.op = operation; 
+ int ret = -1; + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct pdump_request *req = (struct pdump_request *)mp_req.param; + struct pdump_response *resp; + + req->ver = 1; + req->flags = flags; + req->op = operation; if ((operation & ENABLE) != 0) { - snprintf(req.data.en_v1.device, sizeof(req.data.en_v1.device), - "%s", device); - req.data.en_v1.queue = queue; - req.data.en_v1.ring = ring; - req.data.en_v1.mp = mp; - req.data.en_v1.filter = filter; + snprintf(req->data.en_v1.device, + sizeof(req->data.en_v1.device), "%s", device); + req->data.en_v1.queue = queue; + req->data.en_v1.ring = ring; + req->data.en_v1.mp = mp; + req->data.en_v1.filter = filter; } else { - snprintf(req.data.dis_v1.device, sizeof(req.data.dis_v1.device), - "%s", device); - req.data.dis_v1.queue = queue; - req.data.dis_v1.ring = NULL; - req.data.dis_v1.mp = NULL; - req.data.dis_v1.filter = NULL; + snprintf(req->data.dis_v1.device, + sizeof(req->data.dis_v1.device), "%s", device); + req->data.dis_v1.queue = queue; + req->data.dis_v1.ring = NULL; + req->data.dis_v1.mp = NULL; + req->data.dis_v1.filter = NULL; + } + + strlcpy(mp_req.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN); + mp_req.len_param = sizeof(*req); + mp_req.num_fds = 0; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) { + mp_rep = &mp_reply.msgs[0]; + resp = (struct pdump_response *)mp_rep->param; + rte_errno = resp->err_value; + if (!resp->err_value) + ret = 0; + free(mp_reply.msgs); } - ret = pdump_create_client_socket(&req); - if (ret < 0) { + if (ret < 0) RTE_LOG(ERR, PDUMP, "client request for pdump enable/disable failed\n"); - rte_errno = ret; - return -1; - } - - return 0; + return ret; } int @@ -884,30 +618,8 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, } int -rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type) +rte_pdump_set_socket_dir(const char *path __rte_unused, + enum rte_pdump_socktype type __rte_unused) { - int ret, count; - - if (path != NULL) { - if (type == RTE_PDUMP_SOCKET_SERVER) { - count = sizeof(server_socket_dir); - ret = snprintf(server_socket_dir, count, "%s", path); - } else { - count = sizeof(client_socket_dir); - ret = snprintf(client_socket_dir, count, "%s", path); - } - - if (ret < 0 || ret >= count) { - RTE_LOG(ERR, PDUMP, - "Invalid socket path:%s:%d\n", - __func__, __LINE__); - if (type == RTE_PDUMP_SOCKET_SERVER) - server_socket_dir[0] = 0; - else - client_socket_dir[0] = 0; - return -EINVAL; - } - } - return 0; } diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h index a7e83727..673a2b07 100644 --- a/lib/librte_pdump/rte_pdump.h +++ b/lib/librte_pdump/rte_pdump.h @@ -37,10 +37,10 @@ enum rte_pdump_socktype { /** * Initialize packet capturing handling * - * Creates pthread and server socket for handling clients - * requests to enable/disable rxtx callbacks. + * Register the IPC action for communication with target (primary) process. * * @param path + * This parameter is going to be deprecated; it was used for specifying the * directory path for server socket. * * @return @@ -52,7 +52,7 @@ rte_pdump_init(const char *path); /** * Un initialize packet capturing handling * - * Cancels pthread, close server socket, removes server socket address. + * Unregister the IPC action for communication with target (primary) process. 
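Since the socket-based server/client code is replaced by the generic multi-process channel, the call sequence seen by applications stays small. A hedged sketch of both sides follows (illustrative only; the helper names, ring/pool sizes, and error handling are example assumptions).

#include <rte_lcore.h>
#include <rte_ring.h>
#include <rte_mbuf.h>
#include <rte_pdump.h>

/* Primary process: register the pdump IPC action once after rte_eal_init().
 * The path argument is ignored by this implementation and is deprecated. */
static int
primary_enable_pdump(void)
{
	return rte_pdump_init(NULL);
}

/* Capturing secondary process (e.g. the dpdk-pdump tool): the enable request
 * now travels over the multi-process channel instead of a private unix socket. */
static int
secondary_start_capture(uint16_t port_id)
{
	struct rte_ring *ring = rte_ring_create("pdump_ring", 4096,
			rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
	struct rte_mempool *pool = rte_pktmbuf_pool_create("pdump_pool",
			8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());

	if (ring == NULL || pool == NULL)
		return -1;

	return rte_pdump_enable(port_id, RTE_PDUMP_ALL_QUEUES,
			RTE_PDUMP_FLAG_RXTX, ring, pool, NULL);
}
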
* * @return * 0 on success, -1 on error @@ -163,6 +163,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, uint32_t flags); /** + * @deprecated * Allows applications to set server and client socket paths. * If specified path is null default path will be selected, i.e. *"/var/run/" for root user and "$HOME" for non root user. @@ -181,7 +182,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, * 0 on success, -EINVAL on error * */ -int +__rte_deprecated int rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type); #ifdef __cplusplus diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile index e94fbc02..84afe98c 100644 --- a/lib/librte_pipeline/Makefile +++ b/lib/librte_pipeline/Makefile @@ -8,10 +8,11 @@ include $(RTE_SDK)/mk/rte.vars.mk # LIB = librte_pipeline.a +CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table -LDLIBS += -lrte_port +LDLIBS += -lrte_port -lrte_meter -lrte_sched EXPORT_MAP := rte_pipeline_version.map @@ -21,8 +22,10 @@ LIBABIVER := 3 # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += rte_port_in_action.c +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += rte_table_action.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h rte_port_in_action.h rte_table_action.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pipeline/meson.build b/lib/librte_pipeline/meson.build index a35b6220..dc16ab42 100644 --- a/lib/librte_pipeline/meson.build +++ b/lib/librte_pipeline/meson.build @@ -2,6 +2,7 @@ # Copyright(c) 2017 Intel Corporation version = 3 -sources = files('rte_pipeline.c') -headers = files('rte_pipeline.h') -deps += ['port', 'table'] +allow_experimental_apis = true +sources = files('rte_pipeline.c', 'rte_port_in_action.c', 'rte_table_action.c') +headers = files('rte_pipeline.h', 'rte_port_in_action.h', 'rte_table_action.h') +deps += ['port', 'table', 'meter', 'sched'] diff --git a/lib/librte_pipeline/rte_pipeline_version.map b/lib/librte_pipeline/rte_pipeline_version.map index e4ee154f..d820b22f 100644 --- a/lib/librte_pipeline/rte_pipeline_version.map +++ b/lib/librte_pipeline/rte_pipeline_version.map @@ -45,3 +45,31 @@ DPDK_16.04 { rte_pipeline_ah_packet_drop; } DPDK_2.2; + +EXPERIMENTAL { + global: + + rte_port_in_action_apply; + rte_port_in_action_create; + rte_port_in_action_free; + rte_port_in_action_params_get; + rte_port_in_action_profile_action_register; + rte_port_in_action_profile_create; + rte_port_in_action_profile_free; + rte_port_in_action_profile_freeze; + rte_table_action_apply; + rte_table_action_create; + rte_table_action_dscp_table_update; + rte_table_action_free; + rte_table_action_meter_profile_add; + rte_table_action_meter_profile_delete; + rte_table_action_meter_read; + rte_table_action_profile_action_register; + rte_table_action_profile_create; + rte_table_action_profile_free; + rte_table_action_profile_freeze; + rte_table_action_table_params_get; + rte_table_action_stats_read; + rte_table_action_time_read; + rte_table_action_ttl_read; +}; diff --git a/lib/librte_pipeline/rte_port_in_action.c b/lib/librte_pipeline/rte_port_in_action.c new file mode 100644 index 00000000..e3b00df8 --- /dev/null +++ b/lib/librte_pipeline/rte_port_in_action.c @@ -0,0 +1,531 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel 
Corporation + */ + +#include +#include + +#include +#include +#include +#include + +#include "rte_port_in_action.h" + +/** + * RTE_PORT_IN_ACTION_FLTR + */ +static int +fltr_cfg_check(struct rte_port_in_action_fltr_config *cfg) +{ + if (cfg == NULL) + return -1; + + return 0; +} + +struct fltr_data { + uint32_t port_id; +}; + +static void +fltr_init(struct fltr_data *data, + struct rte_port_in_action_fltr_config *cfg) +{ + data->port_id = cfg->port_id; +} + +static int +fltr_apply(struct fltr_data *data, + struct rte_port_in_action_fltr_params *p) +{ + /* Check input arguments */ + if (p == NULL) + return -1; + + data->port_id = p->port_id; + + return 0; +} + +/** + * RTE_PORT_IN_ACTION_LB + */ +static int +lb_cfg_check(struct rte_port_in_action_lb_config *cfg) +{ + if ((cfg == NULL) || + (cfg->key_size < RTE_PORT_IN_ACTION_LB_KEY_SIZE_MIN) || + (cfg->key_size > RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX) || + (!rte_is_power_of_2(cfg->key_size)) || + (cfg->f_hash == NULL)) + return -1; + + return 0; +} + +struct lb_data { + uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE]; +}; + +static void +lb_init(struct lb_data *data, + struct rte_port_in_action_lb_config *cfg) +{ + memcpy(data->port_id, cfg->port_id, sizeof(cfg->port_id)); +} + +static int +lb_apply(struct lb_data *data, + struct rte_port_in_action_lb_params *p) +{ + /* Check input arguments */ + if (p == NULL) + return -1; + + memcpy(data->port_id, p->port_id, sizeof(p->port_id)); + + return 0; +} + +/** + * Action profile + */ +static int +action_valid(enum rte_port_in_action_type action) +{ + switch (action) { + case RTE_PORT_IN_ACTION_FLTR: + case RTE_PORT_IN_ACTION_LB: + return 1; + default: + return 0; + } +} + +#define RTE_PORT_IN_ACTION_MAX 64 + +struct ap_config { + uint64_t action_mask; + struct rte_port_in_action_fltr_config fltr; + struct rte_port_in_action_lb_config lb; +}; + +static size_t +action_cfg_size(enum rte_port_in_action_type action) +{ + switch (action) { + case RTE_PORT_IN_ACTION_FLTR: + return sizeof(struct rte_port_in_action_fltr_config); + case RTE_PORT_IN_ACTION_LB: + return sizeof(struct rte_port_in_action_lb_config); + default: + return 0; + } +} + +static void* +action_cfg_get(struct ap_config *ap_config, + enum rte_port_in_action_type type) +{ + switch (type) { + case RTE_PORT_IN_ACTION_FLTR: + return &ap_config->fltr; + + case RTE_PORT_IN_ACTION_LB: + return &ap_config->lb; + + default: + return NULL; + } +} + +static void +action_cfg_set(struct ap_config *ap_config, + enum rte_port_in_action_type type, + void *action_cfg) +{ + void *dst = action_cfg_get(ap_config, type); + + if (dst) + memcpy(dst, action_cfg, action_cfg_size(type)); + + ap_config->action_mask |= 1LLU << type; +} + +struct ap_data { + size_t offset[RTE_PORT_IN_ACTION_MAX]; + size_t total_size; +}; + +static size_t +action_data_size(enum rte_port_in_action_type action, + struct ap_config *ap_config __rte_unused) +{ + switch (action) { + case RTE_PORT_IN_ACTION_FLTR: + return sizeof(struct fltr_data); + + case RTE_PORT_IN_ACTION_LB: + return sizeof(struct lb_data); + + default: + return 0; + } +} + +static void +action_data_offset_set(struct ap_data *ap_data, + struct ap_config *ap_config) +{ + uint64_t action_mask = ap_config->action_mask; + size_t offset; + uint32_t action; + + memset(ap_data->offset, 0, sizeof(ap_data->offset)); + + offset = 0; + for (action = 0; action < RTE_PORT_IN_ACTION_MAX; action++) + if (action_mask & (1LLU << action)) { + ap_data->offset[action] = offset; + offset += action_data_size((enum 
rte_port_in_action_type)action, + ap_config); + } + + ap_data->total_size = offset; +} + +struct rte_port_in_action_profile { + struct ap_config cfg; + struct ap_data data; + int frozen; +}; + +struct rte_port_in_action_profile * +rte_port_in_action_profile_create(uint32_t socket_id) +{ + struct rte_port_in_action_profile *ap; + + /* Memory allocation */ + ap = rte_zmalloc_socket(NULL, + sizeof(struct rte_port_in_action_profile), + RTE_CACHE_LINE_SIZE, + socket_id); + if (ap == NULL) + return NULL; + + return ap; +} + +int +rte_port_in_action_profile_action_register(struct rte_port_in_action_profile *profile, + enum rte_port_in_action_type type, + void *action_config) +{ + int status; + + /* Check input arguments */ + if ((profile == NULL) || + profile->frozen || + (action_valid(type) == 0) || + (profile->cfg.action_mask & (1LLU << type)) || + ((action_cfg_size(type) == 0) && action_config) || + (action_cfg_size(type) && (action_config == NULL))) + return -EINVAL; + + switch (type) { + case RTE_PORT_IN_ACTION_FLTR: + status = fltr_cfg_check(action_config); + break; + + case RTE_PORT_IN_ACTION_LB: + status = lb_cfg_check(action_config); + break; + + default: + status = 0; + break; + } + + if (status) + return status; + + /* Action enable */ + action_cfg_set(&profile->cfg, type, action_config); + + return 0; +} + +int +rte_port_in_action_profile_freeze(struct rte_port_in_action_profile *profile) +{ + if (profile->frozen) + return -EBUSY; + + action_data_offset_set(&profile->data, &profile->cfg); + profile->frozen = 1; + + return 0; +} + +int +rte_port_in_action_profile_free(struct rte_port_in_action_profile *profile) +{ + if (profile == NULL) + return 0; + + free(profile); + return 0; +} + +/** + * Action + */ +struct rte_port_in_action { + struct ap_config cfg; + struct ap_data data; + uint8_t memory[0] __rte_cache_aligned; +}; + +static __rte_always_inline void * +action_data_get(struct rte_port_in_action *action, + enum rte_port_in_action_type type) +{ + size_t offset = action->data.offset[type]; + + return &action->memory[offset]; +} + +static void +action_data_init(struct rte_port_in_action *action, + enum rte_port_in_action_type type) +{ + void *data = action_data_get(action, type); + + switch (type) { + case RTE_PORT_IN_ACTION_FLTR: + fltr_init(data, &action->cfg.fltr); + return; + + case RTE_PORT_IN_ACTION_LB: + lb_init(data, &action->cfg.lb); + return; + + default: + return; + } +} + +struct rte_port_in_action * +rte_port_in_action_create(struct rte_port_in_action_profile *profile, + uint32_t socket_id) +{ + struct rte_port_in_action *action; + size_t size; + uint32_t i; + + /* Check input arguments */ + if ((profile == NULL) || + (profile->frozen == 0)) + return NULL; + + /* Memory allocation */ + size = sizeof(struct rte_port_in_action) + profile->data.total_size; + size = RTE_CACHE_LINE_ROUNDUP(size); + + action = rte_zmalloc_socket(NULL, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (action == NULL) + return NULL; + + /* Initialization */ + memcpy(&action->cfg, &profile->cfg, sizeof(profile->cfg)); + memcpy(&action->data, &profile->data, sizeof(profile->data)); + + for (i = 0; i < RTE_PORT_IN_ACTION_MAX; i++) + if (action->cfg.action_mask & (1LLU << i)) + action_data_init(action, + (enum rte_port_in_action_type)i); + + return action; +} + +int +rte_port_in_action_apply(struct rte_port_in_action *action, + enum rte_port_in_action_type type, + void *action_params) +{ + void *action_data; + + /* Check input arguments */ + if ((action == NULL) || + (action_valid(type) == 0) || + 
((action->cfg.action_mask & (1LLU << type)) == 0) || + (action_params == NULL)) + return -EINVAL; + + /* Data update */ + action_data = action_data_get(action, type); + + switch (type) { + case RTE_PORT_IN_ACTION_FLTR: + return fltr_apply(action_data, + action_params); + + case RTE_PORT_IN_ACTION_LB: + return lb_apply(action_data, + action_params); + + default: + return -EINVAL; + } +} + +static int +ah_filter_on_match(struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint32_t n_pkts, + void *arg) +{ + struct rte_port_in_action *action = arg; + struct rte_port_in_action_fltr_config *cfg = &action->cfg.fltr; + uint64_t *key_mask = (uint64_t *) cfg->key_mask; + uint64_t *key = (uint64_t *) cfg->key; + uint32_t key_offset = cfg->key_offset; + struct fltr_data *data = action_data_get(action, + RTE_PORT_IN_ACTION_FLTR); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(pkt, + key_offset); + + uint64_t xor0 = (pkt_key[0] & key_mask[0]) ^ key[0]; + uint64_t xor1 = (pkt_key[1] & key_mask[1]) ^ key[1]; + uint64_t or = xor0 | xor1; + + if (or == 0) { + rte_pipeline_ah_packet_hijack(p, 1LLU << i); + rte_pipeline_port_out_packet_insert(p, + data->port_id, pkt); + } + } + + return 0; +} + +static int +ah_filter_on_mismatch(struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint32_t n_pkts, + void *arg) +{ + struct rte_port_in_action *action = arg; + struct rte_port_in_action_fltr_config *cfg = &action->cfg.fltr; + uint64_t *key_mask = (uint64_t *) cfg->key_mask; + uint64_t *key = (uint64_t *) cfg->key; + uint32_t key_offset = cfg->key_offset; + struct fltr_data *data = action_data_get(action, + RTE_PORT_IN_ACTION_FLTR); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(pkt, + key_offset); + + uint64_t xor0 = (pkt_key[0] & key_mask[0]) ^ key[0]; + uint64_t xor1 = (pkt_key[1] & key_mask[1]) ^ key[1]; + uint64_t or = xor0 | xor1; + + if (or) { + rte_pipeline_ah_packet_hijack(p, 1LLU << i); + rte_pipeline_port_out_packet_insert(p, + data->port_id, pkt); + } + } + + return 0; +} + +static int +ah_lb(struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint32_t n_pkts, + void *arg) +{ + struct rte_port_in_action *action = arg; + struct rte_port_in_action_lb_config *cfg = &action->cfg.lb; + struct lb_data *data = action_data_get(action, RTE_PORT_IN_ACTION_LB); + uint64_t pkt_mask = RTE_LEN2MASK(n_pkts, uint64_t); + uint32_t i; + + rte_pipeline_ah_packet_hijack(p, pkt_mask); + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + uint8_t *pkt_key = RTE_MBUF_METADATA_UINT8_PTR(pkt, + cfg->key_offset); + + uint64_t digest = cfg->f_hash(pkt_key, + cfg->key_mask, + cfg->key_size, + cfg->seed); + uint64_t pos = digest & (RTE_PORT_IN_ACTION_LB_TABLE_SIZE - 1); + uint32_t port_id = data->port_id[pos]; + + rte_pipeline_port_out_packet_insert(p, port_id, pkt); + } + + return 0; +} + +static rte_pipeline_port_in_action_handler +ah_selector(struct rte_port_in_action *action) +{ + if (action->cfg.action_mask == 0) + return NULL; + + if (action->cfg.action_mask == 1LLU << RTE_PORT_IN_ACTION_FLTR) + return (action->cfg.fltr.filter_on_match) ? 
+ ah_filter_on_match : ah_filter_on_mismatch; + + if (action->cfg.action_mask == 1LLU << RTE_PORT_IN_ACTION_LB) + return ah_lb; + + return NULL; +} + +int +rte_port_in_action_params_get(struct rte_port_in_action *action, + struct rte_pipeline_port_in_params *params) +{ + rte_pipeline_port_in_action_handler f_action; + + /* Check input arguments */ + if ((action == NULL) || + (params == NULL)) + return -EINVAL; + + f_action = ah_selector(action); + + /* Fill in params */ + params->f_action = f_action; + params->arg_ah = (f_action) ? action : NULL; + + return 0; +} + +int +rte_port_in_action_free(struct rte_port_in_action *action) +{ + if (action == NULL) + return 0; + + rte_free(action); + + return 0; +} diff --git a/lib/librte_pipeline/rte_port_in_action.h b/lib/librte_pipeline/rte_port_in_action.h new file mode 100644 index 00000000..0a85e4e0 --- /dev/null +++ b/lib/librte_pipeline/rte_port_in_action.h @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_PORT_IN_ACTION_H__ +#define __INCLUDE_RTE_PORT_IN_ACTION_H__ + +/** + * @file + * RTE Pipeline Input Port Actions + * + * This API provides a common set of actions for pipeline input ports to speed + * up application development. + * + * Each pipeline input port can be assigned an action handler to be executed + * on every input packet during the pipeline execution. The pipeline library + * allows the user to define his own input port actions by providing customized + * input port action handler. While the user can still follow this process, this + * API is intended to provide a quicker development alternative for a set of + * predefined actions. + * + * The typical steps to use this API are: + * - Define an input port action profile. This is a configuration template that + * can potentially be shared by multiple input ports from the same or + * different pipelines, with different input ports from the same pipeline + * able to use different action profiles. For every input port using a given + * action profile, the profile defines the set of actions and the action + * configuration to be executed by the input port. API functions: + * rte_port_in_action_profile_create(), + * rte_port_in_action_profile_action_register(), + * rte_port_in_action_profile_freeze(). + * + * - Instantiate the input port action profile to create input port action + * objects. Each pipeline input port has its own action object. + * API functions: rte_port_in_action_create(). + * + * - Use the input port action object to generate the input port action handler + * invoked by the pipeline. API functions: + * rte_port_in_action_params_get(). + * + * - Use the input port action object to generate the internal data structures + * used by the input port action handler based on given action parameters. + * API functions: rte_port_in_action_apply(). + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include +#include + +#include "rte_pipeline.h" + +/** Input port actions. */ +enum rte_port_in_action_type { + /** Filter selected input packets. */ + RTE_PORT_IN_ACTION_FLTR = 0, + + /** Load balance. */ + RTE_PORT_IN_ACTION_LB, +}; + +/** + * RTE_PORT_IN_ACTION_FLTR + */ +/** Filter key size (number of bytes) */ +#define RTE_PORT_IN_ACTION_FLTR_KEY_SIZE 16 + +/** Filter action configuration (per action profile). */ +struct rte_port_in_action_fltr_config { + /** Key offset within the input packet buffer. 
Offset 0 points to the + * first byte of the MBUF structure. + */ + uint32_t key_offset; + + /** Key mask. */ + uint8_t key_mask[RTE_PORT_IN_ACTION_FLTR_KEY_SIZE]; + + /** Key value. */ + uint8_t key[RTE_PORT_IN_ACTION_FLTR_KEY_SIZE]; + + /** When non-zero, all the input packets that match the *key* (with the + * *key_mask* applied) are sent to the pipeline output port *port_id*. + * When zero, all the input packets that do NOT match the *key* (with + * *key_mask* applied) are sent to the pipeline output port *port_id*. + */ + int filter_on_match; + + /** Pipeline output port ID to send the filtered input packets to. + * Can be updated later. + * + * @see struct rte_port_in_action_fltr_params + */ + uint32_t port_id; +}; + +/** Filter action parameters (per action). */ +struct rte_port_in_action_fltr_params { + /** Pipeline output port ID to send the filtered input packets to. */ + uint32_t port_id; +}; + +/** + * RTE_PORT_IN_ACTION_LB + */ +/** Load balance key size min (number of bytes). */ +#define RTE_PORT_IN_ACTION_LB_KEY_SIZE_MIN 8 + +/** Load balance key size max (number of bytes). */ +#define RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX 64 + +/** Load balance table size. */ +#define RTE_PORT_IN_ACTION_LB_TABLE_SIZE 16 + +/** Load balance action configuration (per action profile). */ +struct rte_port_in_action_lb_config { + /** Key size (number of bytes). */ + uint32_t key_size; + + /** Key offset within the input packet buffer. Offset 0 points to the + * first byte of the MBUF structure. + */ + uint32_t key_offset; + + /** Key mask(*key_size* bytes are valid). */ + uint8_t key_mask[RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX]; + + /** Hash function. */ + rte_table_hash_op_hash f_hash; + + /** Seed value for *f_hash*. */ + uint64_t seed; + + /** Table defining the weight of each pipeline output port. The weights + * are set in 1/RTE_PORT_IN_ACTION_LB_TABLE_SIZE increments. To assign a + * weight of N/RTE_PORT_IN_ACTION_LB_TABLE_SIZE to a given output port + * (0 <= N <= RTE_PORT_IN_ACTION_LB_TABLE_SIZE), the output port needs + * to show up exactly N times in this table. Can be updated later. + * + * @see struct rte_port_in_action_lb_params + */ + uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE]; +}; + +/** Load balance action parameters (per action). */ +struct rte_port_in_action_lb_params { + /** Table defining the weight of each pipeline output port. The weights + * are set in 1/RTE_PORT_IN_ACTION_LB_TABLE_SIZE increments. To assign a + * weight of N/RTE_PORT_IN_ACTION_LB_TABLE_SIZE to a given output port + * (0 <= N <= RTE_PORT_IN_ACTION_LB_TABLE_SIZE), the output port needs + * to show up exactly N times in this table. + */ + uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE]; +}; + +/** + * Input port action profile. + */ +struct rte_port_in_action_profile; + +/** + * Input port action profile create. + * + * @param[in] socket_id + * CPU socket ID for the internal data structures memory allocation. + * @return + * Input port action profile handle on success, NULL otherwise. + */ +struct rte_port_in_action_profile * __rte_experimental +rte_port_in_action_profile_create(uint32_t socket_id); + +/** + * Input port action profile free. + * + * @param[in] profile + * Input port action profile handle (needs to be valid). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_port_in_action_profile_free(struct rte_port_in_action_profile *profile); + +/** + * Input port action profile action register. 
+ * + * @param[in] profile + * Input port action profile handle (needs to be valid and not in frozen + * state). + * @param[in] type + * Specific input port action to be registered for *profile*. + * @param[in] action_config + * Configuration for the *type* action. + * If struct rte_port_in_action_*type*_config is defined, it needs to point to + * a valid instance of this structure, otherwise it needs to be set to NULL. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_port_in_action_profile_action_register( + struct rte_port_in_action_profile *profile, + enum rte_port_in_action_type type, + void *action_config); + +/** + * Input port action profile freeze. + * + * Once this function is called successfully, the given profile enters the + * frozen state with the following immediate effects: no more actions can be + * registered for this profile, so the profile can be instantiated to create + * input port action objects. + * + * @param[in] profile + * Input port profile action handle (needs to be valid and not in frozen + * state). + * @return + * Zero on success, non-zero error code otherwise. + * + * @see rte_port_in_action_create() + */ +int __rte_experimental +rte_port_in_action_profile_freeze(struct rte_port_in_action_profile *profile); + +/** + * Input port action. + */ +struct rte_port_in_action; + +/** + * Input port action create. + * + * Instantiates the given input port action profile to create an input port + * action object. + * + * @param[in] profile + * Input port profile action handle (needs to be valid and in frozen state). + * @param[in] socket_id + * CPU socket ID where the internal data structures required by the new input + * port action object should be allocated. + * @return + * Handle to input port action object on success, NULL on error. + */ +struct rte_port_in_action * __rte_experimental +rte_port_in_action_create(struct rte_port_in_action_profile *profile, + uint32_t socket_id); + +/** + * Input port action free. + * + * @param[in] action + * Handle to input port action object (needs to be valid). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_port_in_action_free(struct rte_port_in_action *action); + +/** + * Input port params get. + * + * @param[in] action + * Handle to input port action object (needs to be valid). + * @param[inout] params + * Pipeline input port parameters (needs to be pre-allocated). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_port_in_action_params_get(struct rte_port_in_action *action, + struct rte_pipeline_port_in_params *params); + +/** + * Input port action apply. + * + * @param[in] action + * Handle to input port action object (needs to be valid). + * @param[in] type + * Specific input port action previously registered for the input port action + * profile of the *action* object. + * @param[in] action_params + * Parameters for the *type* action. + * If struct rte_port_in_action_*type*_params is defined, it needs to point to + * a valid instance of this structure, otherwise it needs to be set to NULL. + * @return + * Zero on success, non-zero error code otherwise. 
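Tying the last two steps together, a minimal sketch (hypothetical helper name; the remaining rte_pipeline port parameters are left to the application) that wires the action handler into a pipeline input port and later updates the filter output port:

static int
attach_port_in_action(struct rte_port_in_action *action)
{
        struct rte_pipeline_port_in_params port_params = { 0 };
        struct rte_port_in_action_fltr_params fltr_params = {
                .port_id = 3,   /* example output port for filtered packets */
        };
        int status;

        /* Fills port_params.f_action and port_params.arg_ah */
        status = rte_port_in_action_params_get(action, &port_params);
        if (status)
                return status;

        /* ... fill in the remaining port parameters and create the input
         * port with rte_pipeline_port_in_create() as usual ... */

        /* The filter output port can be re-pointed at run time */
        return rte_port_in_action_apply(action, RTE_PORT_IN_ACTION_FLTR,
                &fltr_params);
}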
+ */ +int __rte_experimental +rte_port_in_action_apply(struct rte_port_in_action *action, + enum rte_port_in_action_type type, + void *action_params); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_PORT_IN_ACTION_H__ */ diff --git a/lib/librte_pipeline/rte_table_action.c b/lib/librte_pipeline/rte_table_action.c new file mode 100644 index 00000000..83ffa5de --- /dev/null +++ b/lib/librte_pipeline/rte_table_action.c @@ -0,0 +1,2386 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_table_action.h" + +#define rte_htons rte_cpu_to_be_16 +#define rte_htonl rte_cpu_to_be_32 + +#define rte_ntohs rte_be_to_cpu_16 +#define rte_ntohl rte_be_to_cpu_32 + +/** + * RTE_TABLE_ACTION_FWD + */ +#define fwd_data rte_pipeline_table_entry + +static int +fwd_apply(struct fwd_data *data, + struct rte_table_action_fwd_params *p) +{ + data->action = p->action; + + if (p->action == RTE_PIPELINE_ACTION_PORT) + data->port_id = p->id; + + if (p->action == RTE_PIPELINE_ACTION_TABLE) + data->table_id = p->id; + + return 0; +} + +/** + * RTE_TABLE_ACTION_LB + */ +static int +lb_cfg_check(struct rte_table_action_lb_config *cfg) +{ + if ((cfg == NULL) || + (cfg->key_size < RTE_TABLE_ACTION_LB_KEY_SIZE_MIN) || + (cfg->key_size > RTE_TABLE_ACTION_LB_KEY_SIZE_MAX) || + (!rte_is_power_of_2(cfg->key_size)) || + (cfg->f_hash == NULL)) + return -1; + + return 0; +} + +struct lb_data { + uint32_t out[RTE_TABLE_ACTION_LB_TABLE_SIZE]; +} __attribute__((__packed__)); + +static int +lb_apply(struct lb_data *data, + struct rte_table_action_lb_params *p) +{ + memcpy(data->out, p->out, sizeof(data->out)); + + return 0; +} + +static __rte_always_inline void +pkt_work_lb(struct rte_mbuf *mbuf, + struct lb_data *data, + struct rte_table_action_lb_config *cfg) +{ + uint8_t *pkt_key = RTE_MBUF_METADATA_UINT8_PTR(mbuf, cfg->key_offset); + uint32_t *out = RTE_MBUF_METADATA_UINT32_PTR(mbuf, cfg->out_offset); + uint64_t digest, pos; + uint32_t out_val; + + digest = cfg->f_hash(pkt_key, + cfg->key_mask, + cfg->key_size, + cfg->seed); + pos = digest & (RTE_TABLE_ACTION_LB_TABLE_SIZE - 1); + out_val = data->out[pos]; + + *out = out_val; +} + +/** + * RTE_TABLE_ACTION_MTR + */ +static int +mtr_cfg_check(struct rte_table_action_mtr_config *mtr) +{ + if ((mtr->alg == RTE_TABLE_ACTION_METER_SRTCM) || + ((mtr->n_tc != 1) && (mtr->n_tc != 4)) || + (mtr->n_bytes_enabled != 0)) + return -ENOTSUP; + return 0; +} + +#define MBUF_SCHED_QUEUE_TC_COLOR(queue, tc, color) \ + ((uint16_t)((((uint64_t)(queue)) & 0x3) | \ + ((((uint64_t)(tc)) & 0x3) << 2) | \ + ((((uint64_t)(color)) & 0x3) << 4))) + +#define MBUF_SCHED_COLOR(sched, color) \ + (((sched) & (~0x30LLU)) | ((color) << 4)) + +struct mtr_trtcm_data { + struct rte_meter_trtcm trtcm; + uint64_t stats[e_RTE_METER_COLORS]; +} __attribute__((__packed__)); + +#define MTR_TRTCM_DATA_METER_PROFILE_ID_GET(data) \ + (((data)->stats[e_RTE_METER_GREEN] & 0xF8LLU) >> 3) + +static void +mtr_trtcm_data_meter_profile_id_set(struct mtr_trtcm_data *data, + uint32_t profile_id) +{ + data->stats[e_RTE_METER_GREEN] &= ~0xF8LLU; + data->stats[e_RTE_METER_GREEN] |= (profile_id % 32) << 3; +} + +#define MTR_TRTCM_DATA_POLICER_ACTION_DROP_GET(data, color)\ + (((data)->stats[(color)] & 4LLU) >> 2) + +#define MTR_TRTCM_DATA_POLICER_ACTION_COLOR_GET(data, color)\ + ((enum rte_meter_color)((data)->stats[(color)] & 3LLU)) + +static void 
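For readability, the bit layout behind the MTR_TRTCM_DATA_* helpers in this file is: each 64-bit stats[color] word keeps the per-color packet counter in bits 8..63, the policer output color in bits 0..1, the policer drop flag in bit 2 and, in the green word only, the meter profile slot in bits 3..7. A small decoding sketch (not library code, hypothetical name):

static void
mtr_stats_word_decode(uint64_t w, uint64_t *n_pkts, unsigned int *out_color,
        unsigned int *drop, unsigned int *profile_slot)
{
        *n_pkts = w >> 8;                         /* per-color packet counter */
        *out_color = (unsigned int)(w & 3);       /* policer re-color action */
        *drop = (unsigned int)((w >> 2) & 1);     /* policer drop action */
        *profile_slot = (unsigned int)((w >> 3) & 0x1F); /* green word only */
}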
+mtr_trtcm_data_policer_action_set(struct mtr_trtcm_data *data, + enum rte_meter_color color, + enum rte_table_action_policer action) +{ + if (action == RTE_TABLE_ACTION_POLICER_DROP) { + data->stats[color] |= 4LLU; + } else { + data->stats[color] &= ~7LLU; + data->stats[color] |= color & 3LLU; + } +} + +static uint64_t +mtr_trtcm_data_stats_get(struct mtr_trtcm_data *data, + enum rte_meter_color color) +{ + return data->stats[color] >> 8; +} + +static void +mtr_trtcm_data_stats_reset(struct mtr_trtcm_data *data, + enum rte_meter_color color) +{ + data->stats[color] &= 0xFFLU; +} + +#define MTR_TRTCM_DATA_STATS_INC(data, color) \ + ((data)->stats[(color)] += (1LLU << 8)) + +static size_t +mtr_data_size(struct rte_table_action_mtr_config *mtr) +{ + return mtr->n_tc * sizeof(struct mtr_trtcm_data); +} + +struct dscp_table_entry_data { + enum rte_meter_color color; + uint16_t tc; + uint16_t queue_tc_color; +}; + +struct dscp_table_data { + struct dscp_table_entry_data entry[64]; +}; + +struct meter_profile_data { + struct rte_meter_trtcm_profile profile; + uint32_t profile_id; + int valid; +}; + +static struct meter_profile_data * +meter_profile_data_find(struct meter_profile_data *mp, + uint32_t mp_size, + uint32_t profile_id) +{ + uint32_t i; + + for (i = 0; i < mp_size; i++) { + struct meter_profile_data *mp_data = &mp[i]; + + if (mp_data->valid && (mp_data->profile_id == profile_id)) + return mp_data; + } + + return NULL; +} + +static struct meter_profile_data * +meter_profile_data_find_unused(struct meter_profile_data *mp, + uint32_t mp_size) +{ + uint32_t i; + + for (i = 0; i < mp_size; i++) { + struct meter_profile_data *mp_data = &mp[i]; + + if (!mp_data->valid) + return mp_data; + } + + return NULL; +} + +static int +mtr_apply_check(struct rte_table_action_mtr_params *p, + struct rte_table_action_mtr_config *cfg, + struct meter_profile_data *mp, + uint32_t mp_size) +{ + uint32_t i; + + if (p->tc_mask > RTE_LEN2MASK(cfg->n_tc, uint32_t)) + return -EINVAL; + + for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) { + struct rte_table_action_mtr_tc_params *p_tc = &p->mtr[i]; + struct meter_profile_data *mp_data; + + if ((p->tc_mask & (1LLU << i)) == 0) + continue; + + mp_data = meter_profile_data_find(mp, + mp_size, + p_tc->meter_profile_id); + if (!mp_data) + return -EINVAL; + } + + return 0; +} + +static int +mtr_apply(struct mtr_trtcm_data *data, + struct rte_table_action_mtr_params *p, + struct rte_table_action_mtr_config *cfg, + struct meter_profile_data *mp, + uint32_t mp_size) +{ + uint32_t i; + int status; + + /* Check input arguments */ + status = mtr_apply_check(p, cfg, mp, mp_size); + if (status) + return status; + + /* Apply */ + for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) { + struct rte_table_action_mtr_tc_params *p_tc = &p->mtr[i]; + struct mtr_trtcm_data *data_tc = &data[i]; + struct meter_profile_data *mp_data; + + if ((p->tc_mask & (1LLU << i)) == 0) + continue; + + /* Find profile */ + mp_data = meter_profile_data_find(mp, + mp_size, + p_tc->meter_profile_id); + if (!mp_data) + return -EINVAL; + + memset(data_tc, 0, sizeof(*data_tc)); + + /* Meter object */ + status = rte_meter_trtcm_config(&data_tc->trtcm, + &mp_data->profile); + if (status) + return status; + + /* Meter profile */ + mtr_trtcm_data_meter_profile_id_set(data_tc, + mp_data - mp); + + /* Policer actions */ + mtr_trtcm_data_policer_action_set(data_tc, + e_RTE_METER_GREEN, + p_tc->policer[e_RTE_METER_GREEN]); + + mtr_trtcm_data_policer_action_set(data_tc, + e_RTE_METER_YELLOW, + 
p_tc->policer[e_RTE_METER_YELLOW]); + + mtr_trtcm_data_policer_action_set(data_tc, + e_RTE_METER_RED, + p_tc->policer[e_RTE_METER_RED]); + } + + return 0; +} + +static __rte_always_inline uint64_t +pkt_work_mtr(struct rte_mbuf *mbuf, + struct mtr_trtcm_data *data, + struct dscp_table_data *dscp_table, + struct meter_profile_data *mp, + uint64_t time, + uint32_t dscp, + uint16_t total_length) +{ + uint64_t drop_mask, sched; + uint64_t *sched_ptr = (uint64_t *) &mbuf->hash.sched; + struct dscp_table_entry_data *dscp_entry = &dscp_table->entry[dscp]; + enum rte_meter_color color_in, color_meter, color_policer; + uint32_t tc, mp_id; + + tc = dscp_entry->tc; + color_in = dscp_entry->color; + data += tc; + mp_id = MTR_TRTCM_DATA_METER_PROFILE_ID_GET(data); + sched = *sched_ptr; + + /* Meter */ + color_meter = rte_meter_trtcm_color_aware_check( + &data->trtcm, + &mp[mp_id].profile, + time, + total_length, + color_in); + + /* Stats */ + MTR_TRTCM_DATA_STATS_INC(data, color_meter); + + /* Police */ + drop_mask = MTR_TRTCM_DATA_POLICER_ACTION_DROP_GET(data, color_meter); + color_policer = + MTR_TRTCM_DATA_POLICER_ACTION_COLOR_GET(data, color_meter); + *sched_ptr = MBUF_SCHED_COLOR(sched, color_policer); + + return drop_mask; +} + +/** + * RTE_TABLE_ACTION_TM + */ +static int +tm_cfg_check(struct rte_table_action_tm_config *tm) +{ + if ((tm->n_subports_per_port == 0) || + (rte_is_power_of_2(tm->n_subports_per_port) == 0) || + (tm->n_subports_per_port > UINT16_MAX) || + (tm->n_pipes_per_subport == 0) || + (rte_is_power_of_2(tm->n_pipes_per_subport) == 0)) + return -ENOTSUP; + + return 0; +} + +struct tm_data { + uint16_t queue_tc_color; + uint16_t subport; + uint32_t pipe; +} __attribute__((__packed__)); + +static int +tm_apply_check(struct rte_table_action_tm_params *p, + struct rte_table_action_tm_config *cfg) +{ + if ((p->subport_id >= cfg->n_subports_per_port) || + (p->pipe_id >= cfg->n_pipes_per_subport)) + return -EINVAL; + + return 0; +} + +static int +tm_apply(struct tm_data *data, + struct rte_table_action_tm_params *p, + struct rte_table_action_tm_config *cfg) +{ + int status; + + /* Check input arguments */ + status = tm_apply_check(p, cfg); + if (status) + return status; + + /* Apply */ + data->queue_tc_color = 0; + data->subport = (uint16_t) p->subport_id; + data->pipe = p->pipe_id; + + return 0; +} + +static __rte_always_inline void +pkt_work_tm(struct rte_mbuf *mbuf, + struct tm_data *data, + struct dscp_table_data *dscp_table, + uint32_t dscp) +{ + struct dscp_table_entry_data *dscp_entry = &dscp_table->entry[dscp]; + struct tm_data *sched_ptr = (struct tm_data *) &mbuf->hash.sched; + struct tm_data sched; + + sched = *data; + sched.queue_tc_color = dscp_entry->queue_tc_color; + *sched_ptr = sched; +} + +/** + * RTE_TABLE_ACTION_ENCAP + */ +static int +encap_valid(enum rte_table_action_encap_type encap) +{ + switch (encap) { + case RTE_TABLE_ACTION_ENCAP_ETHER: + case RTE_TABLE_ACTION_ENCAP_VLAN: + case RTE_TABLE_ACTION_ENCAP_QINQ: + case RTE_TABLE_ACTION_ENCAP_MPLS: + case RTE_TABLE_ACTION_ENCAP_PPPOE: + return 1; + default: + return 0; + } +} + +static int +encap_cfg_check(struct rte_table_action_encap_config *encap) +{ + if ((encap->encap_mask == 0) || + (__builtin_popcountll(encap->encap_mask) != 1)) + return -ENOTSUP; + + return 0; +} + +struct encap_ether_data { + struct ether_hdr ether; +} __attribute__((__packed__)); + +#define VLAN(pcp, dei, vid) \ + ((uint16_t)((((uint64_t)(pcp)) & 0x7LLU) << 13) | \ + ((((uint64_t)(dei)) & 0x1LLU) << 12) | \ + (((uint64_t)(vid)) & 0xFFFLLU)) 
\ + +struct encap_vlan_data { + struct ether_hdr ether; + struct vlan_hdr vlan; +} __attribute__((__packed__)); + +struct encap_qinq_data { + struct ether_hdr ether; + struct vlan_hdr svlan; + struct vlan_hdr cvlan; +} __attribute__((__packed__)); + +#define ETHER_TYPE_MPLS_UNICAST 0x8847 + +#define ETHER_TYPE_MPLS_MULTICAST 0x8848 + +#define MPLS(label, tc, s, ttl) \ + ((uint32_t)(((((uint64_t)(label)) & 0xFFFFFLLU) << 12) |\ + ((((uint64_t)(tc)) & 0x7LLU) << 9) | \ + ((((uint64_t)(s)) & 0x1LLU) << 8) | \ + (((uint64_t)(ttl)) & 0xFFLLU))) + +struct encap_mpls_data { + struct ether_hdr ether; + uint32_t mpls[RTE_TABLE_ACTION_MPLS_LABELS_MAX]; + uint32_t mpls_count; +} __attribute__((__packed__)); + +#define ETHER_TYPE_PPPOE_SESSION 0x8864 + +#define PPP_PROTOCOL_IP 0x0021 + +struct pppoe_ppp_hdr { + uint16_t ver_type_code; + uint16_t session_id; + uint16_t length; + uint16_t protocol; +} __attribute__((__packed__)); + +struct encap_pppoe_data { + struct ether_hdr ether; + struct pppoe_ppp_hdr pppoe_ppp; +} __attribute__((__packed__)); + +static size_t +encap_data_size(struct rte_table_action_encap_config *encap) +{ + switch (encap->encap_mask) { + case 1LLU << RTE_TABLE_ACTION_ENCAP_ETHER: + return sizeof(struct encap_ether_data); + + case 1LLU << RTE_TABLE_ACTION_ENCAP_VLAN: + return sizeof(struct encap_vlan_data); + + case 1LLU << RTE_TABLE_ACTION_ENCAP_QINQ: + return sizeof(struct encap_qinq_data); + + case 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS: + return sizeof(struct encap_mpls_data); + + case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE: + return sizeof(struct encap_pppoe_data); + + default: + return 0; + } +} + +static int +encap_apply_check(struct rte_table_action_encap_params *p, + struct rte_table_action_encap_config *cfg) +{ + if ((encap_valid(p->type) == 0) || + ((cfg->encap_mask & (1LLU << p->type)) == 0)) + return -EINVAL; + + switch (p->type) { + case RTE_TABLE_ACTION_ENCAP_ETHER: + return 0; + + case RTE_TABLE_ACTION_ENCAP_VLAN: + return 0; + + case RTE_TABLE_ACTION_ENCAP_QINQ: + return 0; + + case RTE_TABLE_ACTION_ENCAP_MPLS: + if ((p->mpls.mpls_count == 0) || + (p->mpls.mpls_count > RTE_TABLE_ACTION_MPLS_LABELS_MAX)) + return -EINVAL; + + return 0; + + case RTE_TABLE_ACTION_ENCAP_PPPOE: + return 0; + + default: + return -EINVAL; + } +} + +static int +encap_ether_apply(void *data, + struct rte_table_action_encap_params *p, + struct rte_table_action_common_config *common_cfg) +{ + struct encap_ether_data *d = data; + uint16_t ethertype = (common_cfg->ip_version) ? + ETHER_TYPE_IPv4 : + ETHER_TYPE_IPv6; + + /* Ethernet */ + ether_addr_copy(&p->ether.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->ether.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ethertype); + + return 0; +} + +static int +encap_vlan_apply(void *data, + struct rte_table_action_encap_params *p, + struct rte_table_action_common_config *common_cfg) +{ + struct encap_vlan_data *d = data; + uint16_t ethertype = (common_cfg->ip_version) ? 
+ ETHER_TYPE_IPv4 : + ETHER_TYPE_IPv6; + + /* Ethernet */ + ether_addr_copy(&p->vlan.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->vlan.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN); + + /* VLAN */ + d->vlan.vlan_tci = rte_htons(VLAN(p->vlan.vlan.pcp, + p->vlan.vlan.dei, + p->vlan.vlan.vid)); + d->vlan.eth_proto = rte_htons(ethertype); + + return 0; +} + +static int +encap_qinq_apply(void *data, + struct rte_table_action_encap_params *p, + struct rte_table_action_common_config *common_cfg) +{ + struct encap_qinq_data *d = data; + uint16_t ethertype = (common_cfg->ip_version) ? + ETHER_TYPE_IPv4 : + ETHER_TYPE_IPv6; + + /* Ethernet */ + ether_addr_copy(&p->qinq.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->qinq.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_QINQ); + + /* SVLAN */ + d->svlan.vlan_tci = rte_htons(VLAN(p->qinq.svlan.pcp, + p->qinq.svlan.dei, + p->qinq.svlan.vid)); + d->svlan.eth_proto = rte_htons(ETHER_TYPE_VLAN); + + /* CVLAN */ + d->cvlan.vlan_tci = rte_htons(VLAN(p->qinq.cvlan.pcp, + p->qinq.cvlan.dei, + p->qinq.cvlan.vid)); + d->cvlan.eth_proto = rte_htons(ethertype); + + return 0; +} + +static int +encap_mpls_apply(void *data, + struct rte_table_action_encap_params *p) +{ + struct encap_mpls_data *d = data; + uint16_t ethertype = (p->mpls.unicast) ? + ETHER_TYPE_MPLS_UNICAST : + ETHER_TYPE_MPLS_MULTICAST; + uint32_t i; + + /* Ethernet */ + ether_addr_copy(&p->mpls.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->mpls.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ethertype); + + /* MPLS */ + for (i = 0; i < p->mpls.mpls_count - 1; i++) + d->mpls[i] = rte_htonl(MPLS(p->mpls.mpls[i].label, + p->mpls.mpls[i].tc, + 0, + p->mpls.mpls[i].ttl)); + + d->mpls[i] = rte_htonl(MPLS(p->mpls.mpls[i].label, + p->mpls.mpls[i].tc, + 1, + p->mpls.mpls[i].ttl)); + + d->mpls_count = p->mpls.mpls_count; + return 0; +} + +static int +encap_pppoe_apply(void *data, + struct rte_table_action_encap_params *p) +{ + struct encap_pppoe_data *d = data; + + /* Ethernet */ + ether_addr_copy(&p->pppoe.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->pppoe.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_PPPOE_SESSION); + + /* PPPoE and PPP*/ + d->pppoe_ppp.ver_type_code = rte_htons(0x1100); + d->pppoe_ppp.session_id = rte_htons(p->pppoe.pppoe.session_id); + d->pppoe_ppp.length = 0; /* not pre-computed */ + d->pppoe_ppp.protocol = rte_htons(PPP_PROTOCOL_IP); + + return 0; +} + +static int +encap_apply(void *data, + struct rte_table_action_encap_params *p, + struct rte_table_action_encap_config *cfg, + struct rte_table_action_common_config *common_cfg) +{ + int status; + + /* Check input arguments */ + status = encap_apply_check(p, cfg); + if (status) + return status; + + switch (p->type) { + case RTE_TABLE_ACTION_ENCAP_ETHER: + return encap_ether_apply(data, p, common_cfg); + + case RTE_TABLE_ACTION_ENCAP_VLAN: + return encap_vlan_apply(data, p, common_cfg); + + case RTE_TABLE_ACTION_ENCAP_QINQ: + return encap_qinq_apply(data, p, common_cfg); + + case RTE_TABLE_ACTION_ENCAP_MPLS: + return encap_mpls_apply(data, p); + + case RTE_TABLE_ACTION_ENCAP_PPPOE: + return encap_pppoe_apply(data, p); + + default: + return -EINVAL; + } +} + +static __rte_always_inline void * +encap(void *dst, const void *src, size_t n) +{ + dst = ((uint8_t *) dst) - n; + return rte_memcpy(dst, src, n); +} + +static __rte_always_inline void +pkt_work_encap(struct rte_mbuf *mbuf, + void *data, + struct 
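As a usage illustration of the MPLS encapsulation above (the variable name and all values are made up), the corresponding action parameters would look like this, with the bottom-of-stack bit applied automatically to the last label by encap_mpls_apply():

/* Assumes the rte_table_action.h API from this patch; illustrative only. */
static struct rte_table_action_encap_params mpls_encap_example = {
        .type = RTE_TABLE_ACTION_ENCAP_MPLS,
        .mpls = {
                .unicast = 1,           /* use ETHER_TYPE_MPLS_UNICAST */
                .mpls_count = 2,        /* outermost label first */
                .mpls = {
                        { .label = 1000, .tc = 0, .ttl = 64 },
                        { .label = 2000, .tc = 0, .ttl = 64 }, /* S bit set on this one */
                },
                /* .ether.da / .ether.sa to be filled with real MAC addresses */
        },
};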
rte_table_action_encap_config *cfg, + void *ip, + uint16_t total_length, + uint32_t ip_offset) +{ + switch (cfg->encap_mask) { + case 1LLU << RTE_TABLE_ACTION_ENCAP_ETHER: + encap(ip, data, sizeof(struct encap_ether_data)); + mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + + sizeof(struct encap_ether_data)); + mbuf->pkt_len = mbuf->data_len = total_length + + sizeof(struct encap_ether_data); + break; + + case 1LLU << RTE_TABLE_ACTION_ENCAP_VLAN: + encap(ip, data, sizeof(struct encap_vlan_data)); + mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + + sizeof(struct encap_vlan_data)); + mbuf->pkt_len = mbuf->data_len = total_length + + sizeof(struct encap_vlan_data); + break; + + case 1LLU << RTE_TABLE_ACTION_ENCAP_QINQ: + encap(ip, data, sizeof(struct encap_qinq_data)); + mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + + sizeof(struct encap_qinq_data)); + mbuf->pkt_len = mbuf->data_len = total_length + + sizeof(struct encap_qinq_data); + break; + + case 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS: + { + struct encap_mpls_data *mpls = data; + size_t size = sizeof(struct ether_hdr) + + mpls->mpls_count * 4; + + encap(ip, data, size); + mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + size); + mbuf->pkt_len = mbuf->data_len = total_length + size; + break; + } + + case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE: + { + struct encap_pppoe_data *pppoe = + encap(ip, data, sizeof(struct encap_pppoe_data)); + pppoe->pppoe_ppp.length = rte_htons(total_length + 2); + mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + + sizeof(struct encap_pppoe_data)); + mbuf->pkt_len = mbuf->data_len = total_length + + sizeof(struct encap_pppoe_data); + break; + } + + default: + break; + } +} + +/** + * RTE_TABLE_ACTION_NAT + */ +static int +nat_cfg_check(struct rte_table_action_nat_config *nat) +{ + if ((nat->proto != 0x06) && + (nat->proto != 0x11)) + return -ENOTSUP; + + return 0; +} + +struct nat_ipv4_data { + uint32_t addr; + uint16_t port; +} __attribute__((__packed__)); + +struct nat_ipv6_data { + uint8_t addr[16]; + uint16_t port; +} __attribute__((__packed__)); + +static size_t +nat_data_size(struct rte_table_action_nat_config *nat __rte_unused, + struct rte_table_action_common_config *common) +{ + int ip_version = common->ip_version; + + return (ip_version) ? 
+ sizeof(struct nat_ipv4_data) : + sizeof(struct nat_ipv6_data); +} + +static int +nat_apply_check(struct rte_table_action_nat_params *p, + struct rte_table_action_common_config *cfg) +{ + if ((p->ip_version && (cfg->ip_version == 0)) || + ((p->ip_version == 0) && cfg->ip_version)) + return -EINVAL; + + return 0; +} + +static int +nat_apply(void *data, + struct rte_table_action_nat_params *p, + struct rte_table_action_common_config *cfg) +{ + int status; + + /* Check input arguments */ + status = nat_apply_check(p, cfg); + if (status) + return status; + + /* Apply */ + if (p->ip_version) { + struct nat_ipv4_data *d = data; + + d->addr = rte_htonl(p->addr.ipv4); + d->port = rte_htons(p->port); + } else { + struct nat_ipv6_data *d = data; + + memcpy(d->addr, p->addr.ipv6, sizeof(d->addr)); + d->port = rte_htons(p->port); + } + + return 0; +} + +static __rte_always_inline uint16_t +nat_ipv4_checksum_update(uint16_t cksum0, + uint32_t ip0, + uint32_t ip1) +{ + int32_t cksum1; + + cksum1 = cksum0; + cksum1 = ~cksum1 & 0xFFFF; + + /* Subtract ip0 (one's complement logic) */ + cksum1 -= (ip0 >> 16) + (ip0 & 0xFFFF); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + /* Add ip1 (one's complement logic) */ + cksum1 += (ip1 >> 16) + (ip1 & 0xFFFF); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + return (uint16_t)(~cksum1); +} + +static __rte_always_inline uint16_t +nat_ipv4_tcp_udp_checksum_update(uint16_t cksum0, + uint32_t ip0, + uint32_t ip1, + uint16_t port0, + uint16_t port1) +{ + int32_t cksum1; + + cksum1 = cksum0; + cksum1 = ~cksum1 & 0xFFFF; + + /* Subtract ip0 and port 0 (one's complement logic) */ + cksum1 -= (ip0 >> 16) + (ip0 & 0xFFFF) + port0; + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + /* Add ip1 and port1 (one's complement logic) */ + cksum1 += (ip1 >> 16) + (ip1 & 0xFFFF) + port1; + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + return (uint16_t)(~cksum1); +} + +static __rte_always_inline uint16_t +nat_ipv6_tcp_udp_checksum_update(uint16_t cksum0, + uint16_t *ip0, + uint16_t *ip1, + uint16_t port0, + uint16_t port1) +{ + int32_t cksum1; + + cksum1 = cksum0; + cksum1 = ~cksum1 & 0xFFFF; + + /* Subtract ip0 and port 0 (one's complement logic) */ + cksum1 -= ip0[0] + ip0[1] + ip0[2] + ip0[3] + + ip0[4] + ip0[5] + ip0[6] + ip0[7] + port0; + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + /* Add ip1 and port1 (one's complement logic) */ + cksum1 += ip1[0] + ip1[1] + ip1[2] + ip1[3] + + ip1[4] + ip1[5] + ip1[6] + ip1[7] + port1; + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + return (uint16_t)(~cksum1); +} + +static __rte_always_inline void +pkt_ipv4_work_nat(struct ipv4_hdr *ip, + struct nat_ipv4_data *data, + struct rte_table_action_nat_config *cfg) +{ + if (cfg->source_nat) { + if (cfg->proto == 0x6) { + struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1]; + uint16_t ip_cksum, tcp_cksum; + + ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum, + ip->src_addr, + data->addr); + + tcp_cksum = nat_ipv4_tcp_udp_checksum_update(tcp->cksum, + ip->src_addr, + data->addr, + tcp->src_port, + data->port); + + ip->src_addr = data->addr; + ip->hdr_checksum = ip_cksum; + tcp->src_port = data->port; + tcp->cksum = tcp_cksum; + } else { + struct udp_hdr *udp = (struct udp_hdr *) &ip[1]; + uint16_t ip_cksum, 
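The helpers above are the standard incremental checksum update (RFC 1624 style: subtract the old field, add the new one, all in one's-complement arithmetic), so the header never has to be re-summed. A small test-style sketch of that property, reusing nat_ipv4_checksum_update() defined above (sample values, hypothetical function names, needs <assert.h>):

/* Reference one's-complement checksum, used only for cross-checking. */
static uint16_t
ones_sum16(const uint16_t *p, int n16)
{
        uint32_t s = 0;
        int i;

        for (i = 0; i < n16; i++)
                s += p[i];
        while (s >> 16)
                s = (s & 0xFFFF) + (s >> 16);
        return (uint16_t)~s;
}

static void
check_incremental_update(void)
{
        /* Sample IPv4 header as ten 16-bit words, checksum word cleared */
        uint16_t hdr[10] = { 0x4500, 0x003C, 0x1C46, 0x4000, 0x4006, 0,
                0xC0A8, 0x0001, 0xC0A8, 0x00C7 };
        uint32_t ip0 = 0xC0A80001;      /* old source address 192.168.0.1 */
        uint32_t ip1 = 0x0A000005;      /* new source address 10.0.0.5 */
        uint16_t cksum0, cksum1;

        hdr[5] = ones_sum16(hdr, 10);   /* full checksum with ip0 in place */
        cksum0 = hdr[5];

        cksum1 = nat_ipv4_checksum_update(cksum0, ip0, ip1);

        hdr[6] = 0x0A00;                /* rewrite the source address ... */
        hdr[7] = 0x0005;
        hdr[5] = 0;                     /* ... and recompute from scratch */
        assert(cksum1 == ones_sum16(hdr, 10));
}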
udp_cksum; + + ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum, + ip->src_addr, + data->addr); + + udp_cksum = nat_ipv4_tcp_udp_checksum_update(udp->dgram_cksum, + ip->src_addr, + data->addr, + udp->src_port, + data->port); + + ip->src_addr = data->addr; + ip->hdr_checksum = ip_cksum; + udp->src_port = data->port; + if (udp->dgram_cksum) + udp->dgram_cksum = udp_cksum; + } + } else { + if (cfg->proto == 0x6) { + struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1]; + uint16_t ip_cksum, tcp_cksum; + + ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum, + ip->dst_addr, + data->addr); + + tcp_cksum = nat_ipv4_tcp_udp_checksum_update(tcp->cksum, + ip->dst_addr, + data->addr, + tcp->dst_port, + data->port); + + ip->dst_addr = data->addr; + ip->hdr_checksum = ip_cksum; + tcp->dst_port = data->port; + tcp->cksum = tcp_cksum; + } else { + struct udp_hdr *udp = (struct udp_hdr *) &ip[1]; + uint16_t ip_cksum, udp_cksum; + + ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum, + ip->dst_addr, + data->addr); + + udp_cksum = nat_ipv4_tcp_udp_checksum_update(udp->dgram_cksum, + ip->dst_addr, + data->addr, + udp->dst_port, + data->port); + + ip->dst_addr = data->addr; + ip->hdr_checksum = ip_cksum; + udp->dst_port = data->port; + if (udp->dgram_cksum) + udp->dgram_cksum = udp_cksum; + } + } +} + +static __rte_always_inline void +pkt_ipv6_work_nat(struct ipv6_hdr *ip, + struct nat_ipv6_data *data, + struct rte_table_action_nat_config *cfg) +{ + if (cfg->source_nat) { + if (cfg->proto == 0x6) { + struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1]; + uint16_t tcp_cksum; + + tcp_cksum = nat_ipv6_tcp_udp_checksum_update(tcp->cksum, + (uint16_t *)ip->src_addr, + (uint16_t *)data->addr, + tcp->src_port, + data->port); + + rte_memcpy(ip->src_addr, data->addr, 16); + tcp->src_port = data->port; + tcp->cksum = tcp_cksum; + } else { + struct udp_hdr *udp = (struct udp_hdr *) &ip[1]; + uint16_t udp_cksum; + + udp_cksum = nat_ipv6_tcp_udp_checksum_update(udp->dgram_cksum, + (uint16_t *)ip->src_addr, + (uint16_t *)data->addr, + udp->src_port, + data->port); + + rte_memcpy(ip->src_addr, data->addr, 16); + udp->src_port = data->port; + udp->dgram_cksum = udp_cksum; + } + } else { + if (cfg->proto == 0x6) { + struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1]; + uint16_t tcp_cksum; + + tcp_cksum = nat_ipv6_tcp_udp_checksum_update(tcp->cksum, + (uint16_t *)ip->dst_addr, + (uint16_t *)data->addr, + tcp->dst_port, + data->port); + + rte_memcpy(ip->dst_addr, data->addr, 16); + tcp->dst_port = data->port; + tcp->cksum = tcp_cksum; + } else { + struct udp_hdr *udp = (struct udp_hdr *) &ip[1]; + uint16_t udp_cksum; + + udp_cksum = nat_ipv6_tcp_udp_checksum_update(udp->dgram_cksum, + (uint16_t *)ip->dst_addr, + (uint16_t *)data->addr, + udp->dst_port, + data->port); + + rte_memcpy(ip->dst_addr, data->addr, 16); + udp->dst_port = data->port; + udp->dgram_cksum = udp_cksum; + } + } +} + +/** + * RTE_TABLE_ACTION_TTL + */ +static int +ttl_cfg_check(struct rte_table_action_ttl_config *ttl) +{ + if (ttl->drop == 0) + return -ENOTSUP; + + return 0; +} + +struct ttl_data { + uint32_t n_packets; +} __attribute__((__packed__)); + +#define TTL_INIT(data, decrement) \ + ((data)->n_packets = (decrement) ? 
1 : 0) + +#define TTL_DEC_GET(data) \ + ((uint8_t)((data)->n_packets & 1)) + +#define TTL_STATS_RESET(data) \ + ((data)->n_packets = ((data)->n_packets & 1)) + +#define TTL_STATS_READ(data) \ + ((data)->n_packets >> 1) + +#define TTL_STATS_ADD(data, value) \ + ((data)->n_packets = \ + (((((data)->n_packets >> 1) + (value)) << 1) | \ + ((data)->n_packets & 1))) + +static int +ttl_apply(void *data, + struct rte_table_action_ttl_params *p) +{ + struct ttl_data *d = data; + + TTL_INIT(d, p->decrement); + + return 0; +} + +static __rte_always_inline uint64_t +pkt_ipv4_work_ttl(struct ipv4_hdr *ip, + struct ttl_data *data) +{ + uint32_t drop; + uint16_t cksum = ip->hdr_checksum; + uint8_t ttl = ip->time_to_live; + uint8_t ttl_diff = TTL_DEC_GET(data); + + cksum += ttl_diff; + ttl -= ttl_diff; + + ip->hdr_checksum = cksum; + ip->time_to_live = ttl; + + drop = (ttl == 0) ? 1 : 0; + TTL_STATS_ADD(data, drop); + + return drop; +} + +static __rte_always_inline uint64_t +pkt_ipv6_work_ttl(struct ipv6_hdr *ip, + struct ttl_data *data) +{ + uint32_t drop; + uint8_t ttl = ip->hop_limits; + uint8_t ttl_diff = TTL_DEC_GET(data); + + ttl -= ttl_diff; + + ip->hop_limits = ttl; + + drop = (ttl == 0) ? 1 : 0; + TTL_STATS_ADD(data, drop); + + return drop; +} + +/** + * RTE_TABLE_ACTION_STATS + */ +static int +stats_cfg_check(struct rte_table_action_stats_config *stats) +{ + if ((stats->n_packets_enabled == 0) && (stats->n_bytes_enabled == 0)) + return -EINVAL; + + return 0; +} + +struct stats_data { + uint64_t n_packets; + uint64_t n_bytes; +} __attribute__((__packed__)); + +static int +stats_apply(struct stats_data *data, + struct rte_table_action_stats_params *p) +{ + data->n_packets = p->n_packets; + data->n_bytes = p->n_bytes; + + return 0; +} + +static __rte_always_inline void +pkt_work_stats(struct stats_data *data, + uint16_t total_length) +{ + data->n_packets++; + data->n_bytes += total_length; +} + +/** + * RTE_TABLE_ACTION_TIME + */ +struct time_data { + uint64_t time; +} __attribute__((__packed__)); + +static int +time_apply(struct time_data *data, + struct rte_table_action_time_params *p) +{ + data->time = p->time; + return 0; +} + +static __rte_always_inline void +pkt_work_time(struct time_data *data, + uint64_t time) +{ + data->time = time; +} + +/** + * Action profile + */ +static int +action_valid(enum rte_table_action_type action) +{ + switch (action) { + case RTE_TABLE_ACTION_FWD: + case RTE_TABLE_ACTION_LB: + case RTE_TABLE_ACTION_MTR: + case RTE_TABLE_ACTION_TM: + case RTE_TABLE_ACTION_ENCAP: + case RTE_TABLE_ACTION_NAT: + case RTE_TABLE_ACTION_TTL: + case RTE_TABLE_ACTION_STATS: + case RTE_TABLE_ACTION_TIME: + return 1; + default: + return 0; + } +} + + +#define RTE_TABLE_ACTION_MAX 64 + +struct ap_config { + uint64_t action_mask; + struct rte_table_action_common_config common; + struct rte_table_action_lb_config lb; + struct rte_table_action_mtr_config mtr; + struct rte_table_action_tm_config tm; + struct rte_table_action_encap_config encap; + struct rte_table_action_nat_config nat; + struct rte_table_action_ttl_config ttl; + struct rte_table_action_stats_config stats; +}; + +static size_t +action_cfg_size(enum rte_table_action_type action) +{ + switch (action) { + case RTE_TABLE_ACTION_LB: + return sizeof(struct rte_table_action_lb_config); + case RTE_TABLE_ACTION_MTR: + return sizeof(struct rte_table_action_mtr_config); + case RTE_TABLE_ACTION_TM: + return sizeof(struct rte_table_action_tm_config); + case RTE_TABLE_ACTION_ENCAP: + return sizeof(struct rte_table_action_encap_config); + 
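For reference, ttl_data packs two things into the single n_packets word handled by the TTL_* macros earlier in this file: bit 0 holds the per-rule decrement enable and bits 1..63 count the packets dropped because the TTL/hop limit reached zero. Written out longhand (illustrative only, hypothetical name):

static void
ttl_counters_decode(uint64_t n_packets, int *decrement_enabled,
        uint64_t *n_pkts_dropped)
{
        *decrement_enabled = (int)(n_packets & 1);  /* TTL_DEC_GET() */
        *n_pkts_dropped = n_packets >> 1;           /* TTL_STATS_READ() */
}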
case RTE_TABLE_ACTION_NAT: + return sizeof(struct rte_table_action_nat_config); + case RTE_TABLE_ACTION_TTL: + return sizeof(struct rte_table_action_ttl_config); + case RTE_TABLE_ACTION_STATS: + return sizeof(struct rte_table_action_stats_config); + default: + return 0; + } +} + +static void* +action_cfg_get(struct ap_config *ap_config, + enum rte_table_action_type type) +{ + switch (type) { + case RTE_TABLE_ACTION_LB: + return &ap_config->lb; + + case RTE_TABLE_ACTION_MTR: + return &ap_config->mtr; + + case RTE_TABLE_ACTION_TM: + return &ap_config->tm; + + case RTE_TABLE_ACTION_ENCAP: + return &ap_config->encap; + + case RTE_TABLE_ACTION_NAT: + return &ap_config->nat; + + case RTE_TABLE_ACTION_TTL: + return &ap_config->ttl; + + case RTE_TABLE_ACTION_STATS: + return &ap_config->stats; + + default: + return NULL; + } +} + +static void +action_cfg_set(struct ap_config *ap_config, + enum rte_table_action_type type, + void *action_cfg) +{ + void *dst = action_cfg_get(ap_config, type); + + if (dst) + memcpy(dst, action_cfg, action_cfg_size(type)); + + ap_config->action_mask |= 1LLU << type; +} + +struct ap_data { + size_t offset[RTE_TABLE_ACTION_MAX]; + size_t total_size; +}; + +static size_t +action_data_size(enum rte_table_action_type action, + struct ap_config *ap_config) +{ + switch (action) { + case RTE_TABLE_ACTION_FWD: + return sizeof(struct fwd_data); + + case RTE_TABLE_ACTION_LB: + return sizeof(struct lb_data); + + case RTE_TABLE_ACTION_MTR: + return mtr_data_size(&ap_config->mtr); + + case RTE_TABLE_ACTION_TM: + return sizeof(struct tm_data); + + case RTE_TABLE_ACTION_ENCAP: + return encap_data_size(&ap_config->encap); + + case RTE_TABLE_ACTION_NAT: + return nat_data_size(&ap_config->nat, + &ap_config->common); + + case RTE_TABLE_ACTION_TTL: + return sizeof(struct ttl_data); + + case RTE_TABLE_ACTION_STATS: + return sizeof(struct stats_data); + + case RTE_TABLE_ACTION_TIME: + return sizeof(struct time_data); + + default: + return 0; + } +} + + +static void +action_data_offset_set(struct ap_data *ap_data, + struct ap_config *ap_config) +{ + uint64_t action_mask = ap_config->action_mask; + size_t offset; + uint32_t action; + + memset(ap_data->offset, 0, sizeof(ap_data->offset)); + + offset = 0; + for (action = 0; action < RTE_TABLE_ACTION_MAX; action++) + if (action_mask & (1LLU << action)) { + ap_data->offset[action] = offset; + offset += action_data_size((enum rte_table_action_type)action, + ap_config); + } + + ap_data->total_size = offset; +} + +struct rte_table_action_profile { + struct ap_config cfg; + struct ap_data data; + int frozen; +}; + +struct rte_table_action_profile * +rte_table_action_profile_create(struct rte_table_action_common_config *common) +{ + struct rte_table_action_profile *ap; + + /* Check input arguments */ + if (common == NULL) + return NULL; + + /* Memory allocation */ + ap = calloc(1, sizeof(struct rte_table_action_profile)); + if (ap == NULL) + return NULL; + + /* Initialization */ + memcpy(&ap->cfg.common, common, sizeof(*common)); + + return ap; +} + + +int +rte_table_action_profile_action_register(struct rte_table_action_profile *profile, + enum rte_table_action_type type, + void *action_config) +{ + int status; + + /* Check input arguments */ + if ((profile == NULL) || + profile->frozen || + (action_valid(type) == 0) || + (profile->cfg.action_mask & (1LLU << type)) || + ((action_cfg_size(type) == 0) && action_config) || + (action_cfg_size(type) && (action_config == NULL))) + return -EINVAL; + + switch (type) { + case RTE_TABLE_ACTION_LB: + status = 
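A minimal table action profile setup matching the functions above might look as follows (hypothetical helper name; the ip_offset value is an assumption about where the IP header sits relative to the mbuf and must match the application's packet layout):

static struct rte_table_action_profile *
setup_table_action_profile(void)
{
        struct rte_table_action_common_config common = {
                .ip_version = 1,        /* nonzero selects IPv4 */
                .ip_offset = sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM +
                        sizeof(struct ether_hdr),
        };
        struct rte_table_action_stats_config stats = {
                .n_packets_enabled = 1,
                .n_bytes_enabled = 1,
        };
        struct rte_table_action_profile *profile;

        profile = rte_table_action_profile_create(&common);
        if (profile == NULL)
                return NULL;

        if (rte_table_action_profile_action_register(profile,
                        RTE_TABLE_ACTION_STATS, &stats) ||
            rte_table_action_profile_freeze(profile))
                return NULL;    /* FWD is enabled implicitly on freeze */

        return profile;
}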
lb_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_MTR: + status = mtr_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_TM: + status = tm_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_ENCAP: + status = encap_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_NAT: + status = nat_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_TTL: + status = ttl_cfg_check(action_config); + break; + + case RTE_TABLE_ACTION_STATS: + status = stats_cfg_check(action_config); + break; + + default: + status = 0; + break; + } + + if (status) + return status; + + /* Action enable */ + action_cfg_set(&profile->cfg, type, action_config); + + return 0; +} + +int +rte_table_action_profile_freeze(struct rte_table_action_profile *profile) +{ + if (profile->frozen) + return -EBUSY; + + profile->cfg.action_mask |= 1LLU << RTE_TABLE_ACTION_FWD; + action_data_offset_set(&profile->data, &profile->cfg); + profile->frozen = 1; + + return 0; +} + +int +rte_table_action_profile_free(struct rte_table_action_profile *profile) +{ + if (profile == NULL) + return 0; + + free(profile); + return 0; +} + +/** + * Action + */ +#define METER_PROFILES_MAX 32 + +struct rte_table_action { + struct ap_config cfg; + struct ap_data data; + struct dscp_table_data dscp_table; + struct meter_profile_data mp[METER_PROFILES_MAX]; +}; + +struct rte_table_action * +rte_table_action_create(struct rte_table_action_profile *profile, + uint32_t socket_id) +{ + struct rte_table_action *action; + + /* Check input arguments */ + if ((profile == NULL) || + (profile->frozen == 0)) + return NULL; + + /* Memory allocation */ + action = rte_zmalloc_socket(NULL, + sizeof(struct rte_table_action), + RTE_CACHE_LINE_SIZE, + socket_id); + if (action == NULL) + return NULL; + + /* Initialization */ + memcpy(&action->cfg, &profile->cfg, sizeof(profile->cfg)); + memcpy(&action->data, &profile->data, sizeof(profile->data)); + + return action; +} + +static __rte_always_inline void * +action_data_get(void *data, + struct rte_table_action *action, + enum rte_table_action_type type) +{ + size_t offset = action->data.offset[type]; + uint8_t *data_bytes = data; + + return &data_bytes[offset]; +} + +int +rte_table_action_apply(struct rte_table_action *action, + void *data, + enum rte_table_action_type type, + void *action_params) +{ + void *action_data; + + /* Check input arguments */ + if ((action == NULL) || + (data == NULL) || + (action_valid(type) == 0) || + ((action->cfg.action_mask & (1LLU << type)) == 0) || + (action_params == NULL)) + return -EINVAL; + + /* Data update */ + action_data = action_data_get(data, action, type); + + switch (type) { + case RTE_TABLE_ACTION_FWD: + return fwd_apply(action_data, + action_params); + + case RTE_TABLE_ACTION_LB: + return lb_apply(action_data, + action_params); + + case RTE_TABLE_ACTION_MTR: + return mtr_apply(action_data, + action_params, + &action->cfg.mtr, + action->mp, + RTE_DIM(action->mp)); + + case RTE_TABLE_ACTION_TM: + return tm_apply(action_data, + action_params, + &action->cfg.tm); + + case RTE_TABLE_ACTION_ENCAP: + return encap_apply(action_data, + action_params, + &action->cfg.encap, + &action->cfg.common); + + case RTE_TABLE_ACTION_NAT: + return nat_apply(action_data, + action_params, + &action->cfg.common); + + case RTE_TABLE_ACTION_TTL: + return ttl_apply(action_data, + action_params); + + case RTE_TABLE_ACTION_STATS: + return stats_apply(action_data, + action_params); + + case RTE_TABLE_ACTION_TIME: + return time_apply(action_data, + action_params); + + 
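A sketch of how an application would fill the action data of one table entry with this API (the helper name and IDs are illustrative; the entry buffer must be large enough for rte_pipeline_table_entry plus the profile's action data):

static int
table_entry_fill(struct rte_table_action *a,
        struct rte_pipeline_table_entry *entry)
{
        struct rte_table_action_fwd_params fwd = {
                .action = RTE_PIPELINE_ACTION_PORT,
                .id = 0,                /* output port 0 */
        };
        struct rte_table_action_stats_params stats = {
                .n_packets = 0,
                .n_bytes = 0,
        };
        int status;

        status = rte_table_action_apply(a, entry, RTE_TABLE_ACTION_FWD, &fwd);
        if (status)
                return status;

        return rte_table_action_apply(a, entry, RTE_TABLE_ACTION_STATS,
                &stats);
}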
default: + return -EINVAL; + } +} + +int +rte_table_action_dscp_table_update(struct rte_table_action *action, + uint64_t dscp_mask, + struct rte_table_action_dscp_table *table) +{ + uint32_t i; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & ((1LLU << RTE_TABLE_ACTION_MTR) | + (1LLU << RTE_TABLE_ACTION_TM))) == 0) || + (dscp_mask == 0) || + (table == NULL)) + return -EINVAL; + + for (i = 0; i < RTE_DIM(table->entry); i++) { + struct dscp_table_entry_data *data = + &action->dscp_table.entry[i]; + struct rte_table_action_dscp_table_entry *entry = + &table->entry[i]; + uint16_t queue_tc_color = + MBUF_SCHED_QUEUE_TC_COLOR(entry->tc_queue_id, + entry->tc_id, + entry->color); + + if ((dscp_mask & (1LLU << i)) == 0) + continue; + + data->color = entry->color; + data->tc = entry->tc_id; + data->queue_tc_color = queue_tc_color; + } + + return 0; +} + +int +rte_table_action_meter_profile_add(struct rte_table_action *action, + uint32_t meter_profile_id, + struct rte_table_action_meter_profile *profile) +{ + struct meter_profile_data *mp_data; + uint32_t status; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0) || + (profile == NULL)) + return -EINVAL; + + if (profile->alg != RTE_TABLE_ACTION_METER_TRTCM) + return -ENOTSUP; + + mp_data = meter_profile_data_find(action->mp, + RTE_DIM(action->mp), + meter_profile_id); + if (mp_data) + return -EEXIST; + + mp_data = meter_profile_data_find_unused(action->mp, + RTE_DIM(action->mp)); + if (!mp_data) + return -ENOSPC; + + /* Install new profile */ + status = rte_meter_trtcm_profile_config(&mp_data->profile, + &profile->trtcm); + if (status) + return status; + + mp_data->profile_id = meter_profile_id; + mp_data->valid = 1; + + return 0; +} + +int +rte_table_action_meter_profile_delete(struct rte_table_action *action, + uint32_t meter_profile_id) +{ + struct meter_profile_data *mp_data; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0)) + return -EINVAL; + + mp_data = meter_profile_data_find(action->mp, + RTE_DIM(action->mp), + meter_profile_id); + if (!mp_data) + return 0; + + /* Uninstall profile */ + mp_data->valid = 0; + + return 0; +} + +int +rte_table_action_meter_read(struct rte_table_action *action, + void *data, + uint32_t tc_mask, + struct rte_table_action_mtr_counters *stats, + int clear) +{ + struct mtr_trtcm_data *mtr_data; + uint32_t i; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0) || + (data == NULL) || + (tc_mask > RTE_LEN2MASK(action->cfg.mtr.n_tc, uint32_t))) + return -EINVAL; + + mtr_data = action_data_get(data, action, RTE_TABLE_ACTION_MTR); + + /* Read */ + if (stats) { + for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) { + struct rte_table_action_mtr_counters_tc *dst = + &stats->stats[i]; + struct mtr_trtcm_data *src = &mtr_data[i]; + + if ((tc_mask & (1 << i)) == 0) + continue; + + dst->n_packets[e_RTE_METER_GREEN] = + mtr_trtcm_data_stats_get(src, e_RTE_METER_GREEN); + + dst->n_packets[e_RTE_METER_YELLOW] = + mtr_trtcm_data_stats_get(src, e_RTE_METER_YELLOW); + + dst->n_packets[e_RTE_METER_RED] = + mtr_trtcm_data_stats_get(src, e_RTE_METER_RED); + + dst->n_packets_valid = 1; + dst->n_bytes_valid = 0; + } + + stats->tc_mask = tc_mask; + } + + /* Clear */ + if (clear) + for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) { + struct mtr_trtcm_data *src = &mtr_data[i]; + + if 
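Putting the meter API above together, a sketch (hypothetical name, illustrative rates in bytes per second and burst sizes in bytes) that installs one trTCM profile and later reads and clears the traffic class 0 counters of an entry:

static int
meter_example(struct rte_table_action *a, void *entry_data)
{
        struct rte_table_action_meter_profile mp = {
                .alg = RTE_TABLE_ACTION_METER_TRTCM,
                .trtcm = {
                        .cir = 1250000, /* 10 Mbps committed rate */
                        .pir = 2500000, /* 20 Mbps peak rate */
                        .cbs = 2048,
                        .pbs = 4096,
                },
        };
        struct rte_table_action_mtr_counters counters;
        int status;

        status = rte_table_action_meter_profile_add(a, 0, &mp);
        if (status)
                return status;

        /* rte_table_action_apply(a, entry_data, RTE_TABLE_ACTION_MTR, ...)
         * would then reference meter profile 0 for the traffic classes used.
         */

        return rte_table_action_meter_read(a, entry_data, 1 /* TC 0 */,
                &counters, 1 /* clear after read */);
}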
((tc_mask & (1 << i)) == 0) + continue; + + mtr_trtcm_data_stats_reset(src, e_RTE_METER_GREEN); + mtr_trtcm_data_stats_reset(src, e_RTE_METER_YELLOW); + mtr_trtcm_data_stats_reset(src, e_RTE_METER_RED); + } + + + return 0; +} + +int +rte_table_action_ttl_read(struct rte_table_action *action, + void *data, + struct rte_table_action_ttl_counters *stats, + int clear) +{ + struct ttl_data *ttl_data; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & + (1LLU << RTE_TABLE_ACTION_TTL)) == 0) || + (data == NULL)) + return -EINVAL; + + ttl_data = action_data_get(data, action, RTE_TABLE_ACTION_TTL); + + /* Read */ + if (stats) + stats->n_packets = TTL_STATS_READ(ttl_data); + + /* Clear */ + if (clear) + TTL_STATS_RESET(ttl_data); + + return 0; +} + +int +rte_table_action_stats_read(struct rte_table_action *action, + void *data, + struct rte_table_action_stats_counters *stats, + int clear) +{ + struct stats_data *stats_data; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & + (1LLU << RTE_TABLE_ACTION_STATS)) == 0) || + (data == NULL)) + return -EINVAL; + + stats_data = action_data_get(data, action, + RTE_TABLE_ACTION_STATS); + + /* Read */ + if (stats) { + stats->n_packets = stats_data->n_packets; + stats->n_bytes = stats_data->n_bytes; + stats->n_packets_valid = 1; + stats->n_bytes_valid = 1; + } + + /* Clear */ + if (clear) { + stats_data->n_packets = 0; + stats_data->n_bytes = 0; + } + + return 0; +} + +int +rte_table_action_time_read(struct rte_table_action *action, + void *data, + uint64_t *timestamp) +{ + struct time_data *time_data; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & + (1LLU << RTE_TABLE_ACTION_TIME)) == 0) || + (data == NULL) || + (timestamp == NULL)) + return -EINVAL; + + time_data = action_data_get(data, action, RTE_TABLE_ACTION_TIME); + + /* Read */ + *timestamp = time_data->time; + + return 0; +} + +static __rte_always_inline uint64_t +pkt_work(struct rte_mbuf *mbuf, + struct rte_pipeline_table_entry *table_entry, + uint64_t time, + struct rte_table_action *action, + struct ap_config *cfg) +{ + uint64_t drop_mask = 0; + + uint32_t ip_offset = action->cfg.common.ip_offset; + void *ip = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ip_offset); + + uint32_t dscp; + uint16_t total_length; + + if (cfg->common.ip_version) { + struct ipv4_hdr *hdr = ip; + + dscp = hdr->type_of_service >> 2; + total_length = rte_ntohs(hdr->total_length); + } else { + struct ipv6_hdr *hdr = ip; + + dscp = (rte_ntohl(hdr->vtc_flow) & 0x0F600000) >> 18; + total_length = + rte_ntohs(hdr->payload_len) + sizeof(struct ipv6_hdr); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_LB)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_LB); + + pkt_work_lb(mbuf, + data, + &cfg->lb); + } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_MTR); + + drop_mask |= pkt_work_mtr(mbuf, + data, + &action->dscp_table, + action->mp, + time, + dscp, + total_length); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TM)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_TM); + + pkt_work_tm(mbuf, + data, + &action->dscp_table, + dscp); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_ENCAP); + + pkt_work_encap(mbuf, + data, + &cfg->encap, + ip, + total_length, + ip_offset); + } + + if 
(cfg->action_mask & (1LLU << RTE_TABLE_ACTION_NAT)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_NAT); + + if (cfg->common.ip_version) + pkt_ipv4_work_nat(ip, data, &cfg->nat); + else + pkt_ipv6_work_nat(ip, data, &cfg->nat); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TTL)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_TTL); + + if (cfg->common.ip_version) + drop_mask |= pkt_ipv4_work_ttl(ip, data); + else + drop_mask |= pkt_ipv6_work_ttl(ip, data); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_STATS)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_STATS); + + pkt_work_stats(data, total_length); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TIME)) { + void *data = + action_data_get(table_entry, action, RTE_TABLE_ACTION_TIME); + + pkt_work_time(data, time); + } + + return drop_mask; +} + +static __rte_always_inline uint64_t +pkt4_work(struct rte_mbuf **mbufs, + struct rte_pipeline_table_entry **table_entries, + uint64_t time, + struct rte_table_action *action, + struct ap_config *cfg) +{ + uint64_t drop_mask0 = 0; + uint64_t drop_mask1 = 0; + uint64_t drop_mask2 = 0; + uint64_t drop_mask3 = 0; + + struct rte_mbuf *mbuf0 = mbufs[0]; + struct rte_mbuf *mbuf1 = mbufs[1]; + struct rte_mbuf *mbuf2 = mbufs[2]; + struct rte_mbuf *mbuf3 = mbufs[3]; + + struct rte_pipeline_table_entry *table_entry0 = table_entries[0]; + struct rte_pipeline_table_entry *table_entry1 = table_entries[1]; + struct rte_pipeline_table_entry *table_entry2 = table_entries[2]; + struct rte_pipeline_table_entry *table_entry3 = table_entries[3]; + + uint32_t ip_offset = action->cfg.common.ip_offset; + void *ip0 = RTE_MBUF_METADATA_UINT32_PTR(mbuf0, ip_offset); + void *ip1 = RTE_MBUF_METADATA_UINT32_PTR(mbuf1, ip_offset); + void *ip2 = RTE_MBUF_METADATA_UINT32_PTR(mbuf2, ip_offset); + void *ip3 = RTE_MBUF_METADATA_UINT32_PTR(mbuf3, ip_offset); + + uint32_t dscp0, dscp1, dscp2, dscp3; + uint16_t total_length0, total_length1, total_length2, total_length3; + + if (cfg->common.ip_version) { + struct ipv4_hdr *hdr0 = ip0; + struct ipv4_hdr *hdr1 = ip1; + struct ipv4_hdr *hdr2 = ip2; + struct ipv4_hdr *hdr3 = ip3; + + dscp0 = hdr0->type_of_service >> 2; + dscp1 = hdr1->type_of_service >> 2; + dscp2 = hdr2->type_of_service >> 2; + dscp3 = hdr3->type_of_service >> 2; + + total_length0 = rte_ntohs(hdr0->total_length); + total_length1 = rte_ntohs(hdr1->total_length); + total_length2 = rte_ntohs(hdr2->total_length); + total_length3 = rte_ntohs(hdr3->total_length); + } else { + struct ipv6_hdr *hdr0 = ip0; + struct ipv6_hdr *hdr1 = ip1; + struct ipv6_hdr *hdr2 = ip2; + struct ipv6_hdr *hdr3 = ip3; + + dscp0 = (rte_ntohl(hdr0->vtc_flow) & 0x0F600000) >> 18; + dscp1 = (rte_ntohl(hdr1->vtc_flow) & 0x0F600000) >> 18; + dscp2 = (rte_ntohl(hdr2->vtc_flow) & 0x0F600000) >> 18; + dscp3 = (rte_ntohl(hdr3->vtc_flow) & 0x0F600000) >> 18; + + total_length0 = + rte_ntohs(hdr0->payload_len) + sizeof(struct ipv6_hdr); + total_length1 = + rte_ntohs(hdr1->payload_len) + sizeof(struct ipv6_hdr); + total_length2 = + rte_ntohs(hdr2->payload_len) + sizeof(struct ipv6_hdr); + total_length3 = + rte_ntohs(hdr3->payload_len) + sizeof(struct ipv6_hdr); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_LB)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_LB); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_LB); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_LB); + 
void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_LB); + + pkt_work_lb(mbuf0, + data0, + &cfg->lb); + + pkt_work_lb(mbuf1, + data1, + &cfg->lb); + + pkt_work_lb(mbuf2, + data2, + &cfg->lb); + + pkt_work_lb(mbuf3, + data3, + &cfg->lb); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_MTR); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_MTR); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_MTR); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_MTR); + + drop_mask0 |= pkt_work_mtr(mbuf0, + data0, + &action->dscp_table, + action->mp, + time, + dscp0, + total_length0); + + drop_mask1 |= pkt_work_mtr(mbuf1, + data1, + &action->dscp_table, + action->mp, + time, + dscp1, + total_length1); + + drop_mask2 |= pkt_work_mtr(mbuf2, + data2, + &action->dscp_table, + action->mp, + time, + dscp2, + total_length2); + + drop_mask3 |= pkt_work_mtr(mbuf3, + data3, + &action->dscp_table, + action->mp, + time, + dscp3, + total_length3); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TM)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_TM); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_TM); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_TM); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_TM); + + pkt_work_tm(mbuf0, + data0, + &action->dscp_table, + dscp0); + + pkt_work_tm(mbuf1, + data1, + &action->dscp_table, + dscp1); + + pkt_work_tm(mbuf2, + data2, + &action->dscp_table, + dscp2); + + pkt_work_tm(mbuf3, + data3, + &action->dscp_table, + dscp3); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_ENCAP); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_ENCAP); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_ENCAP); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_ENCAP); + + pkt_work_encap(mbuf0, + data0, + &cfg->encap, + ip0, + total_length0, + ip_offset); + + pkt_work_encap(mbuf1, + data1, + &cfg->encap, + ip1, + total_length1, + ip_offset); + + pkt_work_encap(mbuf2, + data2, + &cfg->encap, + ip2, + total_length2, + ip_offset); + + pkt_work_encap(mbuf3, + data3, + &cfg->encap, + ip3, + total_length3, + ip_offset); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_NAT)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_NAT); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_NAT); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_NAT); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_NAT); + + if (cfg->common.ip_version) { + pkt_ipv4_work_nat(ip0, data0, &cfg->nat); + pkt_ipv4_work_nat(ip1, data1, &cfg->nat); + pkt_ipv4_work_nat(ip2, data2, &cfg->nat); + pkt_ipv4_work_nat(ip3, data3, &cfg->nat); + } else { + pkt_ipv6_work_nat(ip0, data0, &cfg->nat); + pkt_ipv6_work_nat(ip1, data1, &cfg->nat); + pkt_ipv6_work_nat(ip2, data2, &cfg->nat); + pkt_ipv6_work_nat(ip3, data3, &cfg->nat); + } + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TTL)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_TTL); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_TTL); + void *data2 = + action_data_get(table_entry2, action, 
RTE_TABLE_ACTION_TTL); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_TTL); + + if (cfg->common.ip_version) { + drop_mask0 |= pkt_ipv4_work_ttl(ip0, data0); + drop_mask1 |= pkt_ipv4_work_ttl(ip1, data1); + drop_mask2 |= pkt_ipv4_work_ttl(ip2, data2); + drop_mask3 |= pkt_ipv4_work_ttl(ip3, data3); + } else { + drop_mask0 |= pkt_ipv6_work_ttl(ip0, data0); + drop_mask1 |= pkt_ipv6_work_ttl(ip1, data1); + drop_mask2 |= pkt_ipv6_work_ttl(ip2, data2); + drop_mask3 |= pkt_ipv6_work_ttl(ip3, data3); + } + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_STATS)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_STATS); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_STATS); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_STATS); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_STATS); + + pkt_work_stats(data0, total_length0); + pkt_work_stats(data1, total_length1); + pkt_work_stats(data2, total_length2); + pkt_work_stats(data3, total_length3); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TIME)) { + void *data0 = + action_data_get(table_entry0, action, RTE_TABLE_ACTION_TIME); + void *data1 = + action_data_get(table_entry1, action, RTE_TABLE_ACTION_TIME); + void *data2 = + action_data_get(table_entry2, action, RTE_TABLE_ACTION_TIME); + void *data3 = + action_data_get(table_entry3, action, RTE_TABLE_ACTION_TIME); + + pkt_work_time(data0, time); + pkt_work_time(data1, time); + pkt_work_time(data2, time); + pkt_work_time(data3, time); + } + + return drop_mask0 | + (drop_mask1 << 1) | + (drop_mask2 << 2) | + (drop_mask3 << 3); +} + +static __rte_always_inline int +ah(struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + struct rte_pipeline_table_entry **entries, + struct rte_table_action *action, + struct ap_config *cfg) +{ + uint64_t pkts_drop_mask = 0; + uint64_t time = 0; + + if (cfg->action_mask & ((1LLU << RTE_TABLE_ACTION_MTR) | + (1LLU << RTE_TABLE_ACTION_TIME))) + time = rte_rdtsc(); + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) { + uint64_t drop_mask; + + drop_mask = pkt4_work(&pkts[i], + &entries[i], + time, + action, + cfg); + + pkts_drop_mask |= drop_mask << i; + } + + for ( ; i < n_pkts; i++) { + uint64_t drop_mask; + + drop_mask = pkt_work(pkts[i], + entries[i], + time, + action, + cfg); + + pkts_drop_mask |= drop_mask << i; + } + } else + for ( ; pkts_mask; ) { + uint32_t pos = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pos; + uint64_t drop_mask; + + drop_mask = pkt_work(pkts[pos], + entries[pos], + time, + action, + cfg); + + pkts_mask &= ~pkt_mask; + pkts_drop_mask |= drop_mask << pos; + } + + rte_pipeline_ah_packet_drop(p, pkts_drop_mask); + + return 0; +} + +static int +ah_default(struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + struct rte_pipeline_table_entry **entries, + void *arg) +{ + struct rte_table_action *action = arg; + + return ah(p, + pkts, + pkts_mask, + entries, + action, + &action->cfg); +} + +static rte_pipeline_table_action_handler_hit +ah_selector(struct rte_table_action *action) +{ + if (action->cfg.action_mask == (1LLU << RTE_TABLE_ACTION_FWD)) + return NULL; + + return ah_default; +} + +int +rte_table_action_table_params_get(struct rte_table_action *action, + struct rte_pipeline_table_params *params) +{ + rte_pipeline_table_action_handler_hit 
f_action_hit; + uint32_t total_size; + + /* Check input arguments */ + if ((action == NULL) || + (params == NULL)) + return -EINVAL; + + f_action_hit = ah_selector(action); + total_size = rte_align32pow2(action->data.total_size); + + /* Fill in params */ + params->f_action_hit = f_action_hit; + params->f_action_miss = NULL; + params->arg_ah = (f_action_hit) ? action : NULL; + params->action_data_size = total_size - + sizeof(struct rte_pipeline_table_entry); + + return 0; +} + +int +rte_table_action_free(struct rte_table_action *action) +{ + if (action == NULL) + return 0; + + rte_free(action); + + return 0; +} diff --git a/lib/librte_pipeline/rte_table_action.h b/lib/librte_pipeline/rte_table_action.h new file mode 100644 index 00000000..c7f751aa --- /dev/null +++ b/lib/librte_pipeline/rte_table_action.h @@ -0,0 +1,905 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_TABLE_ACTION_H__ +#define __INCLUDE_RTE_TABLE_ACTION_H__ + +/** + * @file + * RTE Pipeline Table Actions + * + * This API provides a common set of actions for pipeline tables to speed up + * application development. + * + * Each match-action rule added to a pipeline table has associated data that + * stores the action context. This data is input to the table action handler + * called for every input packet that hits the rule as part of the table lookup + * during the pipeline execution. The pipeline library allows the user to define + * his own table actions by providing customized table action handlers (table + * lookup) and complete freedom of setting the rules and their data (table rule + * add/delete). While the user can still follow this process, this API is + * intended to provide a quicker development alternative for a set of predefined + * actions. + * + * The typical steps to use this API are: + * - Define a table action profile. This is a configuration template that can + * potentially be shared by multiple tables from the same or different + * pipelines, with different tables from the same pipeline likely to use + * different action profiles. For every table using a given action profile, + * the profile defines the set of actions and the action configuration to be + * implemented for all the table rules. API functions: + * rte_table_action_profile_create(), + * rte_table_action_profile_action_register(), + * rte_table_action_profile_freeze(). + * + * - Instantiate the table action profile to create table action objects. Each + * pipeline table has its own table action object. API functions: + * rte_table_action_create(). + * + * - Use the table action object to generate the pipeline table action handlers + * (invoked by the pipeline table lookup operation). API functions: + * rte_table_action_table_params_get(). + * + * - Use the table action object to generate the rule data (for the pipeline + * table rule add operation) based on given action parameters. API functions: + * rte_table_action_apply(). + * + * - Use the table action object to read action data (e.g. stats counters) for + * any given rule. API functions: rte_table_action_XYZ_read(). + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include +#include +#include +#include + +#include "rte_pipeline.h" + +/** Table actions. */ +enum rte_table_action_type { + /** Forward to next pipeline table, output port or drop. */ + RTE_TABLE_ACTION_FWD = 0, + + /** Load balance. 
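/*
 * Editor's sketch (not part of the upstream patch): the profile-create ->
 * action-register -> freeze -> create sequence described in the file header
 * above, for a table that only needs the FWD action. The ip_offset formula
 * assumes no mbuf private area and an Ethernet header in front of the IP
 * header; error handling is minimal. The API is experimental, so callers
 * build with ALLOW_EXPERIMENTAL_API.
 */
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_table_action.h>

static struct rte_table_action *
example_table_action_setup(uint32_t socket_id)
{
	struct rte_table_action_common_config common = {
		.ip_version = 1, /* IPv4 */
		.ip_offset = sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM +
			sizeof(struct ether_hdr),
	};
	struct rte_table_action_profile *profile;

	profile = rte_table_action_profile_create(&common);
	if (profile == NULL)
		return NULL;

	/* FWD has no config structure, so the config pointer is NULL. */
	if (rte_table_action_profile_action_register(profile,
			RTE_TABLE_ACTION_FWD, NULL) ||
	    rte_table_action_profile_freeze(profile)) {
		rte_table_action_profile_free(profile);
		return NULL;
	}

	return rte_table_action_create(profile, socket_id);
}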
*/ + RTE_TABLE_ACTION_LB, + + /** Traffic Metering and Policing. */ + RTE_TABLE_ACTION_MTR, + + /** Traffic Management. */ + RTE_TABLE_ACTION_TM, + + /** Packet encapsulations. */ + RTE_TABLE_ACTION_ENCAP, + + /** Network Address Translation (NAT). */ + RTE_TABLE_ACTION_NAT, + + /** Time to Live (TTL) update. */ + RTE_TABLE_ACTION_TTL, + + /** Statistics. */ + RTE_TABLE_ACTION_STATS, + + /** Timestamp. */ + RTE_TABLE_ACTION_TIME, +}; + +/** Common action configuration (per table action profile). */ +struct rte_table_action_common_config { + /** Input packet Internet Protocol (IP) version. Non-zero for IPv4, zero + * for IPv6. + */ + int ip_version; + + /** IP header offset within the input packet buffer. Offset 0 points to + * the first byte of the MBUF structure. + */ + uint32_t ip_offset; +}; + +/** + * RTE_TABLE_ACTION_FWD + */ +/** Forward action parameters (per table rule). */ +struct rte_table_action_fwd_params { + /** Forward action. */ + enum rte_pipeline_action action; + + /** Pipeline table ID or output port ID. */ + uint32_t id; +}; + +/** + * RTE_TABLE_ACTION_LB + */ +/** Load balance key size min (number of bytes). */ +#define RTE_TABLE_ACTION_LB_KEY_SIZE_MIN 8 + +/** Load balance key size max (number of bytes). */ +#define RTE_TABLE_ACTION_LB_KEY_SIZE_MAX 64 + +/** Load balance table size. */ +#define RTE_TABLE_ACTION_LB_TABLE_SIZE 8 + +/** Load balance action configuration (per table action profile). */ +struct rte_table_action_lb_config { + /** Key size (number of bytes). */ + uint32_t key_size; + + /** Key offset within the input packet buffer. Offset 0 points to the + * first byte of the MBUF structure. + */ + uint32_t key_offset; + + /** Key mask (*key_size* bytes are valid). */ + uint8_t key_mask[RTE_TABLE_ACTION_LB_KEY_SIZE_MAX]; + + /** Hash function. */ + rte_table_hash_op_hash f_hash; + + /** Seed value for *f_hash*. */ + uint64_t seed; + + /** Output value offset within the input packet buffer. Offset 0 points + * to the first byte of the MBUF structure. + */ + uint32_t out_offset; +}; + +/** Load balance action parameters (per table rule). */ +struct rte_table_action_lb_params { + /** Table defining the output values and their weights. The weights are + * set in 1/RTE_TABLE_ACTION_LB_TABLE_SIZE increments. To assign a + * weight of N/RTE_TABLE_ACTION_LB_TABLE_SIZE to a given output value + * (0 <= N <= RTE_TABLE_ACTION_LB_TABLE_SIZE), the same output value + * needs to show up exactly N times in this table. + */ + uint32_t out[RTE_TABLE_ACTION_LB_TABLE_SIZE]; +}; + +/** + * RTE_TABLE_ACTION_MTR + */ +/** Max number of traffic classes (TCs). */ +#define RTE_TABLE_ACTION_TC_MAX 4 + +/** Max number of queues per traffic class. */ +#define RTE_TABLE_ACTION_TC_QUEUE_MAX 4 + +/** Differentiated Services Code Point (DSCP) translation table entry. */ +struct rte_table_action_dscp_table_entry { + /** Traffic class. Used by the meter or the traffic management actions. + * Has to be strictly smaller than *RTE_TABLE_ACTION_TC_MAX*. Traffic + * class 0 is the highest priority. + */ + uint32_t tc_id; + + /** Traffic class queue. Used by the traffic management action. Has to + * be strictly smaller than *RTE_TABLE_ACTION_TC_QUEUE_MAX*. + */ + uint32_t tc_queue_id; + + /** Packet color. Used by the meter action as the packet input color + * for the color aware mode of the traffic metering algorithm. + */ + enum rte_meter_color color; +}; + +/** DSCP translation table. 
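/*
 * Editor's illustration (not part of the upstream patch): how load balance
 * weights are expressed through the 8-entry output table. An output value
 * repeated N times receives weight N/8; here output 0 gets 4/8 of the
 * traffic and outputs 1 and 2 get 2/8 each. The output values themselves
 * are placeholders.
 */
static const struct rte_table_action_lb_params example_lb_params = {
	.out = {0, 0, 0, 0, 1, 1, 2, 2},
};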
*/ +struct rte_table_action_dscp_table { + /** Array of DSCP table entries */ + struct rte_table_action_dscp_table_entry entry[64]; +}; + +/** Supported traffic metering algorithms. */ +enum rte_table_action_meter_algorithm { + /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */ + RTE_TABLE_ACTION_METER_SRTCM, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */ + RTE_TABLE_ACTION_METER_TRTCM, +}; + +/** Traffic metering profile (configuration template). */ +struct rte_table_action_meter_profile { + /** Traffic metering algorithm. */ + enum rte_table_action_meter_algorithm alg; + + RTE_STD_C11 + union { + /** Only valid when *alg* is set to srTCM - IETF RFC 2697. */ + struct rte_meter_srtcm_params srtcm; + + /** Only valid when *alg* is set to trTCM - IETF RFC 2698. */ + struct rte_meter_trtcm_params trtcm; + }; +}; + +/** Policer actions. */ +enum rte_table_action_policer { + /** Recolor the packet as green. */ + RTE_TABLE_ACTION_POLICER_COLOR_GREEN = 0, + + /** Recolor the packet as yellow. */ + RTE_TABLE_ACTION_POLICER_COLOR_YELLOW, + + /** Recolor the packet as red. */ + RTE_TABLE_ACTION_POLICER_COLOR_RED, + + /** Drop the packet. */ + RTE_TABLE_ACTION_POLICER_DROP, + + /** Number of policer actions. */ + RTE_TABLE_ACTION_POLICER_MAX +}; + +/** Meter action configuration per traffic class. */ +struct rte_table_action_mtr_tc_params { + /** Meter profile ID. */ + uint32_t meter_profile_id; + + /** Policer actions. */ + enum rte_table_action_policer policer[e_RTE_METER_COLORS]; +}; + +/** Meter action statistics counters per traffic class. */ +struct rte_table_action_mtr_counters_tc { + /** Number of packets per color at the output of the traffic metering + * and before the policer actions are executed. Only valid when + * *n_packets_valid* is non-zero. + */ + uint64_t n_packets[e_RTE_METER_COLORS]; + + /** Number of packet bytes per color at the output of the traffic + * metering and before the policer actions are executed. Only valid when + * *n_bytes_valid* is non-zero. + */ + uint64_t n_bytes[e_RTE_METER_COLORS]; + + /** When non-zero, the *n_packets* field is valid. */ + int n_packets_valid; + + /** When non-zero, the *n_bytes* field is valid. */ + int n_bytes_valid; +}; + +/** Meter action configuration (per table action profile). */ +struct rte_table_action_mtr_config { + /** Meter algorithm. */ + enum rte_table_action_meter_algorithm alg; + + /** Number of traffic classes. Each traffic class has its own traffic + * meter and policer instances. Needs to be equal to either 1 or to + * *RTE_TABLE_ACTION_TC_MAX*. + */ + uint32_t n_tc; + + /** When non-zero, the *n_packets* meter stats counter is enabled, + * otherwise it is disabled. + * + * @see struct rte_table_action_mtr_counters_tc + */ + int n_packets_enabled; + + /** When non-zero, the *n_bytes* meter stats counter is enabled, + * otherwise it is disabled. + * + * @see struct rte_table_action_mtr_counters_tc + */ + int n_bytes_enabled; +}; + +/** Meter action parameters (per table rule). */ +struct rte_table_action_mtr_params { + /** Traffic meter and policer parameters for each of the *tc_mask* + * traffic classes. + */ + struct rte_table_action_mtr_tc_params mtr[RTE_TABLE_ACTION_TC_MAX]; + + /** Bit mask defining which traffic class parameters are valid in *mtr*. + * If bit N is set in *tc_mask*, then parameters for traffic class N are + * valid in *mtr*. + */ + uint32_t tc_mask; +}; + +/** Meter action statistics counters (per table rule). 
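/*
 * Editor's sketch (not part of the upstream patch): registering a trTCM
 * meter profile under a hypothetical profile ID 0 and filling per-rule meter
 * parameters that pass green/yellow packets through and drop red ones for
 * traffic class 0. It assumes the MTR action was registered in the action
 * profile; the rate and burst numbers are placeholders.
 */
static int
example_meter_setup(struct rte_table_action *action,
	struct rte_table_action_mtr_params *p)
{
	struct rte_table_action_meter_profile mp = {
		.alg = RTE_TABLE_ACTION_METER_TRTCM,
		.trtcm = {
			.cir = 1250000, /* 10 Mbps, in bytes per second */
			.pir = 2500000, /* 20 Mbps, in bytes per second */
			.cbs = 2048,
			.pbs = 2048,
		},
	};
	int status = rte_table_action_meter_profile_add(action, 0, &mp);

	if (status)
		return status;

	p->tc_mask = 1; /* only traffic class 0 is configured */
	p->mtr[0].meter_profile_id = 0;
	p->mtr[0].policer[e_RTE_METER_GREEN] =
		RTE_TABLE_ACTION_POLICER_COLOR_GREEN;
	p->mtr[0].policer[e_RTE_METER_YELLOW] =
		RTE_TABLE_ACTION_POLICER_COLOR_YELLOW;
	p->mtr[0].policer[e_RTE_METER_RED] = RTE_TABLE_ACTION_POLICER_DROP;

	return 0;
}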
*/ +struct rte_table_action_mtr_counters { + /** Stats counters for each of the *tc_mask* traffic classes. */ + struct rte_table_action_mtr_counters_tc stats[RTE_TABLE_ACTION_TC_MAX]; + + /** Bit mask defining which traffic class parameters are valid in *mtr*. + * If bit N is set in *tc_mask*, then parameters for traffic class N are + * valid in *mtr*. + */ + uint32_t tc_mask; +}; + +/** + * RTE_TABLE_ACTION_TM + */ +/** Traffic management action configuration (per table action profile). */ +struct rte_table_action_tm_config { + /** Number of subports per port. */ + uint32_t n_subports_per_port; + + /** Number of pipes per subport. */ + uint32_t n_pipes_per_subport; +}; + +/** Traffic management action parameters (per table rule). */ +struct rte_table_action_tm_params { + /** Subport ID. */ + uint32_t subport_id; + + /** Pipe ID. */ + uint32_t pipe_id; +}; + +/** + * RTE_TABLE_ACTION_ENCAP + */ +/** Supported packet encapsulation types. */ +enum rte_table_action_encap_type { + /** IP -> { Ether | IP } */ + RTE_TABLE_ACTION_ENCAP_ETHER = 0, + + /** IP -> { Ether | VLAN | IP } */ + RTE_TABLE_ACTION_ENCAP_VLAN, + + /** IP -> { Ether | S-VLAN | C-VLAN | IP } */ + RTE_TABLE_ACTION_ENCAP_QINQ, + + /** IP -> { Ether | MPLS | IP } */ + RTE_TABLE_ACTION_ENCAP_MPLS, + + /** IP -> { Ether | PPPoE | PPP | IP } */ + RTE_TABLE_ACTION_ENCAP_PPPOE, +}; + +/** Pre-computed Ethernet header fields for encapsulation action. */ +struct rte_table_action_ether_hdr { + struct ether_addr da; /**< Destination address. */ + struct ether_addr sa; /**< Source address. */ +}; + +/** Pre-computed VLAN header fields for encapsulation action. */ +struct rte_table_action_vlan_hdr { + uint8_t pcp; /**< Priority Code Point (PCP). */ + uint8_t dei; /**< Drop Eligibility Indicator (DEI). */ + uint16_t vid; /**< VLAN Identifier (VID). */ +}; + +/** Pre-computed MPLS header fields for encapsulation action. */ +struct rte_table_action_mpls_hdr { + uint32_t label; /**< Label. */ + uint8_t tc; /**< Traffic Class (TC). */ + uint8_t ttl; /**< Time to Live (TTL). */ +}; + +/** Pre-computed PPPoE header fields for encapsulation action. */ +struct rte_table_action_pppoe_hdr { + uint16_t session_id; /**< Session ID. */ +}; + +/** Ether encap parameters. */ +struct rte_table_action_encap_ether_params { + struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ +}; + +/** VLAN encap parameters. */ +struct rte_table_action_encap_vlan_params { + struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ + struct rte_table_action_vlan_hdr vlan; /**< VLAN header. */ +}; + +/** QinQ encap parameters. */ +struct rte_table_action_encap_qinq_params { + struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ + struct rte_table_action_vlan_hdr svlan; /**< Service VLAN header. */ + struct rte_table_action_vlan_hdr cvlan; /**< Customer VLAN header. */ +}; + +/** Max number of MPLS labels per output packet for MPLS encapsulation. */ +#ifndef RTE_TABLE_ACTION_MPLS_LABELS_MAX +#define RTE_TABLE_ACTION_MPLS_LABELS_MAX 4 +#endif + +/** MPLS encap parameters. */ +struct rte_table_action_encap_mpls_params { + /** Ethernet header. */ + struct rte_table_action_ether_hdr ether; + + /** MPLS header. */ + struct rte_table_action_mpls_hdr mpls[RTE_TABLE_ACTION_MPLS_LABELS_MAX]; + + /** Number of MPLS labels in MPLS header. */ + uint32_t mpls_count; + + /** Non-zero for MPLS unicast, zero for MPLS multicast. */ + int unicast; +}; + +/** PPPoE encap parameters. 
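/*
 * Editor's illustration (not part of the upstream patch): pre-computed
 * header fields for an Ether + VLAN encapsulation rule. The MAC addresses
 * and VLAN ID are placeholders.
 */
static const struct rte_table_action_encap_vlan_params example_vlan_encap = {
	.ether = {
		.da.addr_bytes = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
		.sa.addr_bytes = {0x00, 0xaa, 0xbb, 0xcc, 0xdd, 0xee},
	},
	.vlan = {
		.pcp = 0,
		.dei = 0,
		.vid = 100,
	},
};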
*/ +struct rte_table_action_encap_pppoe_params { + struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ + struct rte_table_action_pppoe_hdr pppoe; /**< PPPoE/PPP headers. */ +}; + +/** Encap action configuration (per table action profile). */ +struct rte_table_action_encap_config { + /** Bit mask defining the set of packet encapsulations enabled for the + * current table action profile. If bit (1 << N) is set in *encap_mask*, + * then packet encapsulation N is enabled, otherwise it is disabled. + * + * @see enum rte_table_action_encap_type + */ + uint64_t encap_mask; +}; + +/** Encap action parameters (per table rule). */ +struct rte_table_action_encap_params { + /** Encapsulation type. */ + enum rte_table_action_encap_type type; + + RTE_STD_C11 + union { + /** Only valid when *type* is set to Ether. */ + struct rte_table_action_encap_ether_params ether; + + /** Only valid when *type* is set to VLAN. */ + struct rte_table_action_encap_vlan_params vlan; + + /** Only valid when *type* is set to QinQ. */ + struct rte_table_action_encap_qinq_params qinq; + + /** Only valid when *type* is set to MPLS. */ + struct rte_table_action_encap_mpls_params mpls; + + /** Only valid when *type* is set to PPPoE. */ + struct rte_table_action_encap_pppoe_params pppoe; + }; +}; + +/** + * RTE_TABLE_ACTION_NAT + */ +/** NAT action configuration (per table action profile). */ +struct rte_table_action_nat_config { + /** When non-zero, the IP source address and L4 protocol source port are + * translated. When zero, the IP destination address and L4 protocol + * destination port are translated. + */ + int source_nat; + + /** Layer 4 protocol, for example TCP (0x06) or UDP (0x11). The checksum + * field is computed differently and placed at different header offset + * by each layer 4 protocol. + */ + uint8_t proto; +}; + +/** NAT action parameters (per table rule). */ +struct rte_table_action_nat_params { + /** IP version for *addr*: non-zero for IPv4, zero for IPv6. */ + int ip_version; + + /** IP address. */ + union { + /** IPv4 address; only valid when *ip_version* is non-zero. */ + uint32_t ipv4; + + /** IPv6 address; only valid when *ip_version* is set to 0. */ + uint8_t ipv6[16]; + } addr; + + /** Port. */ + uint16_t port; +}; + +/** + * RTE_TABLE_ACTION_TTL + */ +/** TTL action configuration (per table action profile). */ +struct rte_table_action_ttl_config { + /** When non-zero, the input packets whose updated IPv4 Time to Live + * (TTL) field or IPv6 Hop Limit (HL) field is zero are dropped. + * When zero, the input packets whose updated IPv4 TTL field or IPv6 HL + * field is zero are forwarded as usual (typically for debugging + * purpose). + */ + int drop; + + /** When non-zero, the *n_packets* stats counter for TTL action is + * enabled, otherwise disabled. + * + * @see struct rte_table_action_ttl_counters + */ + int n_packets_enabled; +}; + +/** TTL action parameters (per table rule). */ +struct rte_table_action_ttl_params { + /** When non-zero, decrement the IPv4 TTL field and update the checksum + * field, or decrement the IPv6 HL field. When zero, the IPv4 TTL field + * or the IPv6 HL field is not changed. + */ + int decrement; +}; + +/** TTL action statistics packets (per table rule). */ +struct rte_table_action_ttl_counters { + /** Number of IPv4 packets whose updated TTL field is zero or IPv6 + * packets whose updated HL field is zero. + */ + uint64_t n_packets; +}; + +/** + * RTE_TABLE_ACTION_STATS + */ +/** Stats action configuration (per table action profile). 
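/*
 * Editor's sketch (not part of the upstream patch): a source-NAT setup for
 * UDP. The profile-level config selects which fields get translated, while
 * the per-rule parameters carry the translated address and port; the IPv4
 * address is assumed to be given in host byte order, and both the address
 * and the port number are placeholders.
 */
static struct rte_table_action_nat_config example_nat_config = {
	.source_nat = 1,
	.proto = 0x11, /* UDP */
};

static struct rte_table_action_nat_params example_nat_rule = {
	.ip_version = 1, /* IPv4 */
	.addr.ipv4 = 0xc0a80101, /* 192.168.1.1 */
	.port = 5000,
};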
*/ +struct rte_table_action_stats_config { + /** When non-zero, the *n_packets* stats counter is enabled, otherwise + * disabled. + * + * @see struct rte_table_action_stats_counters + */ + int n_packets_enabled; + + /** When non-zero, the *n_bytes* stats counter is enabled, otherwise + * disabled. + * + * @see struct rte_table_action_stats_counters + */ + int n_bytes_enabled; +}; + +/** Stats action parameters (per table rule). */ +struct rte_table_action_stats_params { + /** Initial value for the *n_packets* stats counter. Typically set to 0. + * + * @see struct rte_table_action_stats_counters + */ + uint64_t n_packets; + + /** Initial value for the *n_bytes* stats counter. Typically set to 0. + * + * @see struct rte_table_action_stats_counters + */ + uint64_t n_bytes; +}; + +/** Stats action counters (per table rule). */ +struct rte_table_action_stats_counters { + /** Number of packets. Valid only when *n_packets_valid* is non-zero. */ + uint64_t n_packets; + + /** Number of bytes. Valid only when *n_bytes_valid* is non-zero. */ + uint64_t n_bytes; + + /** When non-zero, the *n_packets* field is valid, otherwise invalid. */ + int n_packets_valid; + + /** When non-zero, the *n_bytes* field is valid, otherwise invalid. */ + int n_bytes_valid; +}; + +/** + * RTE_TABLE_ACTION_TIME + */ +/** Timestamp action parameters (per table rule). */ +struct rte_table_action_time_params { + /** Initial timestamp value. Typically set to current time. */ + uint64_t time; +}; + +/** + * Table action profile. + */ +struct rte_table_action_profile; + +/** + * Table action profile create. + * + * @param[in] common + * Common action configuration. + * @return + * Table action profile handle on success, NULL otherwise. + */ +struct rte_table_action_profile * __rte_experimental +rte_table_action_profile_create(struct rte_table_action_common_config *common); + +/** + * Table action profile free. + * + * @param[in] profile + * Table profile action handle (needs to be valid). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_profile_free(struct rte_table_action_profile *profile); + +/** + * Table action profile action register. + * + * @param[in] profile + * Table profile action handle (needs to be valid and not in frozen state). + * @param[in] type + * Specific table action to be registered for *profile*. + * @param[in] action_config + * Configuration for the *type* action. + * If struct rte_table_action_*type*_config is defined by the Table Action + * API, it needs to point to a valid instance of this structure, otherwise it + * needs to be set to NULL. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_profile_action_register(struct rte_table_action_profile *profile, + enum rte_table_action_type type, + void *action_config); + +/** + * Table action profile freeze. + * + * Once this function is called successfully, the given profile enters the + * frozen state with the following immediate effects: no more actions can be + * registered for this profile, so the profile can be instantiated to create + * table action objects. + * + * @param[in] profile + * Table profile action handle (needs to be valid and not in frozen state). + * @return + * Zero on success, non-zero error code otherwise. + * + * @see rte_table_action_create() + */ +int __rte_experimental +rte_table_action_profile_freeze(struct rte_table_action_profile *profile); + +/** + * Table action. 
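/*
 * Editor's illustration (not part of the upstream patch): typical per-rule
 * initial values for the stats and timestamp actions, matching the hints
 * above: counters start from zero and the timestamp starts from the current
 * TSC, which is also what the run-time handler samples.
 */
#include <rte_cycles.h>

static void
example_init_rule_params(struct rte_table_action_stats_params *sp,
	struct rte_table_action_time_params *tp)
{
	sp->n_packets = 0;
	sp->n_bytes = 0;
	tp->time = rte_rdtsc();
}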
+ */ +struct rte_table_action; + +/** + * Table action create. + * + * Instantiates the given table action profile to create a table action object. + * + * @param[in] profile + * Table profile action handle (needs to be valid and in frozen state). + * @param[in] socket_id + * CPU socket ID where the internal data structures required by the new table + * action object should be allocated. + * @return + * Handle to table action object on success, NULL on error. + * + * @see rte_table_action_create() + */ +struct rte_table_action * __rte_experimental +rte_table_action_create(struct rte_table_action_profile *profile, + uint32_t socket_id); + +/** + * Table action free. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_free(struct rte_table_action *action); + +/** + * Table action table params get. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[inout] params + * Pipeline table parameters (needs to be pre-allocated). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_table_params_get(struct rte_table_action *action, + struct rte_pipeline_table_params *params); + +/** + * Table action apply. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) to apply action *type* on. + * @param[in] type + * Specific table action previously registered for the table action profile of + * the *action* object. + * @param[in] action_params + * Parameters for the *type* action. + * If struct rte_table_action_*type*_params is defined by the Table Action + * API, it needs to point to a valid instance of this structure, otherwise it + * needs to be set to NULL. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_apply(struct rte_table_action *action, + void *data, + enum rte_table_action_type type, + void *action_params); + +/** + * Table action DSCP table update. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] dscp_mask + * 64-bit mask defining the DSCP table entries to be updated. If bit N is set + * in this bit mask, then DSCP table entry N is to be updated, otherwise not. + * @param[in] table + * DSCP table. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_dscp_table_update(struct rte_table_action *action, + uint64_t dscp_mask, + struct rte_table_action_dscp_table *table); + +/** + * Table action meter profile add. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] meter_profile_id + * Meter profile ID to be used for the *profile* once it is successfully added + * to the *action* object (needs to be unused by the set of meter profiles + * currently registered for the *action* object). + * @param[in] profile + * Meter profile to be added. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_meter_profile_add(struct rte_table_action *action, + uint32_t meter_profile_id, + struct rte_table_action_meter_profile *profile); + +/** + * Table action meter profile delete. + * + * @param[in] action + * Handle to table action object (needs to be valid). 
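/*
 * Editor's sketch (not part of the upstream patch): building rule data with
 * rte_table_action_apply() before the rule is added to the pipeline table.
 * It assumes the FWD and STATS actions were registered in the action profile
 * and that rule_data is large enough (see
 * rte_table_action_table_params_get()); the output port ID is a placeholder.
 */
static int
example_build_rule_data(struct rte_table_action *a, void *rule_data)
{
	struct rte_table_action_fwd_params fwd = {
		.action = RTE_PIPELINE_ACTION_PORT,
		.id = 0, /* output port 0 */
	};
	struct rte_table_action_stats_params stats = {
		.n_packets = 0,
		.n_bytes = 0,
	};
	int status;

	status = rte_table_action_apply(a, rule_data,
		RTE_TABLE_ACTION_FWD, &fwd);
	if (status)
		return status;

	return rte_table_action_apply(a, rule_data,
		RTE_TABLE_ACTION_STATS, &stats);
}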
+ * @param[in] meter_profile_id + * Meter profile ID of the meter profile to be deleted from the *action* + * object (needs to be valid for the *action* object). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_meter_profile_delete(struct rte_table_action *action, + uint32_t meter_profile_id); + +/** + * Table action meter read. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) with meter action previously + * applied on it. + * @param[in] tc_mask + * Bit mask defining which traffic classes should have the meter stats + * counters read from *data* and stored into *stats*. If bit N is set in this + * bit mask, then traffic class N is part of this operation, otherwise it is + * not. If bit N is set in this bit mask, then traffic class N must be one of + * the traffic classes that are enabled for the meter action in the table + * action profile used by the *action* object. + * @param[inout] stats + * When non-NULL, it points to the area where the meter stats counters read + * from *data* are saved. Only the meter stats counters for the *tc_mask* + * traffic classes are read and stored to *stats*. + * @param[in] clear + * When non-zero, the meter stats counters are cleared (i.e. set to zero), + * otherwise the counters are not modified. When the read operation is enabled + * (*stats* is non-NULL), the clear operation is performed after the read + * operation is completed. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_meter_read(struct rte_table_action *action, + void *data, + uint32_t tc_mask, + struct rte_table_action_mtr_counters *stats, + int clear); + +/** + * Table action TTL read. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) with TTL action previously + * applied on it. + * @param[inout] stats + * When non-NULL, it points to the area where the TTL stats counters read from + * *data* are saved. + * @param[in] clear + * When non-zero, the TTL stats counters are cleared (i.e. set to zero), + * otherwise the counters are not modified. When the read operation is enabled + * (*stats* is non-NULL), the clear operation is performed after the read + * operation is completed. + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_ttl_read(struct rte_table_action *action, + void *data, + struct rte_table_action_ttl_counters *stats, + int clear); + +/** + * Table action stats read. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) with stats action previously + * applied on it. + * @param[inout] stats + * When non-NULL, it points to the area where the stats counters read from + * *data* are saved. + * @param[in] clear + * When non-zero, the stats counters are cleared (i.e. set to zero), otherwise + * the counters are not modified. When the read operation is enabled (*stats* + * is non-NULL), the clear operation is performed after the read operation is + * completed. + * @return + * Zero on success, non-zero error code otherwise. 
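/*
 * Editor's sketch (not part of the upstream patch): the read-and-clear
 * pattern shared by the meter, TTL and stats read functions. The rule data
 * pointer is the same one the actions were applied on; clear=1 zeroes the
 * counters after they have been copied out.
 */
#include <stdio.h>
#include <inttypes.h>

static void
example_poll_rule_counters(struct rte_table_action *a, void *rule_data)
{
	struct rte_table_action_stats_counters c;

	if (rte_table_action_stats_read(a, rule_data, &c, 1) == 0 &&
	    c.n_packets_valid)
		printf("rule: %" PRIu64 " packets\n", c.n_packets);
}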
+ */ +int __rte_experimental +rte_table_action_stats_read(struct rte_table_action *action, + void *data, + struct rte_table_action_stats_counters *stats, + int clear); + +/** + * Table action timestamp read. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) with timestamp action + * previously applied on it. + * @param[inout] timestamp + * Pre-allocated memory where the timestamp read from *data* is saved (has to + * be non-NULL). + * @return + * Zero on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_table_action_time_read(struct rte_table_action *action, + void *data, + uint64_t *timestamp); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TABLE_ACTION_H__ */ diff --git a/lib/librte_port/meson.build b/lib/librte_port/meson.build index debb5eb9..f3d8b443 100644 --- a/lib/librte_port/meson.build +++ b/lib/librte_port/meson.build @@ -5,23 +5,21 @@ version = 3 sources = files( 'rte_port_ethdev.c', 'rte_port_fd.c', + 'rte_port_frag.c', + 'rte_port_ras.c', 'rte_port_ring.c', 'rte_port_sched.c', 'rte_port_source_sink.c') headers = files( 'rte_port_ethdev.h', 'rte_port_fd.h', + 'rte_port_frag.h', + 'rte_port_ras.h', 'rte_port.h', 'rte_port_ring.h', 'rte_port_sched.h', 'rte_port_source_sink.h') -deps += ['ethdev', 'sched'] - -if dpdk_conf.has('RTE_LIBRTE_IP_FRAG') - sources += files('rte_port_frag.c', 'rte_port_ras.c') - headers += files('rte_port_frag.h', 'rte_port_ras.h') - deps += ['ip_frag'] -endif +deps += ['ethdev', 'sched', 'ip_frag'] if dpdk_conf.has('RTE_LIBRTE_KNI') sources += files('rte_port_kni.c') diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h index 5e8b4ab5..ee638eef 100644 --- a/lib/librte_power/channel_commands.h +++ b/lib/librte_power/channel_commands.h @@ -48,7 +48,8 @@ enum workload {HIGH, MEDIUM, LOW}; enum policy_to_use { TRAFFIC, TIME, - WORKLOAD + WORKLOAD, + BRANCH_RATIO }; struct traffic { diff --git a/lib/librte_power/power_acpi_cpufreq.c b/lib/librte_power/power_acpi_cpufreq.c index bce933e9..cd5978d5 100644 --- a/lib/librte_power/power_acpi_cpufreq.c +++ b/lib/librte_power/power_acpi_cpufreq.c @@ -623,3 +623,24 @@ power_acpi_disable_turbo(unsigned int lcore_id) return 0; } + +int power_acpi_get_capabilities(unsigned int lcore_id, + struct rte_power_core_capabilities *caps) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + if (caps == NULL) { + RTE_LOG(ERR, POWER, "Invalid argument\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + caps->capabilities = 0; + caps->turbo = !!(pi->turbo_available); + + return 0; +} diff --git a/lib/librte_power/power_acpi_cpufreq.h b/lib/librte_power/power_acpi_cpufreq.h index edeeb27a..1af74160 100644 --- a/lib/librte_power/power_acpi_cpufreq.h +++ b/lib/librte_power/power_acpi_cpufreq.h @@ -14,6 +14,7 @@ #include #include #include +#include "rte_power.h" #ifdef __cplusplus extern "C" { @@ -196,6 +197,21 @@ int power_acpi_enable_turbo(unsigned int lcore_id); */ int power_acpi_disable_turbo(unsigned int lcore_id); +/** + * Returns power capabilities for a specific lcore. + * + * @param lcore_id + * lcore id. + * @param caps + * pointer to rte_power_core_capabilities object. + * + * @return + * - 0 on success. + * - Negative on error. 
+ */ +int power_acpi_get_capabilities(unsigned int lcore_id, + struct rte_power_core_capabilities *caps); + #ifdef __cplusplus } #endif diff --git a/lib/librte_power/power_kvm_vm.c b/lib/librte_power/power_kvm_vm.c index 38e9066f..20659b72 100644 --- a/lib/librte_power/power_kvm_vm.c +++ b/lib/librte_power/power_kvm_vm.c @@ -124,3 +124,11 @@ power_kvm_vm_disable_turbo(unsigned int lcore_id) { return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO); } + +struct rte_power_core_capabilities; +int power_kvm_vm_get_capabilities(__rte_unused unsigned int lcore_id, + __rte_unused struct rte_power_core_capabilities *caps) +{ + RTE_LOG(ERR, POWER, "rte_power_get_capabilities is not implemented for Virtual Machine Power Management\n"); + return -ENOTSUP; +} diff --git a/lib/librte_power/power_kvm_vm.h b/lib/librte_power/power_kvm_vm.h index 446d6997..94d4aa12 100644 --- a/lib/librte_power/power_kvm_vm.h +++ b/lib/librte_power/power_kvm_vm.h @@ -14,6 +14,7 @@ #include #include #include +#include "rte_power.h" #ifdef __cplusplus extern "C" { @@ -177,6 +178,22 @@ int power_kvm_vm_enable_turbo(unsigned int lcore_id); * - Negative on error. */ int power_kvm_vm_disable_turbo(unsigned int lcore_id); + +/** + * Returns power capabilities for a specific lcore. + * + * @param lcore_id + * lcore id. + * @param caps + * pointer to rte_power_core_capabilities object. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int power_kvm_vm_get_capabilities(unsigned int lcore_id, + struct rte_power_core_capabilities *caps); + #ifdef __cplusplus } #endif diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c index 6c8fb403..208b7919 100644 --- a/lib/librte_power/rte_power.c +++ b/lib/librte_power/rte_power.c @@ -24,6 +24,7 @@ rte_power_freq_change_t rte_power_freq_min = NULL; rte_power_freq_change_t rte_power_turbo_status; rte_power_freq_change_t rte_power_freq_enable_turbo; rte_power_freq_change_t rte_power_freq_disable_turbo; +rte_power_get_capabilities_t rte_power_get_capabilities; int rte_power_set_env(enum power_management_env env) @@ -42,6 +43,7 @@ rte_power_set_env(enum power_management_env env) rte_power_turbo_status = power_acpi_turbo_status; rte_power_freq_enable_turbo = power_acpi_enable_turbo; rte_power_freq_disable_turbo = power_acpi_disable_turbo; + rte_power_get_capabilities = power_acpi_get_capabilities; } else if (env == PM_ENV_KVM_VM) { rte_power_freqs = power_kvm_vm_freqs; rte_power_get_freq = power_kvm_vm_get_freq; @@ -53,6 +55,7 @@ rte_power_set_env(enum power_management_env env) rte_power_turbo_status = power_kvm_vm_turbo_status; rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo; rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo; + rte_power_get_capabilities = power_kvm_vm_get_capabilities; } else { RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n", env); diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h index b4b7357b..d70bc0b3 100644 --- a/lib/librte_power/rte_power.h +++ b/lib/librte_power/rte_power.h @@ -247,6 +247,38 @@ extern rte_power_freq_change_t rte_power_freq_enable_turbo; */ extern rte_power_freq_change_t rte_power_freq_disable_turbo; +/** + * Power capabilities summary. + */ +struct rte_power_core_capabilities { + RTE_STD_C11 + union { + uint64_t capabilities; + RTE_STD_C11 + struct { + uint64_t turbo:1; /**< Turbo can be enabled. */ + }; + }; +}; + +/** + * Returns power capabilities for a specific lcore. + * Function pointer definition. Review each environments + * specific documentation for usage. 
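/*
 * Editor's sketch (not part of the upstream patch): querying the new power
 * capability summary from an application. It assumes the lcore was already
 * set up with rte_power_init(); under the KVM VM environment the call
 * returns -ENOTSUP, as noted above.
 */
#include <rte_power.h>

static void
example_enable_turbo_if_supported(unsigned int lcore_id)
{
	struct rte_power_core_capabilities caps;

	if (rte_power_get_capabilities(lcore_id, &caps) == 0 && caps.turbo)
		rte_power_freq_enable_turbo(lcore_id);
}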
+ * + * @param lcore_id + * lcore id. + * @param caps + * pointer to rte_power_core_capabilities object. + * + * @return + * - 0 on success. + * - Negative on error. + */ +typedef int (*rte_power_get_capabilities_t)(unsigned int lcore_id, + struct rte_power_core_capabilities *caps); + +extern rte_power_get_capabilities_t rte_power_get_capabilities; #ifdef __cplusplus } diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map index 96dc42ec..dd587dfb 100644 --- a/lib/librte_power/rte_power_version.map +++ b/lib/librte_power/rte_power_version.map @@ -25,4 +25,11 @@ DPDK_17.11 { rte_power_freq_enable_turbo; rte_power_turbo_status; -} DPDK_2.0; \ No newline at end of file +} DPDK_2.0; + +DPDK_18.08 { + global: + + rte_power_get_capabilities; + +} DPDK_17.11; diff --git a/lib/librte_rawdev/Makefile b/lib/librte_rawdev/Makefile index b9105b06..addb288d 100644 --- a/lib/librte_rawdev/Makefile +++ b/lib/librte_rawdev/Makefile @@ -10,7 +10,6 @@ LIB = librte_rawdev.a LIBABIVER := 1 # build flags -CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) LDLIBS += -lrte_eal diff --git a/lib/librte_rawdev/meson.build b/lib/librte_rawdev/meson.build new file mode 100644 index 00000000..a20fbdc0 --- /dev/null +++ b/lib/librte_rawdev/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +sources = files('rte_rawdev.c') +headers = files('rte_rawdev.h', 'rte_rawdev_pmd.h') diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c index d314ef96..62b6b97e 100644 --- a/lib/librte_rawdev/rte_rawdev.c +++ b/lib/librte_rawdev/rte_rawdev.c @@ -46,13 +46,13 @@ static struct rte_rawdev_global rawdev_globals = { struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals; /* Raw device, northbound API implementation */ -uint8_t __rte_experimental +uint8_t rte_rawdev_count(void) { return rte_rawdev_globals->nb_devs; } -uint16_t __rte_experimental +uint16_t rte_rawdev_get_dev_id(const char *name) { uint16_t i; @@ -69,7 +69,7 @@ rte_rawdev_get_dev_id(const char *name) return -ENODEV; } -int __rte_experimental +int rte_rawdev_socket_id(uint16_t dev_id) { struct rte_rawdev *dev; @@ -80,7 +80,7 @@ rte_rawdev_socket_id(uint16_t dev_id) return dev->socket_id; } -int __rte_experimental +int rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info) { struct rte_rawdev *rawdev; @@ -88,9 +88,6 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info) RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); RTE_FUNC_PTR_OR_ERR_RET(dev_info, -EINVAL); - if (dev_info == NULL) - return -EINVAL; - rawdev = &rte_rawdevs[dev_id]; RTE_FUNC_PTR_OR_ERR_RET(*rawdev->dev_ops->dev_info_get, -ENOTSUP); @@ -105,7 +102,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info) return 0; } -int __rte_experimental +int rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf) { struct rte_rawdev *dev; @@ -134,7 +131,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf) return diag; } -int __rte_experimental +int rte_rawdev_queue_conf_get(uint16_t dev_id, uint16_t queue_id, rte_rawdev_obj_t queue_conf) @@ -149,7 +146,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id, return 0; } -int __rte_experimental +int rte_rawdev_queue_setup(uint16_t dev_id, uint16_t queue_id, rte_rawdev_obj_t queue_conf) @@ -163,7 +160,7 @@ rte_rawdev_queue_setup(uint16_t dev_id, return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf); } -int __rte_experimental 
+int rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id) { struct rte_rawdev *dev; @@ -175,7 +172,19 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id) return (*dev->dev_ops->queue_release)(dev, queue_id); } -int __rte_experimental +uint16_t +rte_rawdev_queue_count(uint16_t dev_id) +{ + struct rte_rawdev *dev; + + RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_rawdevs[dev_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_count, -ENOTSUP); + return (*dev->dev_ops->queue_count)(dev); +} + +int rte_rawdev_get_attr(uint16_t dev_id, const char *attr_name, uint64_t *attr_value) @@ -189,7 +198,7 @@ rte_rawdev_get_attr(uint16_t dev_id, return (*dev->dev_ops->attr_get)(dev, attr_name, attr_value); } -int __rte_experimental +int rte_rawdev_set_attr(uint16_t dev_id, const char *attr_name, const uint64_t attr_value) @@ -203,7 +212,7 @@ rte_rawdev_set_attr(uint16_t dev_id, return (*dev->dev_ops->attr_set)(dev, attr_name, attr_value); } -int __rte_experimental +int rte_rawdev_enqueue_buffers(uint16_t dev_id, struct rte_rawdev_buf **buffers, unsigned int count, @@ -218,7 +227,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id, return (*dev->dev_ops->enqueue_bufs)(dev, buffers, count, context); } -int __rte_experimental +int rte_rawdev_dequeue_buffers(uint16_t dev_id, struct rte_rawdev_buf **buffers, unsigned int count, @@ -233,7 +242,7 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id, return (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context); } -int __rte_experimental +int rte_rawdev_dump(uint16_t dev_id, FILE *f) { struct rte_rawdev *dev; @@ -254,7 +263,7 @@ xstats_get_count(uint16_t dev_id) return (*dev->dev_ops->xstats_get_names)(dev, NULL, 0); } -int __rte_experimental +int rte_rawdev_xstats_names_get(uint16_t dev_id, struct rte_rawdev_xstats_name *xstats_names, unsigned int size) @@ -277,7 +286,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id, } /* retrieve rawdev extended statistics */ -int __rte_experimental +int rte_rawdev_xstats_get(uint16_t dev_id, const unsigned int ids[], uint64_t values[], @@ -290,7 +299,7 @@ rte_rawdev_xstats_get(uint16_t dev_id, return (*dev->dev_ops->xstats_get)(dev, ids, values, n); } -uint64_t __rte_experimental +uint64_t rte_rawdev_xstats_by_name_get(uint16_t dev_id, const char *name, unsigned int *id) @@ -309,7 +318,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id, return (*dev->dev_ops->xstats_get_by_name)(dev, name, id); } -int __rte_experimental +int rte_rawdev_xstats_reset(uint16_t dev_id, const uint32_t ids[], uint32_t nb_ids) { @@ -320,7 +329,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id, return (*dev->dev_ops->xstats_reset)(dev, ids, nb_ids); } -int __rte_experimental +int rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info) { RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); @@ -330,7 +339,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info) return (*dev->dev_ops->firmware_status_get)(dev, status_info); } -int __rte_experimental +int rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info) { RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); @@ -340,7 +349,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info) return (*dev->dev_ops->firmware_version_get)(dev, version_info); } -int __rte_experimental +int rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image) { RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); @@ -353,7 +362,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, 
rte_rawdev_obj_t firmware_image) return (*dev->dev_ops->firmware_load)(dev, firmware_image); } -int __rte_experimental +int rte_rawdev_firmware_unload(uint16_t dev_id) { RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); @@ -363,7 +372,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id) return (*dev->dev_ops->firmware_unload)(dev); } -int __rte_experimental +int rte_rawdev_selftest(uint16_t dev_id) { RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); @@ -373,7 +382,7 @@ rte_rawdev_selftest(uint16_t dev_id) return (*dev->dev_ops->dev_selftest)(); } -int __rte_experimental +int rte_rawdev_start(uint16_t dev_id) { struct rte_rawdev *dev; @@ -400,7 +409,7 @@ rte_rawdev_start(uint16_t dev_id) return 0; } -void __rte_experimental +void rte_rawdev_stop(uint16_t dev_id) { struct rte_rawdev *dev; @@ -422,7 +431,7 @@ rte_rawdev_stop(uint16_t dev_id) dev->started = 0; } -int __rte_experimental +int rte_rawdev_close(uint16_t dev_id) { struct rte_rawdev *dev; @@ -441,7 +450,7 @@ rte_rawdev_close(uint16_t dev_id) return (*dev->dev_ops->dev_close)(dev); } -int __rte_experimental +int rte_rawdev_reset(uint16_t dev_id) { struct rte_rawdev *dev; @@ -468,7 +477,7 @@ rte_rawdev_find_free_device_index(void) return RTE_RAWDEV_MAX_DEVS; } -struct rte_rawdev * __rte_experimental +struct rte_rawdev * rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id) { struct rte_rawdev *rawdev; @@ -509,7 +518,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id) return rawdev; } -int __rte_experimental +int rte_rawdev_pmd_release(struct rte_rawdev *rawdev) { int ret; @@ -535,10 +544,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev) return 0; } -RTE_INIT(librawdev_init_log); - -static void -librawdev_init_log(void) +RTE_INIT(librawdev_init_log) { librawdev_logtype = rte_log_register("lib.rawdev"); if (librawdev_logtype >= 0) diff --git a/lib/librte_rawdev/rte_rawdev.h b/lib/librte_rawdev/rte_rawdev.h index 2e14919b..684bfdb8 100644 --- a/lib/librte_rawdev/rte_rawdev.h +++ b/lib/librte_rawdev/rte_rawdev.h @@ -35,7 +35,7 @@ typedef void *rte_rawdev_obj_t; * @return * The total number of usable raw devices. */ -uint8_t __rte_experimental +uint8_t rte_rawdev_count(void); /** @@ -48,7 +48,7 @@ rte_rawdev_count(void); * Returns raw device identifier on success. * - <0: Failure to find named raw device. */ -uint16_t __rte_experimental +uint16_t rte_rawdev_get_dev_id(const char *name); /** @@ -61,7 +61,7 @@ rte_rawdev_get_dev_id(const char *name); * a default of zero if the socket could not be determined. * -(-EINVAL) dev_id value is out of range. */ -int __rte_experimental +int rte_rawdev_socket_id(uint16_t dev_id); /** @@ -84,7 +84,7 @@ struct rte_rawdev_info; * - <0: Error code returned by the driver info get function. * */ -int __rte_experimental +int rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info); /** @@ -111,7 +111,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info); * - 0: Success, device configured. * - <0: Error code returned by the driver configuration function. */ -int __rte_experimental +int rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf); @@ -137,7 +137,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf); * @see rte_raw_queue_setup() * */ -int __rte_experimental +int rte_rawdev_queue_conf_get(uint16_t dev_id, uint16_t queue_id, rte_rawdev_obj_t queue_conf); @@ -160,7 +160,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id, * - 0: Success, raw queue correctly set up. 
* - <0: raw queue configuration failed */ -int __rte_experimental +int rte_rawdev_queue_setup(uint16_t dev_id, uint16_t queue_id, rte_rawdev_obj_t queue_conf); @@ -180,8 +180,9 @@ rte_rawdev_queue_setup(uint16_t dev_id, * - 0: Success, raw queue released. * - <0: raw queue configuration failed */ -int __rte_experimental +int rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id); + /** * Get the number of raw queues on a specific raw device * @@ -190,7 +191,7 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id); * @return * - The number of configured raw queues */ -uint16_t __rte_experimental +uint16_t rte_rawdev_queue_count(uint16_t dev_id); /** @@ -208,7 +209,7 @@ rte_rawdev_queue_count(uint16_t dev_id); * - 0: Success, device started. * < 0: Failure */ -int __rte_experimental +int rte_rawdev_start(uint16_t dev_id); /** @@ -218,7 +219,7 @@ rte_rawdev_start(uint16_t dev_id); * @param dev_id * Raw device identifier. */ -void __rte_experimental +void rte_rawdev_stop(uint16_t dev_id); /** @@ -232,7 +233,7 @@ rte_rawdev_stop(uint16_t dev_id); * - <0 on failure to close device * - (-EAGAIN) if device is busy */ -int __rte_experimental +int rte_rawdev_close(uint16_t dev_id); /** @@ -246,7 +247,7 @@ rte_rawdev_close(uint16_t dev_id); * 0 for sucessful reset, * !0 for failure in resetting */ -int __rte_experimental +int rte_rawdev_reset(uint16_t dev_id); #define RTE_RAWDEV_NAME_MAX_LEN (64) @@ -316,7 +317,7 @@ struct rte_rawdev_buf { * - 0: on success * - <0: on failure. */ -int __rte_experimental +int rte_rawdev_dump(uint16_t dev_id, FILE *f); /** @@ -338,7 +339,7 @@ rte_rawdev_dump(uint16_t dev_id, FILE *f); * 0 for success * !0 Error; attr_value remains untouched in case of error. */ -int __rte_experimental +int rte_rawdev_get_attr(uint16_t dev_id, const char *attr_name, uint64_t *attr_value); @@ -357,7 +358,7 @@ rte_rawdev_get_attr(uint16_t dev_id, * 0 for success * !0 Error */ -int __rte_experimental +int rte_rawdev_set_attr(uint16_t dev_id, const char *attr_name, const uint64_t attr_value); @@ -383,7 +384,7 @@ rte_rawdev_set_attr(uint16_t dev_id, * Whether partial enqueue is failure or success is defined between app * and driver implementation. */ -int __rte_experimental +int rte_rawdev_enqueue_buffers(uint16_t dev_id, struct rte_rawdev_buf **buffers, unsigned int count, @@ -414,7 +415,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id, * Whether partial enqueue is failure or success is defined between app * and driver implementation. */ -int __rte_experimental +int rte_rawdev_dequeue_buffers(uint16_t dev_id, struct rte_rawdev_buf **buffers, unsigned int count, @@ -454,7 +455,7 @@ struct rte_rawdev_xstats_name { * -ENODEV for invalid *dev_id* * -ENOTSUP if the device doesn't support this function. */ -int __rte_experimental +int rte_rawdev_xstats_names_get(uint16_t dev_id, struct rte_rawdev_xstats_name *xstats_names, unsigned int size); @@ -478,7 +479,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id, * -ENODEV for invalid *dev_id* * -ENOTSUP if the device doesn't support this function. */ -int __rte_experimental +int rte_rawdev_xstats_get(uint16_t dev_id, const unsigned int ids[], uint64_t values[], @@ -500,7 +501,7 @@ rte_rawdev_xstats_get(uint16_t dev_id, * - positive value or zero: the stat value * - negative value: -EINVAL if stat not found, -ENOTSUP if not supported. 
*/ -uint64_t __rte_experimental +uint64_t rte_rawdev_xstats_by_name_get(uint16_t dev_id, const char *name, unsigned int *id); @@ -520,7 +521,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id, * - zero: successfully reset the statistics to zero * - negative value: -EINVAL invalid parameters, -ENOTSUP if not supported. */ -int __rte_experimental +int rte_rawdev_xstats_reset(uint16_t dev_id, const uint32_t ids[], uint32_t nb_ids); @@ -539,7 +540,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id, * 0 for success, * !0 for failure, `status_info` argument state is undefined */ -int __rte_experimental +int rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info); @@ -557,7 +558,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id, * 0 for success, * !0 for failure, `version_info` argument state is undefined */ -int __rte_experimental +int rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info); @@ -574,7 +575,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id, * 0 for successful load * !0 for failure to load the provided image, or image incorrect. */ -int __rte_experimental +int rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image); /** @@ -586,7 +587,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image); * 0 for successful Unload * !0 for failure in unloading */ -int __rte_experimental +int rte_rawdev_firmware_unload(uint16_t dev_id); /** @@ -599,7 +600,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id); * - -ENOTSUP if the device doesn't support selftest * - other values < 0 on failure. */ -int __rte_experimental +int rte_rawdev_selftest(uint16_t dev_id); #ifdef __cplusplus diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h index 408adf0f..bb9bbc35 100644 --- a/lib/librte_rawdev/rte_rawdev_pmd.h +++ b/lib/librte_rawdev/rte_rawdev_pmd.h @@ -250,6 +250,24 @@ typedef int (*rawdev_queue_setup_t)(struct rte_rawdev *dev, typedef int (*rawdev_queue_release_t)(struct rte_rawdev *dev, uint16_t queue_id); +/** + * Get the count of number of queues configured on this device. + * + * Another way to fetch this information is to fetch the device configuration. + * But, that assumes that the device configuration managed by the driver has + * that kind of information. + * + * This function helps in getting queue count supported, independently. It + * can help in cases where iterator needs to be implemented. + * + * @param + * Raw device pointer + * @return + * Number of queues; 0 is assumed to be a valid response. + * + */ +typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev); + /** * Enqueue an array of raw buffers to the device. * @@ -506,6 +524,8 @@ struct rte_rawdev_ops { rawdev_queue_setup_t queue_setup; /**< Release an raw queue. */ rawdev_queue_release_t queue_release; + /**< Get the number of queues attached to the device */ + rawdev_queue_count_t queue_count; /**< Enqueue an array of raw buffers to device. 
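/*
 * Editor's sketch (not part of the upstream patch): using the newly exported
 * rte_rawdev_queue_count() from an application, here simply to report how
 * many queues each probed raw device has configured.
 */
#include <stdio.h>
#include <rte_rawdev.h>

static void
example_report_rawdev_queues(void)
{
	uint16_t dev_id;

	for (dev_id = 0; dev_id < rte_rawdev_count(); dev_id++)
		printf("rawdev %u: %u queues configured\n",
			dev_id, rte_rawdev_queue_count(dev_id));
}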
*/ rawdev_enqueue_bufs_t enqueue_bufs; @@ -556,7 +576,7 @@ struct rte_rawdev_ops { * @return * - Slot in the rte_dev_devices array for a new device; */ -struct rte_rawdev * __rte_experimental +struct rte_rawdev * rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size, int socket_id); @@ -568,7 +588,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size, * @return * - 0 on success, negative on error */ -int __rte_experimental +int rte_rawdev_pmd_release(struct rte_rawdev *rawdev); /** @@ -585,7 +605,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev); * - Raw device pointer if device is successfully created. * - NULL if device cannot be created. */ -struct rte_rawdev * __rte_experimental +struct rte_rawdev * rte_rawdev_pmd_init(const char *name, size_t dev_private_size, int socket_id); @@ -597,7 +617,7 @@ rte_rawdev_pmd_init(const char *name, size_t dev_private_size, * @return * - 0 on success, negative on error */ -int __rte_experimental +int rte_rawdev_pmd_uninit(const char *name); #ifdef __cplusplus diff --git a/lib/librte_rawdev/rte_rawdev_version.map b/lib/librte_rawdev/rte_rawdev_version.map index af4465e2..b61dbff1 100644 --- a/lib/librte_rawdev/rte_rawdev_version.map +++ b/lib/librte_rawdev/rte_rawdev_version.map @@ -1,4 +1,4 @@ -EXPERIMENTAL { +DPDK_18.08 { global: rte_rawdev_close; @@ -16,6 +16,7 @@ EXPERIMENTAL { rte_rawdev_pmd_allocate; rte_rawdev_pmd_release; rte_rawdev_queue_conf_get; + rte_rawdev_queue_count; rte_rawdev_queue_setup; rte_rawdev_queue_release; rte_rawdev_reset; diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile index bde8907d..21a36770 100644 --- a/lib/librte_ring/Makefile +++ b/lib/librte_ring/Makefile @@ -11,7 +11,7 @@ LDLIBS += -lrte_eal EXPORT_MAP := rte_ring_version.map -LIBABIVER := 1 +LIBABIVER := 2 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 253cdc96..7a731d07 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -26,8 +26,9 @@ * - Bulk dequeue. * - Bulk enqueue. * - * Note: the ring implementation is not preemptable. A lcore must not - * be interrupted by another task that uses the same ring. + * Note: the ring implementation is not preemptible. Refer to Programmer's + * guide/Environment Abstraction Layer/Multiple pthread/Known Issues/rte_ring + * for more information. * */ @@ -62,14 +63,6 @@ enum rte_ring_queue_behavior { struct rte_memzone; /* forward declaration, so as not to require memzone.h */ -#if RTE_CACHE_LINE_SIZE < 128 -#define PROD_ALIGN (RTE_CACHE_LINE_SIZE * 2) -#define CONS_ALIGN (RTE_CACHE_LINE_SIZE * 2) -#else -#define PROD_ALIGN RTE_CACHE_LINE_SIZE -#define CONS_ALIGN RTE_CACHE_LINE_SIZE -#endif - /* structure to hold a pair of head/tail values and other metadata */ struct rte_ring_headtail { volatile uint32_t head; /**< Prod/consumer head. */ @@ -101,11 +94,15 @@ struct rte_ring { uint32_t mask; /**< Mask (size-1) of ring. */ uint32_t capacity; /**< Usable size of ring */ + char pad0 __rte_cache_aligned; /**< empty cache line */ + /** Ring producer status. */ - struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN); + struct rte_ring_headtail prod __rte_cache_aligned; + char pad1 __rte_cache_aligned; /**< empty cache line */ /** Ring consumer status. 
*/ - struct rte_ring_headtail cons __rte_aligned(CONS_ALIGN); + struct rte_ring_headtail cons __rte_cache_aligned; + char pad2 __rte_cache_aligned; /**< empty cache line */ }; #define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */ @@ -339,7 +336,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r); static __rte_always_inline unsigned int __rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table, unsigned int n, enum rte_ring_queue_behavior behavior, - int is_sp, unsigned int *free_space) + unsigned int is_sp, unsigned int *free_space) { uint32_t prod_head, prod_next; uint32_t free_entries; @@ -381,12 +378,12 @@ end: static __rte_always_inline unsigned int __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table, unsigned int n, enum rte_ring_queue_behavior behavior, - int is_sc, unsigned int *available) + unsigned int is_sc, unsigned int *available) { uint32_t cons_head, cons_next; uint32_t entries; - n = __rte_ring_move_cons_head(r, is_sc, n, behavior, + n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior, &cons_head, &cons_next, &entries); if (n == 0) goto end; diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h index 08825ea5..94df3c4a 100644 --- a/lib/librte_ring/rte_ring_c11_mem.h +++ b/lib/librte_ring/rte_ring_c11_mem.h @@ -51,7 +51,7 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ static __rte_always_inline unsigned int -__rte_ring_move_prod_head(struct rte_ring *r, int is_sp, +__rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, unsigned int n, enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *new_head, uint32_t *free_entries) @@ -66,14 +66,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); - const uint32_t cons_tail = r->cons.tail; + /* * The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = (capacity + cons_tail - *old_head); + *free_entries = (capacity + r->cons.tail - *old_head); /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -133,13 +133,13 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, n = max; *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE); - const uint32_t prod_tail = r->prod.tail; + /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = (prod_tail - *old_head); + *entries = (r->prod.tail - *old_head); /* Set the actual entries for dequeue */ if (n > *entries) diff --git a/lib/librte_ring/rte_ring_generic.h b/lib/librte_ring/rte_ring_generic.h index 5b110425..ea7dbe5b 100644 --- a/lib/librte_ring/rte_ring_generic.h +++ b/lib/librte_ring/rte_ring_generic.h @@ -53,7 +53,7 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. 
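The producer/consumer metadata is now isolated by full empty cache lines on every architecture, which is the reason for the LIBABIVER bump; callers of the ring API are unaffected. A minimal single-producer/single-consumer round trip, for reference:

#include <stdint.h>
#include <rte_lcore.h>
#include <rte_ring.h>

static int
ring_roundtrip(void)
{
	struct rte_ring *r;
	void *objs[8], *got[8];
	unsigned int i, n;

	/* "example_ring" is an arbitrary name; assumes rte_eal_init() already ran */
	r = rte_ring_create("example_ring", 1024, rte_socket_id(),
			RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (r == NULL)
		return -1;

	for (i = 0; i < 8; i++)
		objs[i] = (void *)(uintptr_t)(i + 1);

	n = rte_ring_enqueue_burst(r, objs, 8, NULL);
	n = rte_ring_dequeue_burst(r, got, n, NULL);

	rte_ring_free(r);
	return n == 8 ? 0 : -1;
}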
*/ static __rte_always_inline unsigned int -__rte_ring_move_prod_head(struct rte_ring *r, int is_sp, +__rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, unsigned int n, enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *new_head, uint32_t *free_entries) @@ -73,14 +73,13 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, */ rte_smp_rmb(); - const uint32_t cons_tail = r->cons.tail; /* * The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = (capacity + cons_tail - *old_head); + *free_entries = (capacity + r->cons.tail - *old_head); /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -124,7 +123,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp, * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. */ static __rte_always_inline unsigned int -__rte_ring_move_cons_head(struct rte_ring *r, int is_sc, +__rte_ring_move_cons_head(struct rte_ring *r, unsigned int is_sc, unsigned int n, enum rte_ring_queue_behavior behavior, uint32_t *old_head, uint32_t *new_head, uint32_t *entries) @@ -144,13 +143,12 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, */ rte_smp_rmb(); - const uint32_t prod_tail = r->prod.tail; /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. */ - *entries = (prod_tail - *old_head); + *entries = (r->prod.tail - *old_head); /* Set the actual entries for dequeue */ if (n > *entries) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 634486c8..9269e5c7 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -275,10 +275,55 @@ rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex) return port->qsize[tc]; } +static int +pipe_profile_check(struct rte_sched_pipe_params *params, + uint32_t rate) +{ + uint32_t i; + + /* Pipe parameters */ + if (params == NULL) + return -10; + + /* TB rate: non-zero, not greater than port rate */ + if (params->tb_rate == 0 || + params->tb_rate > rate) + return -11; + + /* TB size: non-zero */ + if (params->tb_size == 0) + return -12; + + /* TC rate: non-zero, less than pipe rate */ + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + if (params->tc_rate[i] == 0 || + params->tc_rate[i] > params->tb_rate) + return -13; + } + + /* TC period: non-zero */ + if (params->tc_period == 0) + return -14; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC3 oversubscription weight: non-zero */ + if (params->tc_ov_weight == 0) + return -15; +#endif + + /* Queue WRR weights: non-zero */ + for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) { + if (params->wrr_weights[i] == 0) + return -16; + } + + return 0; +} + static int rte_sched_port_check_params(struct rte_sched_port_params *params) { - uint32_t i, j; + uint32_t i; if (params == NULL) return -1; @@ -324,36 +369,11 @@ rte_sched_port_check_params(struct rte_sched_port_params *params) for (i = 0; i < params->n_pipe_profiles; i++) { struct rte_sched_pipe_params *p = params->pipe_profiles + i; + int status; - /* TB rate: non-zero, not greater than port rate */ - if (p->tb_rate == 0 || p->tb_rate > params->rate) - return -10; - - /* TB size: non-zero */ - if (p->tb_size == 0) - return -11; - - /* TC rate: non-zero, less than pipe rate */ - for (j = 
0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { - if (p->tc_rate[j] == 0 || p->tc_rate[j] > p->tb_rate) - return -12; - } - - /* TC period: non-zero */ - if (p->tc_period == 0) - return -13; - -#ifdef RTE_SCHED_SUBPORT_TC_OV - /* TC3 oversubscription weight: non-zero */ - if (p->tc_ov_weight == 0) - return -14; -#endif - - /* Queue WRR weights: non-zero */ - for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) { - if (p->wrr_weights[j] == 0) - return -15; - } + status = pipe_profile_check(p, params->rate); + if (status != 0) + return status; } return 0; @@ -514,69 +534,80 @@ rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) } static void -rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params) +rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src, + struct rte_sched_pipe_profile *dst, + uint32_t rate) { - uint32_t i, j; + uint32_t i; - for (i = 0; i < port->n_pipe_profiles; i++) { - struct rte_sched_pipe_params *src = params->pipe_profiles + i; - struct rte_sched_pipe_profile *dst = port->pipe_profiles + i; + /* Token Bucket */ + if (src->tb_rate == rate) { + dst->tb_credits_per_period = 1; + dst->tb_period = 1; + } else { + double tb_rate = (double) src->tb_rate + / (double) rate; + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - /* Token Bucket */ - if (src->tb_rate == params->rate) { - dst->tb_credits_per_period = 1; - dst->tb_period = 1; - } else { - double tb_rate = (double) src->tb_rate - / (double) params->rate; - double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - - rte_approx(tb_rate, d, - &dst->tb_credits_per_period, &dst->tb_period); - } - dst->tb_size = src->tb_size; + rte_approx(tb_rate, d, + &dst->tb_credits_per_period, &dst->tb_period); + } + + dst->tb_size = src->tb_size; - /* Traffic Classes */ - dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, - params->rate); + /* Traffic Classes */ + dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, + rate); - for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) - dst->tc_credits_per_period[j] - = rte_sched_time_ms_to_bytes(src->tc_period, - src->tc_rate[j]); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + dst->tc_credits_per_period[i] + = rte_sched_time_ms_to_bytes(src->tc_period, + src->tc_rate[i]); #ifdef RTE_SCHED_SUBPORT_TC_OV - dst->tc_ov_weight = src->tc_ov_weight; + dst->tc_ov_weight = src->tc_ov_weight; #endif - /* WRR */ - for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { - uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; - uint32_t lcd, lcd1, lcd2; - uint32_t qindex; - - qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; - - wrr_cost[0] = src->wrr_weights[qindex]; - wrr_cost[1] = src->wrr_weights[qindex + 1]; - wrr_cost[2] = src->wrr_weights[qindex + 2]; - wrr_cost[3] = src->wrr_weights[qindex + 3]; - - lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); - lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); - lcd = rte_get_lcd(lcd1, lcd2); - - wrr_cost[0] = lcd / wrr_cost[0]; - wrr_cost[1] = lcd / wrr_cost[1]; - wrr_cost[2] = lcd / wrr_cost[2]; - wrr_cost[3] = lcd / wrr_cost[3]; - - dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; - dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; - dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; - dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; - } + /* WRR */ + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint32_t lcd, lcd1, lcd2; + uint32_t qindex; + + qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + + wrr_cost[0] = 
src->wrr_weights[qindex]; + wrr_cost[1] = src->wrr_weights[qindex + 1]; + wrr_cost[2] = src->wrr_weights[qindex + 2]; + wrr_cost[3] = src->wrr_weights[qindex + 3]; + + lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); + lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); + lcd = rte_get_lcd(lcd1, lcd2); + + wrr_cost[0] = lcd / wrr_cost[0]; + wrr_cost[1] = lcd / wrr_cost[1]; + wrr_cost[2] = lcd / wrr_cost[2]; + wrr_cost[3] = lcd / wrr_cost[3]; + + dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; + dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; + dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; + dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; + } +} + +static void +rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, + struct rte_sched_port_params *params) +{ + uint32_t i; + + for (i = 0; i < port->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *src = params->pipe_profiles + i; + struct rte_sched_pipe_profile *dst = port->pipe_profiles + i; + rte_sched_pipe_profile_convert(src, dst, params->rate); rte_sched_port_log_pipe_profile(port, i); } @@ -932,6 +963,48 @@ rte_sched_pipe_config(struct rte_sched_port *port, return 0; } +int __rte_experimental +rte_sched_port_pipe_profile_add(struct rte_sched_port *port, + struct rte_sched_pipe_params *params, + uint32_t *pipe_profile_id) +{ + struct rte_sched_pipe_profile *pp; + uint32_t i; + int status; + + /* Port */ + if (port == NULL) + return -1; + + /* Pipe profiles not exceeds the max limit */ + if (port->n_pipe_profiles >= RTE_SCHED_PIPE_PROFILES_PER_PORT) + return -2; + + /* Pipe params */ + status = pipe_profile_check(params, port->rate); + if (status != 0) + return status; + + pp = &port->pipe_profiles[port->n_pipe_profiles]; + rte_sched_pipe_profile_convert(params, pp, port->rate); + + /* Pipe profile not exists */ + for (i = 0; i < port->n_pipe_profiles; i++) + if (memcmp(port->pipe_profiles + i, pp, sizeof(*pp)) == 0) + return -3; + + /* Pipe profile commit */ + *pipe_profile_id = port->n_pipe_profiles; + port->n_pipe_profiles++; + + if (port->pipe_tc3_rate_max < params->tc_rate[3]) + port->pipe_tc3_rate_max = params->tc_rate[3]; + + rte_sched_port_log_pipe_profile(port, *pipe_profile_id); + + return 0; +} + void rte_sched_port_pkt_write(struct rte_mbuf *pkt, uint32_t subport, uint32_t pipe, uint32_t traffic_class, diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h index 5d2a688d..84fa896d 100644 --- a/lib/librte_sched/rte_sched.h +++ b/lib/librte_sched/rte_sched.h @@ -57,6 +57,7 @@ extern "C" { */ #include +#include #include #include @@ -233,6 +234,26 @@ rte_sched_port_config(struct rte_sched_port_params *params); void rte_sched_port_free(struct rte_sched_port *port); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Hierarchical scheduler pipe profile add + * + * @param port + * Handle to port scheduler instance + * @param params + * Pipe profile parameters + * @param pipe_profile_id + * Set to valid profile id when profile is added successfully. 
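A sketch of adding a pipe profile at runtime through the new experimental call; all rates, periods and weights below are arbitrary and must still satisfy pipe_profile_check() against the port rate.

#include <rte_sched.h>

static int
add_runtime_profile(struct rte_sched_port *port)
{
	struct rte_sched_pipe_params pp = {
		.tb_rate = 305175,
		.tb_size = 1000000,
		.tc_rate = {305175, 305175, 305175, 305175},
		.tc_period = 40,
#ifdef RTE_SCHED_SUBPORT_TC_OV
		.tc_ov_weight = 1,
#endif
		.wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
	};
	uint32_t profile_id;
	int ret;

	ret = rte_sched_port_pipe_profile_add(port, &pp, &profile_id);
	if (ret != 0)
		return ret;

	/* profile_id can now be passed to rte_sched_pipe_config() for new pipes */
	return 0;
}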
+ * @return + * 0 upon success, error code otherwise + */ +int __rte_experimental +rte_sched_port_pipe_profile_add(struct rte_sched_port *port, + struct rte_sched_pipe_params *params, + uint32_t *pipe_profile_id); + /** * Hierarchical scheduler subport configuration * diff --git a/lib/librte_sched/rte_sched_version.map b/lib/librte_sched/rte_sched_version.map index 3aa159ab..72958879 100644 --- a/lib/librte_sched/rte_sched_version.map +++ b/lib/librte_sched/rte_sched_version.map @@ -29,3 +29,9 @@ DPDK_2.1 { rte_sched_port_pkt_read_color; } DPDK_2.0; + +EXPERIMENTAL { + global: + + rte_sched_port_pipe_profile_add; +}; diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c index 1e559c99..1954960a 100644 --- a/lib/librte_security/rte_security.c +++ b/lib/librte_security/rte_security.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 NXP. - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of NXP nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. */ #include @@ -91,7 +63,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance, struct rte_security_session *sess) { int ret; - struct rte_mempool *mp = rte_mempool_from_obj(sess); RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP); @@ -100,7 +71,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance, ret = instance->ops->session_destroy(instance->device, sess); if (!ret) - rte_mempool_put(mp, (void *)sess); + rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess); return ret; } diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h index c75c1218..b0d1b97e 100644 --- a/lib/librte_security/rte_security.h +++ b/lib/librte_security/rte_security.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 NXP. - * Copyright(c) 2017 Intel Corporation. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of NXP nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. */ #ifndef _RTE_SECURITY_H_ @@ -222,6 +194,8 @@ struct rte_security_ipsec_xform { /**< IPsec SA Mode - transport/tunnel */ struct rte_security_ipsec_tunnel_param tunnel; /**< Tunnel parameters, NULL for transport mode */ + uint64_t esn_soft_limit; + /**< ESN for which the overflow event need to be raised */ }; /** @@ -362,15 +336,17 @@ rte_security_set_pkt_metadata(struct rte_security_ctx *instance, struct rte_mbuf *mb, void *params); /** - * Get userdata associated with the security session which processed the - * packet. This userdata would be registered while creating the session, and - * application can use this to identify the SA etc. Device-specific metadata - * in the mbuf would be used for this. + * Get userdata associated with the security session. Device specific metadata + * provided would be used to uniquely identify the security session being + * referred to. This userdata would be registered while creating the session, + * and application can use this to identify the SA etc. * - * This is valid only for inline processed ingress packets. + * Device specific metadata would be set in mbuf for inline processed inbound + * packets. In addition, the same metadata would be set for IPsec events + * reported by rte_eth_event framework. * * @param instance security instance - * @param md device-specific metadata set in mbuf + * @param md device-specific metadata * * @return * - On success, userdata diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h index 46239049..42f42ffe 100644 --- a/lib/librte_security/rte_security_driver.h +++ b/lib/librte_security/rte_security_driver.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * Copyright 2017 NXP. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. */ #ifndef _RTE_SECURITY_DRIVER_H_ @@ -134,9 +106,9 @@ typedef int (*security_set_pkt_metadata_t)(void *device, void *params); /** - * Get application specific userdata associated with the security session which - * processed the packet. This would be retrieved using the metadata obtained - * from packet. + * Get application specific userdata associated with the security session. + * Device specific metadata provided would be used to uniquely identify + * the security session being referred to. 
* * @param device Crypto/eth device pointer * @param md Metadata diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index c4a9acb0..276d476a 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -45,6 +45,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_cuckoo.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h ifeq ($(CONFIG_RTE_ARCH_X86),y) SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h diff --git a/lib/librte_table/meson.build b/lib/librte_table/meson.build index 13e797b4..8b2f8413 100644 --- a/lib/librte_table/meson.build +++ b/lib/librte_table/meson.build @@ -1,24 +1,29 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 2 -sources = files('rte_table_lpm.c', 'rte_table_lpm_ipv6.c', - 'rte_table_hash_cuckoo.c', 'rte_table_hash_key8.c', - 'rte_table_hash_key16.c', 'rte_table_hash_key32.c', - 'rte_table_hash_ext.c', 'rte_table_hash_lru.c', - 'rte_table_array.c', 'rte_table_stub.c') -headers = files('rte_table.h', 'rte_table_lpm.h', - 'rte_table_lpm_ipv6.h', 'rte_table_hash.h', - 'rte_lru.h', 'rte_table_array.h', +version = 3 +sources = files('rte_table_acl.c', + 'rte_table_lpm.c', + 'rte_table_lpm_ipv6.c', + 'rte_table_hash_cuckoo.c', + 'rte_table_hash_key8.c', + 'rte_table_hash_key16.c', + 'rte_table_hash_key32.c', + 'rte_table_hash_ext.c', + 'rte_table_hash_lru.c', + 'rte_table_array.c', + 'rte_table_stub.c') +headers = files('rte_table.h', + 'rte_table_acl.h', + 'rte_table_lpm.h', + 'rte_table_lpm_ipv6.h', + 'rte_table_hash.h', + 'rte_table_hash_cuckoo.h', + 'rte_lru.h', + 'rte_table_array.h', 'rte_table_stub.h') -deps += ['mbuf', 'port', 'lpm', 'hash'] +deps += ['mbuf', 'port', 'lpm', 'hash', 'acl'] if arch_subdir == 'x86' headers += files('rte_lru_x86.h') endif - -if dpdk_conf.has('RTE_LIBRTE_ACL') - sources += files('rte_table_acl.c') - headers += files('rte_table_acl.h') - deps += ['acl'] -endif diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c index 73d3910e..14d54019 100644 --- a/lib/librte_table/rte_table_acl.c +++ b/lib/librte_table/rte_table_acl.c @@ -472,12 +472,6 @@ rte_table_acl_entry_add_bulk( return -EINVAL; } - if (entries_ptr[i] == NULL) { - RTE_LOG(ERR, TABLE, "%s: entries_ptr[%" PRIu32 "] parameter is NULL\n", - __func__, i); - return -EINVAL; - } - rule = keys[i]; if (rule->priority > RTE_ACL_MAX_PRIORITY) { RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__); diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h index 7aad84fa..6f55bd57 100644 --- a/lib/librte_table/rte_table_hash.h +++ b/lib/librte_table/rte_table_hash.h @@ -99,9 +99,6 @@ extern struct rte_table_ops rte_table_hash_key8_lru_ops; extern struct rte_table_ops rte_table_hash_key16_lru_ops; extern struct rte_table_ops rte_table_hash_key32_lru_ops; -/** Cuckoo hash table operations */ -extern struct rte_table_ops rte_table_hash_cuckoo_ops; - #ifdef __cplusplus } #endif diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c index dcb4fe97..f0243033 100644 --- a/lib/librte_table/rte_table_hash_cuckoo.c +++ b/lib/librte_table/rte_table_hash_cuckoo.c @@ -10,8 +10,7 @@ #include #include -#include -#include "rte_table_hash.h" +#include "rte_table_hash_cuckoo.h" #ifdef RTE_TABLE_STATS_COLLECT @@ -35,7 +34,7 @@ struct 
rte_table_hash { uint32_t key_size; uint32_t entry_size; uint32_t n_keys; - rte_table_hash_op_hash f_hash; + rte_hash_function f_hash; uint32_t seed; uint32_t key_offset; @@ -47,7 +46,7 @@ struct rte_table_hash { }; static int -check_params_create_hash_cuckoo(struct rte_table_hash_params *params) +check_params_create_hash_cuckoo(struct rte_table_hash_cuckoo_params *params) { if (params == NULL) { RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n"); @@ -82,7 +81,7 @@ rte_table_hash_cuckoo_create(void *params, int socket_id, uint32_t entry_size) { - struct rte_table_hash_params *p = params; + struct rte_table_hash_cuckoo_params *p = params; struct rte_hash *h_table; struct rte_table_hash *t; uint32_t total_size; @@ -107,7 +106,7 @@ rte_table_hash_cuckoo_create(void *params, struct rte_hash_parameters hash_cuckoo_params = { .entries = p->n_keys, .key_len = p->key_size, - .hash_func = (rte_hash_function)(p->f_hash), + .hash_func = p->f_hash, .hash_func_init_val = p->seed, .socket_id = socket_id, .name = p->name diff --git a/lib/librte_table/rte_table_hash_cuckoo.h b/lib/librte_table/rte_table_hash_cuckoo.h new file mode 100644 index 00000000..d9d43121 --- /dev/null +++ b/lib/librte_table/rte_table_hash_cuckoo.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__ +#define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Hash Cuckoo + * + ***/ +#include + +#include + +#include "rte_table.h" + +/** Hash table parameters */ +struct rte_table_hash_cuckoo_params { + /** Name */ + const char *name; + + /** Key size (number of bytes) */ + uint32_t key_size; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Key mask */ + uint8_t *key_mask; + + /** Number of keys */ + uint32_t n_keys; + + /** Number of buckets */ + uint32_t n_buckets; + + /** Hash function */ + rte_hash_function f_hash; + + /** Seed value for the hash function */ + uint32_t seed; +}; + +/** Cuckoo hash table operations */ +extern struct rte_table_ops rte_table_hash_cuckoo_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index 4bbcd067..590488c7 100644 --- a/lib/librte_timer/rte_timer.c +++ b/lib/librte_timer/rte_timer.c @@ -403,7 +403,7 @@ rte_timer_reset(struct rte_timer *tim, uint64_t ticks, if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) && !(rte_lcore_is_enabled(tim_lcore) || - rte_lcore_has_role(tim_lcore, ROLE_SERVICE) == 0))) + rte_lcore_has_role(tim_lcore, ROLE_SERVICE)))) return -1; if (type == PERIODICAL) diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 5d6c6aba..de431fbb 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -18,13 +18,20 @@ LDLIBS += -lpthread ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif -LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \ + -lrte_cryptodev -lrte_hash # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \ - vhost_user.c virtio_net.c + vhost_user.c virtio_net.c vdpa.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h + +# only compile vhost crypto when cryptodev is enabled +ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) 
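For reference, the new public cuckoo hash parameter structure would be filled along these lines before creating a table with rte_table_hash_cuckoo_ops; the key layout and sizes are illustrative only.

#include <rte_hash_crc.h>
#include <rte_table_hash_cuckoo.h>

static struct rte_table_hash_cuckoo_params cuckoo_params = {
	.name = "flow_table",		/* arbitrary name */
	.key_size = 16,
	.key_offset = 0,		/* key location in packet meta-data */
	.key_mask = NULL,		/* no masking */
	.n_keys = 1 << 16,
	.n_buckets = 1 << 16,
	.f_hash = rte_hash_crc,		/* plain rte_hash_function, no cast needed */
	.seed = 0,
};
/* &cuckoo_params is then passed as the creation argument of
 * rte_table_hash_cuckoo_ops (e.g. through a pipeline table configuration). */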
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h +endif include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 181711c2..38347ab1 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -16,6 +16,9 @@ #include "fd_man.h" + +#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1 + #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) static int @@ -171,6 +174,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return directly -2. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return true. + */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in @@ -258,7 +293,7 @@ fdset_event_dispatch(void *arg) * because the fd is closed in the cb, * the old fd val could be reused by when creates new * listen fd in another thread, we couldn't call - * fd_set_del. + * fdset_del. */ if (remove1 || remove2) { pfdentry->fd = -1; @@ -272,3 +307,64 @@ fdset_event_dispatch(void *arg) return NULL; } + +static void +fdset_pipe_read_cb(int readfd, void *dat __rte_unused, + int *remove __rte_unused) +{ + char charbuf[16]; + int r = read(readfd, charbuf, sizeof(charbuf)); + /* + * Just an optimization, we don't care if read() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); +} + +void +fdset_pipe_uninit(struct fdset *fdset) +{ + fdset_del(fdset, fdset->u.readfd); + close(fdset->u.readfd); + close(fdset->u.writefd); +} + +int +fdset_pipe_init(struct fdset *fdset) +{ + int ret; + + if (pipe(fdset->u.pipefd) < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + ret = fdset_add(fdset, fdset->u.readfd, + fdset_pipe_read_cb, NULL, NULL); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to add pipe readfd %d into vhost server fdset\n", + fdset->u.readfd); + + fdset_pipe_uninit(fdset); + return -1; + } + + return 0; +} + +void +fdset_pipe_notify(struct fdset *fdset) +{ + int r = write(fdset->u.writefd, "1", 1); + /* + * Just an optimization, we don't care if write() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); + +} diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 3a9276c3..3331bcd9 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -25,6 +25,16 @@ struct fdset { struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; int num; /* current fd number of this fdset */ + + union pipefds { + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; + } u; }; @@ -34,7 +44,14 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); +int fdset_pipe_init(struct fdset *fdset); + +void 
fdset_pipe_uninit(struct fdset *fdset); + +void fdset_pipe_notify(struct fdset *fdset); + #endif diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c index c11ebcaa..c6354fef 100644 --- a/lib/librte_vhost/iotlb.c +++ b/lib/librte_vhost/iotlb.c @@ -303,6 +303,13 @@ out: return vva; } +void +vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) +{ + vhost_user_iotlb_cache_remove_all(vq); + vhost_user_iotlb_pending_remove_all(vq); +} + int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) { @@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) * The cache has already been initialized, * just drop all cached and pending entries. */ - vhost_user_iotlb_cache_remove_all(vq); - vhost_user_iotlb_pending_remove_all(vq); + vhost_user_iotlb_flush_all(vq); } #ifdef RTE_LIBRTE_VHOST_NUMA diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h index e7083e37..60b9e4c5 100644 --- a/lib/librte_vhost/iotlb.h +++ b/lib/librte_vhost/iotlb.h @@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova, uint8_t perm); void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); - +void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); #endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build index 9e8c0e76..bd62e0e3 100644 --- a/lib/librte_vhost/meson.build +++ b/lib/librte_vhost/meson.build @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation +# Copyright(c) 2017-2018 Intel Corporation if host_machine.system() != 'linux' build = false @@ -9,7 +9,8 @@ if has_libnuma == 1 endif version = 4 allow_experimental_apis = true -sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c', - 'virtio_net.c') -headers = files('rte_vhost.h') -deps += ['ethdev'] +sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c', + 'vhost.c', 'vhost_user.c', + 'virtio_net.c', 'vhost_crypto.c') +headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vhost_crypto.h') +deps += ['ethdev', 'cryptodev', 'hash', 'pci'] diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h new file mode 100644 index 00000000..90465ca2 --- /dev/null +++ b/lib/librte_vhost/rte_vdpa.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_VDPA_H_ +#define _RTE_VDPA_H_ + +/** + * @file + * + * Device specific vhost lib + */ + +#include +#include "rte_vhost.h" + +#define MAX_VDPA_NAME_LEN 128 + +enum vdpa_addr_type { + PCI_ADDR, + VDPA_ADDR_MAX +}; + +struct rte_vdpa_dev_addr { + enum vdpa_addr_type type; + union { + uint8_t __dummy[64]; + struct rte_pci_addr pci_addr; + }; +}; + +struct rte_vdpa_dev_ops { + /* Get capabilities of this device */ + int (*get_queue_num)(int did, uint32_t *queue_num); + int (*get_features)(int did, uint64_t *features); + int (*get_protocol_features)(int did, uint64_t *protocol_features); + + /* Driver configure/close the device */ + int (*dev_conf)(int vid); + int (*dev_close)(int vid); + + /* Enable/disable this vring */ + int (*set_vring_state)(int vid, int vring, int state); + + /* Set features when changed */ + int (*set_features)(int vid); + + /* Destination operations when migration done */ + int (*migration_done)(int vid); + + /* Get the vfio group fd */ + int (*get_vfio_group_fd)(int vid); + + /* Get the vfio device fd */ + int 
(*get_vfio_device_fd)(int vid); + + /* Get the notify area info of the queue */ + int (*get_notify_area)(int vid, int qid, + uint64_t *offset, uint64_t *size); + + /* Reserved for future extension */ + void *reserved[5]; +}; + +struct rte_vdpa_device { + struct rte_vdpa_dev_addr addr; + struct rte_vdpa_dev_ops *ops; +} __rte_cache_aligned; + +/* Register a vdpa device, return did if successful, -1 on failure */ +int __rte_experimental +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops); + +/* Unregister a vdpa device, return -1 on failure */ +int __rte_experimental +rte_vdpa_unregister_device(int did); + +/* Find did of a vdpa device, return -1 on failure */ +int __rte_experimental +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr); + +/* Find a vdpa device based on did */ +struct rte_vdpa_device * __rte_experimental +rte_vdpa_get_device(int did); + +#endif /* _RTE_VDPA_H_ */ diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index d3320699..b02673d4 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -29,6 +29,48 @@ extern "C" { #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) +/** Protocol features. */ +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_RARP +#define VHOST_USER_PROTOCOL_F_RARP 2 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_NET_MTU +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION +#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD +#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER +#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 +#endif + +/** Indicate whether protocol features negotiation is supported. */ +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + /** * Information relating to memory regions including offsets to * addresses in QEMUs memory file. @@ -90,6 +132,11 @@ struct vhost_device_ops { /** * Convert guest physical address to host virtual address * + * This function is deprecated because unsafe. + * New rte_vhost_va_from_guest_pa() should be used instead to ensure + * guest physical ranges are fully and contiguously mapped into + * process virtual address space. + * * @param mem * the guest memory regions * @param gpa @@ -97,6 +144,7 @@ struct vhost_device_ops { * @return * the host virtual address on success, 0 on failure */ +__rte_deprecated static __rte_always_inline uint64_t rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) { @@ -115,6 +163,46 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) return 0; } +/** + * Convert guest physical address to host virtual address safely + * + * This variant of rte_vhost_gpa_to_vva() takes care all the + * requested length is mapped and contiguous in process address + * space. 
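A short usage sketch of the safe translation variant declared here; the helper name is hypothetical.

#include <stdint.h>
#include <rte_vhost.h>

static void *
guest_buf_to_host(struct rte_vhost_memory *mem, uint64_t gpa, uint64_t want)
{
	uint64_t len = want;
	uint64_t vva = rte_vhost_va_from_guest_pa(mem, gpa, &len);

	if (vva == 0 || len < want)
		return NULL;	/* unmapped, or not contiguous for the full length */

	return (void *)(uintptr_t)vva;
}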
+ * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @param len + * the size of the requested area to map, updated with actual size mapped + * @return + * the host virtual address on success, 0 on failure + */ +static __rte_always_inline uint64_t +rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, + uint64_t gpa, uint64_t *len) +{ + struct rte_vhost_mem_region *r; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + r = &mem->regions[i]; + if (gpa >= r->guest_phys_addr && + gpa < r->guest_phys_addr + r->size) { + + if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) + *len = r->guest_phys_addr + r->size - gpa; + + return gpa - r->guest_phys_addr + + r->host_user_addr; + } + } + *len = 0; + + return 0; +} + #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) /** @@ -169,6 +257,41 @@ int rte_vhost_driver_register(const char *path, uint64_t flags); /* Unregister vhost driver. This is only meaningful to vhost user. */ int rte_vhost_driver_unregister(const char *path); +/** + * Set the vdpa device id, enforce single connection per socket + * + * @param path + * The vhost-user socket file path + * @param did + * Device id + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_attach_vdpa_device(const char *path, int did); + +/** + * Unset the vdpa device id + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_detach_vdpa_device(const char *path); + +/** + * Get the device id + * + * @param path + * The vhost-user socket file path + * @return + * Device id, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_vdpa_device_id(const char *path); + /** * Set the feature bits the vhost-user driver supports. * @@ -224,6 +347,33 @@ int rte_vhost_driver_disable_features(const char *path, uint64_t features); */ int rte_vhost_driver_get_features(const char *path, uint64_t *features); +/** + * Get the protocol feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param protocol_features + * A pointer to store the queried protocol feature bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features); + +/** + * Get the queue number bits before feature negotiation. 
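A sketch of how a vDPA driver and an application tie together through these calls (build with -DALLOW_EXPERIMENTAL_API); the device ops table and the PCI address are placeholders.

#include <stdint.h>
#include <rte_pci.h>
#include <rte_vdpa.h>
#include <rte_vhost.h>

static int
bind_vdpa_backend(const char *path, struct rte_vdpa_dev_ops *dev_ops)
{
	struct rte_vdpa_dev_addr addr = {
		.type = PCI_ADDR,
		.pci_addr = { .domain = 0, .bus = 0x05, .devid = 0x00, .function = 1 },
	};
	uint64_t features;
	uint32_t queue_num;
	int did;

	did = rte_vdpa_register_device(&addr, dev_ops);
	if (did < 0)
		return -1;

	if (rte_vhost_driver_attach_vdpa_device(path, did) < 0)
		return -1;

	/* Advertised capabilities are now the intersection with the backend's */
	if (rte_vhost_driver_get_features(path, &features) < 0 ||
			rte_vhost_driver_get_queue_num(path, &queue_num) < 0)
		return -1;

	return did;
}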
+ * + * @param path + * The vhost-user socket file path + * @param queue_num + * A pointer to store the queried queue number bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num); + /** * Get the feature bits after negotiation * @@ -434,6 +584,68 @@ int rte_vhost_vring_call(int vid, uint16_t vring_idx); */ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid); +/** + * Get log base and log size of the vhost device + * + * @param vid + * vhost device ID + * @param log_base + * vhost log base + * @param log_size + * vhost log size + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size); + +/** + * Get last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * vhost last_avail_idx to get + * @param last_used_idx + * vhost last_used_idx to get + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx); + +/** + * Set last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * last_avail_idx to set + * @param last_used_idx + * last_used_idx to set + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx); + +/** + * Get vdpa device id for vhost device. + * + * @param vid + * vhost device id + * @return + * device id + */ +int __rte_experimental +rte_vhost_get_vdpa_device_id(int vid); + #ifdef __cplusplus } #endif diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h new file mode 100644 index 00000000..f9fbc054 --- /dev/null +++ b/lib/librte_vhost/rte_vhost_crypto.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _VHOST_CRYPTO_H_ +#define _VHOST_CRYPTO_H_ + +#define VHOST_CRYPTO_MBUF_POOL_SIZE (8192) +#define VHOST_CRYPTO_MAX_BURST_SIZE (64) +#define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */ +/** max nb virtual queues in a burst for finalizing*/ +#define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64) + +enum rte_vhost_crypto_zero_copy { + RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0, + RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE = 1, + RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS +}; + +/** + * Create Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @param cryptodev_id + * The identifier of DPDK Cryptodev, the same cryptodev_id can be assigned to + * multiple Vhost-crypto devices. + * @param sess_pool + * The pointer to the created cryptodev session pool with the private data size + * matches the target DPDK Cryptodev. + * @param socket_id + * NUMA Socket ID to allocate resources on. * + * @return + * 0 if the Vhost Crypto Instance is created successfully. + * Negative integer if otherwise + */ +int __rte_experimental +rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + struct rte_mempool *sess_pool, int socket_id); + +/** + * Free the Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @return + * 0 if the Vhost Crypto Instance is created successfully. + * Negative integer if otherwise. 
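A simplified polling sketch of the vhost-crypto data path; error handling and op reclamation are omitted, and the cryptodev id, queue pair and op pool are assumed to be configured by the application.

#include <rte_cryptodev.h>
#include <rte_vhost_crypto.h>

static void
vhost_crypto_poll(int vid, uint32_t vq_id, uint8_t cdev_id, uint16_t qp_id,
		struct rte_mempool *cop_pool)
{
	struct rte_crypto_op *ops[VHOST_CRYPTO_MAX_BURST_SIZE];
	int callfds[VIRTIO_CRYPTO_MAX_NUM_BURST_VQS];
	uint16_t nb, nb_callfds;

	/* ops must be pre-allocated before fetching guest requests */
	if (rte_crypto_op_bulk_alloc(cop_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC,
			ops, VHOST_CRYPTO_MAX_BURST_SIZE) == 0)
		return;

	nb = rte_vhost_crypto_fetch_requests(vid, vq_id, ops,
			VHOST_CRYPTO_MAX_BURST_SIZE);
	nb = rte_cryptodev_enqueue_burst(cdev_id, qp_id, ops, nb);

	nb = rte_cryptodev_dequeue_burst(cdev_id, qp_id, ops,
			VHOST_CRYPTO_MAX_BURST_SIZE);
	nb = rte_vhost_crypto_finalize_requests(ops, nb, callfds, &nb_callfds);
	/* callfds[0..nb_callfds-1] identify the guest queues to be kicked */
}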
+ */ +int __rte_experimental +rte_vhost_crypto_free(int vid); + +/** + * Enable or disable zero copy feature + * + * @param vid + * The identifier of the vhost device. + * @param option + * Flag of zero copy feature. + * @return + * 0 if completed successfully. + * Negative integer if otherwise. + */ +int __rte_experimental +rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option); + +/** + * Fetch a number of vring descriptors from virt-queue and translate to DPDK + * crypto operations. After this function is executed, the user can enqueue + * the processed ops to the target cryptodev. + * + * @param vid + * The identifier of the vhost device. + * @param qid + * Virtio queue index. + * @param ops + * The address of an array of pointers to *rte_crypto_op* structures that must + * be large enough to store *nb_ops* pointers in it. + * @param nb_ops + * The maximum number of operations to be fetched and translated. + * @return + * The number of fetched and processed vhost crypto request operations. + */ +uint16_t __rte_experimental +rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops); +/** + * Finalize the dequeued crypto ops. After the translated crypto ops are + * dequeued from the cryptodev, this function shall be called to write the + * processed data back to the vring descriptor (if no-copy is turned off). + * + * @param ops + * The address of an array of *rte_crypto_op* structure that was dequeued + * from cryptodev. + * @param nb_ops + * The number of operations contained in the array. + * @callfds + * The callfd number(s) contained in this burst, this shall be an array with + * no less than VIRTIO_CRYPTO_MAX_NUM_BURST_VQS elements. + * @nb_callfds + * The number of call_fd numbers exist in the callfds. + * @return + * The number of ops processed. + */ +uint16_t __rte_experimental +rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds); + +#endif /**< _VHOST_CRYPTO_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index df010312..da220dd0 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -59,3 +59,27 @@ DPDK_18.02 { rte_vhost_vring_call; } DPDK_17.08; + +EXPERIMENTAL { + global: + + rte_vdpa_register_device; + rte_vdpa_unregister_device; + rte_vdpa_find_device_id; + rte_vdpa_get_device; + rte_vhost_driver_attach_vdpa_device; + rte_vhost_driver_detach_vdpa_device; + rte_vhost_driver_get_vdpa_device_id; + rte_vhost_get_vdpa_device_id; + rte_vhost_driver_get_protocol_features; + rte_vhost_driver_get_queue_num; + rte_vhost_get_log_base; + rte_vhost_get_vring_base; + rte_vhost_set_vring_base; + rte_vhost_crypto_create; + rte_vhost_crypto_free; + rte_vhost_crypto_fetch_requests; + rte_vhost_crypto_finalize_requests; + rte_vhost_crypto_set_zero_copy; + rte_vhost_va_from_guest_pa; +}; diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 83befdce..d6303174 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -52,6 +51,13 @@ struct vhost_user_socket { uint64_t supported_features; uint64_t features; + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + * If valid, one socket can have 1 connection only. 
+ */ + int vdpa_dev_id; + struct vhost_device_ops const *notify_ops; }; @@ -97,6 +103,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) size_t fdsize = fd_num * sizeof(int); char control[CMSG_SPACE(fdsize)]; struct cmsghdr *cmsg; + int got_fds = 0; int ret; memset(&msgh, 0, sizeof(msgh)); @@ -123,11 +130,16 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) cmsg = CMSG_NXTHDR(&msgh, cmsg)) { if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SCM_RIGHTS)) { - memcpy(fds, CMSG_DATA(cmsg), fdsize); + got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); break; } } + /* Clear out unused file descriptors */ + while (got_fds < fd_num) + fds[got_fds++] = -1; + return ret; } @@ -153,6 +165,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) msgh.msg_control = control; msgh.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; @@ -163,7 +180,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) } do { - ret = sendmsg(sockfd, &msgh, 0); + ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); } while (ret < 0 && errno == EINTR); if (ret < 0) { @@ -182,6 +199,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) struct vhost_user_connection *conn; int ret; + if (vsocket == NULL) + return; + conn = malloc(sizeof(*conn)); if (conn == NULL) { close(fd); @@ -198,6 +218,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); + vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id); + if (vsocket->dequeue_zero_copy) vhost_enable_dequeue_zero_copy(vid); @@ -232,6 +254,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); + + fdset_pipe_notify(&vhost_user.fdset); return; err: @@ -318,6 +342,16 @@ vhost_user_start_server(struct vhost_user_socket *vsocket) int fd = vsocket->socket_fd; const char *path = vsocket->path; + /* + * bind () may fail if the socket file with the same name already + * exists. But the library obviously should not delete the file + * provided by the user, since we can not be sure that it is not + * being used by other applications. Moreover, many applications form + * socket names based on user input, which is prone to errors. + * + * The user must ensure that the socket does not exist before + * registering the vhost driver in server mode. 
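In line with the comment above, a server-mode application typically clears a stale socket path itself before registering; feature setup and rte_vhost_driver_callback_register() are omitted from this sketch.

#include <unistd.h>
#include <rte_vhost.h>

static int
start_server_socket(const char *path)
{
	/* the library will not delete an existing file, so remove it explicitly */
	unlink(path);

	if (rte_vhost_driver_register(path, 0) < 0)	/* flags 0 = server mode */
		return -1;

	return rte_vhost_driver_start(path);
}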
+ */ ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, @@ -436,7 +470,6 @@ static int vhost_user_reconnect_init(void) { int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; ret = pthread_mutex_init(&reconn_list.mutex, NULL); if (ret < 0) { @@ -445,7 +478,7 @@ vhost_user_reconnect_init(void) } TAILQ_INIT(&reconn_list.head); - ret = pthread_create(&reconn_tid, NULL, + ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, vhost_user_client_reconnect, NULL); if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); @@ -453,14 +486,6 @@ vhost_user_reconnect_init(void) RTE_LOG(ERR, VHOST_CONFIG, "failed to destroy reconnect mutex"); } - } else { - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "vhost-reconn"); - - if (rte_thread_setname(reconn_tid, thread_name)) { - RTE_LOG(DEBUG, VHOST_CONFIG, - "failed to set reconnect thread name"); - } } return ret; @@ -523,6 +548,52 @@ find_vhost_user_socket(const char *path) return NULL; } +int +rte_vhost_driver_attach_vdpa_device(const char *path, int did) +{ + struct vhost_user_socket *vsocket; + + if (rte_vdpa_get_device(did) == NULL) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = did; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_detach_vdpa_device(const char *path) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = -1; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_get_vdpa_device_id(const char *path) +{ + struct vhost_user_socket *vsocket; + int did = -1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + did = vsocket->vdpa_dev_id; + pthread_mutex_unlock(&vhost_user.mutex); + + return did; +} + int rte_vhost_driver_disable_features(const char *path, uint64_t features) { @@ -591,19 +662,136 @@ int rte_vhost_driver_get_features(const char *path, uint64_t *features) { struct vhost_user_socket *vsocket; + uint64_t vdpa_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; pthread_mutex_lock(&vhost_user.mutex); vsocket = find_vhost_user_socket(path); - if (vsocket) + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_features) { *features = vsocket->features; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *features = vsocket->features & vdpa_features; + +unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +int +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features) +{ + struct vhost_user_socket *vsocket; + uint64_t vdpa_protocol_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); if (!vsocket) { RTE_LOG(ERR, VHOST_CONFIG, "socket file %s is not registered yet.\n", path); - return -1; - } else { - return 0; + ret = -1; + goto unlock_exit; + } + + did = 
vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { + *protocol_features = VHOST_USER_PROTOCOL_FEATURES; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_protocol_features(did, + &vdpa_protocol_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa protocol features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *protocol_features = VHOST_USER_PROTOCOL_FEATURES + & vdpa_protocol_features; + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +int +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) +{ + struct vhost_user_socket *vsocket; + uint32_t vdpa_queue_num; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) { + *queue_num = VHOST_MAX_QUEUE_PAIRS; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa queue number " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num); + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +static void +vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) +{ + if (vsocket && vsocket->path) { + free(vsocket->path); + vsocket->path = NULL; + } + + if (vsocket) { + free(vsocket); + vsocket = NULL; } } @@ -637,7 +825,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) if (vsocket->path == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "error: failed to copy socket path string\n"); - free(vsocket); + vhost_user_socket_mem_free(vsocket); goto out; } TAILQ_INIT(&vsocket->conn_list); @@ -665,6 +853,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + /* Dequeue zero copy can't assure descriptors returned in order */ + if (vsocket->dequeue_zero_copy) { + vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); + vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); + } + if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); @@ -695,8 +889,7 @@ out_mutex: "error: failed to destroy connection mutex\n"); } out_free: - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); out: pthread_mutex_unlock(&vhost_user.mutex); @@ -743,21 +936,25 @@ rte_vhost_driver_unregister(const char *path) struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; if (!strcmp(vsocket->path, path)) { - if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->socket_fd); - close(vsocket->socket_fd); - unlink(path); - } else if (vsocket->reconnect) { - vhost_user_remove_reconnect(vsocket); - } - +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may use the 
conn_mutex lock. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path); @@ -768,9 +965,17 @@ rte_vhost_driver_unregister(const char *path) } pthread_mutex_unlock(&vsocket->conn_mutex); + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, + vsocket->socket_fd); + close(vsocket->socket_fd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + pthread_mutex_destroy(&vsocket->conn_mutex); - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); count = --vhost_user.vsocket_cnt; vhost_user.vsockets[i] = vhost_user.vsockets[count]; @@ -829,11 +1034,26 @@ rte_vhost_driver_start(const char *path) return -1; if (fdset_tid == 0) { - int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, - &vhost_user.fdset); - if (ret != 0) + /** + * create a pipe which will be waited by poll and notified to + * rebuild the wait list of poll. + */ + if (fdset_pipe_init(&vhost_user.fdset) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + int ret = rte_ctrl_thread_create(&fdset_tid, + "vhost-events", NULL, fdset_event_dispatch, + &vhost_user.fdset); + if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create fdset handling thread"); + + fdset_pipe_uninit(&vhost_user.fdset); + return -1; + } } if (vsocket->is_server) diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c new file mode 100644 index 00000000..c82fd437 --- /dev/null +++ b/lib/librte_vhost/vdpa.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +/** + * @file + * + * Device specific vhost lib + */ + +#include + +#include +#include "rte_vdpa.h" +#include "vhost.h" + +static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE]; +static uint32_t vdpa_device_num; + +static bool +is_same_vdpa_device(struct rte_vdpa_dev_addr *a, + struct rte_vdpa_dev_addr *b) +{ + bool ret = true; + + if (a->type != b->type) + return false; + + switch (a->type) { + case PCI_ADDR: + if (a->pci_addr.domain != b->pci_addr.domain || + a->pci_addr.bus != b->pci_addr.bus || + a->pci_addr.devid != b->pci_addr.devid || + a->pci_addr.function != b->pci_addr.function) + ret = false; + break; + default: + break; + } + + return ret; +} + +int +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops) +{ + struct rte_vdpa_device *dev; + char device_name[MAX_VDPA_NAME_LEN]; + int i; + + if (vdpa_device_num >= MAX_VHOST_DEVICE) + return -1; + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vdpa_devices[i] == NULL) + break; + } + + sprintf(device_name, "vdpa-dev-%d", i); + dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), + RTE_CACHE_LINE_SIZE); + if (!dev) + return -1; + + memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr)); + dev->ops = ops; + vdpa_devices[i] = dev; + vdpa_device_num++; + + return i; +} + +int +rte_vdpa_unregister_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL) + return -1; + + rte_free(vdpa_devices[did]); + vdpa_devices[did] = NULL; + vdpa_device_num--; + + return did; +} + +int +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr) +{ + struct rte_vdpa_device *dev; + int i; 
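+
+ /*
+  * Lookup is a linear scan of the fixed-size vdpa_devices[] table:
+  * a device id is simply the slot index assigned at registration
+  * time, and entries are matched on bus address via
+  * is_same_vdpa_device().
+  */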
+ + for (i = 0; i < MAX_VHOST_DEVICE; ++i) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return i; + } + + return -1; +} + +struct rte_vdpa_device * +rte_vdpa_get_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE) + return NULL; + + return vdpa_devices[did]; +} diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index a407067e..3c9be10a 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2017 Intel Corporation */ #include @@ -29,17 +29,17 @@ struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; /* Called with iotlb_lock read-locked */ uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm) + uint64_t iova, uint64_t *size, uint8_t perm) { uint64_t vva, tmp_size; - if (unlikely(!size)) + if (unlikely(!*size)) return 0; - tmp_size = size; + tmp_size = *size; vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm); - if (tmp_size == size) + if (tmp_size == *size) return vva; iova += tmp_size; @@ -68,19 +68,6 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -struct virtio_net * -get_device(int vid) -{ - struct virtio_net *dev = vhost_devices[vid]; - - if (unlikely(!dev)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) device not found.\n", vid); - } - - return dev; -} - void cleanup_vq(struct vhost_virtqueue *vq, int destroy) { @@ -106,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy) } void -free_vq(struct vhost_virtqueue *vq) +free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) { - rte_free(vq->shadow_used_ring); + if (vq_is_packed(dev)) + rte_free(vq->shadow_used_packed); + else + rte_free(vq->shadow_used_split); rte_free(vq->batch_copy_elems); rte_mempool_free(vq->iotlb_pool); rte_free(vq); @@ -123,42 +113,95 @@ free_device(struct virtio_net *dev) uint32_t i; for (i = 0; i < dev->nr_vring; i++) - free_vq(dev->virtqueue[i]); + free_vq(dev, dev->virtqueue[i]); rte_free(dev); } -int -vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) +static int +vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { - uint64_t size; + uint64_t req_size, size; - if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) - goto out; - - size = sizeof(struct vring_desc) * vq->size; + req_size = sizeof(struct vring_desc) * vq->size; + size = req_size; vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->desc) + &size, VHOST_ACCESS_RW); + if (!vq->desc || size != req_size) return -1; - size = sizeof(struct vring_avail); - size += sizeof(uint16_t) * vq->size; + req_size = sizeof(struct vring_avail); + req_size += sizeof(uint16_t) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + req_size += sizeof(uint16_t); + size = req_size; vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->avail) + &size, VHOST_ACCESS_RW); + if (!vq->avail || size != req_size) return -1; - size = sizeof(struct vring_used); - size += sizeof(struct vring_used_elem) * vq->size; + req_size = sizeof(struct vring_used); + req_size += sizeof(struct vring_used_elem) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + req_size += sizeof(uint16_t); + size = req_size; vq->used = (struct 
vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->used) + &size, VHOST_ACCESS_RW); + if (!vq->used || size != req_size) + return -1; + + return 0; +} + +static int +vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint64_t req_size, size; + + req_size = sizeof(struct vring_packed_desc) * vq->size; + size = req_size; + vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->desc_packed || size != req_size) return -1; + req_size = sizeof(struct vring_packed_desc_event); + size = req_size; + vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->driver_event || size != req_size) + return -1; + + req_size = sizeof(struct vring_packed_desc_event); + size = req_size; + vq->device_event = (struct vring_packed_desc_event *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->device_event || size != req_size) + return -1; + + return 0; +} + +int +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + goto out; + + if (vq_is_packed(dev)) { + if (vring_translate_packed(dev, vq) < 0) + return -1; + } else { + if (vring_translate_split(dev, vq) < 0) + return -1; + } out: vq->access_ok = 1; @@ -240,6 +283,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) dev->virtqueue[vring_idx] = vq; init_vring_queue(dev, vring_idx); rte_spinlock_init(&vq->access_lock); + vq->avail_wrap_counter = 1; + vq->used_wrap_counter = 1; + vq->signalled_used_valid = false; dev->nr_vring += 1; @@ -274,21 +320,21 @@ vhost_new_device(void) struct virtio_net *dev; int i; - dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); - if (dev == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for new dev.\n"); - return -1; - } - for (i = 0; i < MAX_VHOST_DEVICE; i++) { if (vhost_devices[i] == NULL) break; } + if (i == MAX_VHOST_DEVICE) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to find a free slot for new device.\n"); - rte_free(dev); + return -1; + } + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for new dev.\n"); return -1; } @@ -296,10 +342,28 @@ vhost_new_device(void) dev->vid = i; dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->slave_req_fd = -1; + dev->vdpa_dev_id = -1; + rte_spinlock_init(&dev->slave_req_lock); return i; } +void +vhost_destroy_device_notify(struct virtio_net *dev) +{ + struct rte_vdpa_device *vdpa_dev; + int did; + + if (dev->flags & VIRTIO_DEV_RUNNING) { + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->dev_close) + vdpa_dev->ops->dev_close(dev->vid); + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } +} + /* * Invoked when there is the vhost-user connection is broken (when * the virtio device is being detached). 
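vhost_destroy_device_notify() above first gives an attached vDPA backend a chance to close its datapath (ops->dev_close) and then fires the application's destroy_device callback. For context, here is a minimal sketch of the application-side callbacks these notifications reach, using the public librte_vhost API; the callback bodies and the setup helper are illustrative only, not part of this patch:

	#include <rte_vhost.h>

	static int
	new_device(int vid)
	{
		/* datapath for this vid may start now */
		(void)vid;
		return 0;
	}

	static void
	destroy_device(int vid)
	{
		/* reached from vhost_destroy_device_notify(); quiesce per-vid state */
		(void)vid;
	}

	static const struct vhost_device_ops ops = {
		.new_device = new_device,
		.destroy_device = destroy_device,
	};

	static int
	setup_vhost(const char *path)
	{
		if (rte_vhost_driver_register(path, 0) < 0)
			return -1;
		if (rte_vhost_driver_callback_register(path, &ops) < 0)
			return -1;
		return rte_vhost_driver_start(path);
	}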
@@ -312,10 +376,7 @@ vhost_destroy_device(int vid) if (dev == NULL) return; - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(vid); - } + vhost_destroy_device_notify(dev); cleanup_device(dev, 1); free_device(dev); @@ -323,6 +384,33 @@ vhost_destroy_device(int vid) vhost_devices[vid] = NULL; } +void +vhost_attach_vdpa_device(int vid, int did) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (rte_vdpa_get_device(did) == NULL) + return; + + dev->vdpa_dev_id = did; +} + +void +vhost_detach_vdpa_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + vhost_user_host_notifier_ctrl(vid, false); + + dev->vdpa_dev_id = -1; +} + void vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) { @@ -532,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) if (!vq) return -1; - vhost_vring_call(dev, vq); + if (vq_is_packed(dev)) + vhost_vring_call_packed(dev, vq); + else + vhost_vring_call_split(dev, vq); + return 0; } @@ -553,21 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; } +static inline void +vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable) +{ + if (enable) + vq->used->flags &= ~VRING_USED_F_NO_NOTIFY; + else + vq->used->flags |= VRING_USED_F_NO_NOTIFY; +} + +static inline void +vhost_enable_notify_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, int enable) +{ + uint16_t flags; + + if (!enable) + vq->device_event->flags = VRING_EVENT_F_DISABLE; + + flags = VRING_EVENT_F_ENABLE; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { + flags = VRING_EVENT_F_DESC; + vq->device_event->off_wrap = vq->last_avail_idx | + vq->avail_wrap_counter << 15; + } + + rte_smp_wmb(); + + vq->device_event->flags = flags; +} + int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) { struct virtio_net *dev = get_device(vid); + struct vhost_virtqueue *vq; - if (dev == NULL) + if (!dev) return -1; - if (enable) { - RTE_LOG(ERR, VHOST_CONFIG, - "guest notification isn't supported.\n"); - return -1; - } + vq = dev->virtqueue[queue_id]; + + if (vq_is_packed(dev)) + vhost_enable_notify_packed(dev, vq, enable); + else + vhost_enable_notify_split(vq, enable); - dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; return 0; } @@ -627,3 +750,76 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; } + +int rte_vhost_get_vdpa_device_id(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + return dev->vdpa_dev_id; +} + +int rte_vhost_get_log_base(int vid, uint64_t *log_base, + uint64_t *log_size) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *log_base = dev->log_base; + *log_size = dev->log_size; + + return 0; +} + +int rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *last_avail_idx 
= dev->virtqueue[queue_id]->last_avail_idx; + *last_used_idx = dev->virtqueue[queue_id]->last_used_idx; + + return 0; +} + +int rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx; + dev->virtqueue[queue_id]->last_used_idx = last_used_idx; + + return 0; +} diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index d947bc9e..760a09c0 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -1,11 +1,12 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_CDEV_H_ #define _VHOST_NET_CDEV_H_ #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include "rte_vhost.h" +#include "rte_vdpa.h" /* Used to indicate that the device is running on a data core */ #define VIRTIO_DEV_RUNNING 1 @@ -26,17 +28,22 @@ #define VIRTIO_DEV_READY 2 /* Used to indicate that the built-in vhost net device backend is enabled */ #define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4 +/* Used to indicate that the device has its own data path and configured */ +#define VIRTIO_DEV_VDPA_CONFIGURED 8 /* Backend value set by guest. */ #define VIRTIO_DEV_STOPPED -1 #define BUF_VECTOR_MAX 256 +#define VHOST_LOG_CACHE_NR 32 + /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. */ struct buf_vector { + uint64_t buf_iova; uint64_t buf_addr; uint32_t buf_len; uint32_t desc_idx; @@ -49,6 +56,7 @@ struct buf_vector { struct zcopy_mbuf { struct rte_mbuf *mbuf; uint32_t desc_idx; + uint16_t desc_count; uint16_t in_use; TAILQ_ENTRY(zcopy_mbuf) next; @@ -65,19 +73,43 @@ struct batch_copy_elem { uint64_t log_addr; }; +/* + * Structure that contains the info for batched dirty logging. + */ +struct log_cache_entry { + uint32_t offset; + unsigned long val; +}; + +struct vring_used_elem_packed { + uint16_t id; + uint32_t len; + uint32_t count; +}; + /** * Structure contains variables relevant to RX/TX virtqueues. */ struct vhost_virtqueue { - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; + union { + struct vring_desc *desc; + struct vring_packed_desc *desc_packed; + }; + union { + struct vring_avail *avail; + struct vring_packed_desc_event *driver_event; + }; + union { + struct vring_used *used; + struct vring_packed_desc_event *device_event; + }; uint32_t size; uint16_t last_avail_idx; uint16_t last_used_idx; /* Last used index we notify to front end. 
*/ uint16_t signalled_used; + bool signalled_used_valid; #define VIRTIO_INVALID_EVENTFD (-1) #define VIRTIO_UNINITIALIZED_EVENTFD (-2) @@ -101,12 +133,20 @@ struct vhost_virtqueue { struct zcopy_mbuf *zmbufs; struct zcopy_mbuf_list zmbuf_list; - struct vring_used_elem *shadow_used_ring; + union { + struct vring_used_elem *shadow_used_split; + struct vring_used_elem_packed *shadow_used_packed; + }; uint16_t shadow_used_idx; struct vhost_vring_addr ring_addrs; struct batch_copy_elem *batch_copy_elems; uint16_t batch_copy_nb_elems; + bool used_wrap_counter; + bool avail_wrap_counter; + + struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR]; + uint16_t log_cache_nb_elem; rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; @@ -174,7 +214,41 @@ struct vhost_msg { #define VIRTIO_F_VERSION_1 32 #endif -#define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* Declare packed ring related bits for older kernels */ +#ifndef VIRTIO_F_RING_PACKED + +#define VIRTIO_F_RING_PACKED 34 + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 +#define VRING_DESC_F_INDIRECT 4 + +#define VRING_DESC_F_AVAIL (1ULL << 7) +#define VRING_DESC_F_USED (1ULL << 15) + +struct vring_packed_desc { + uint64_t addr; + uint32_t len; + uint16_t id; + uint16_t flags; +}; + +#define VRING_EVENT_F_ENABLE 0x0 +#define VRING_EVENT_F_DISABLE 0x1 +#define VRING_EVENT_F_DESC 0x2 + +struct vring_packed_desc_event { + uint16_t off_wrap; + uint16_t flags; +}; +#endif + +/* + * Available and used descs are in same order + */ +#ifndef VIRTIO_F_IN_ORDER +#define VIRTIO_F_IN_ORDER 35 +#endif /* Features supported by this builtin vhost-user net driver. */ #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ @@ -199,7 +273,8 @@ struct vhost_msg { (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ - (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_F_IN_ORDER) | \ (1ULL << VIRTIO_F_IOMMU_PLATFORM)) @@ -209,6 +284,51 @@ struct guest_page { uint64_t size; }; +/** + * function prototype for the vhost backend to handler specific vhost user + * messages prior to the master message handling + * + * @param vid + * vhost device id + * @param msg + * Message pointer. + * @param require_reply + * If the handler requires sending a reply, this varaible shall be written 1, + * otherwise 0. + * @param skip_master + * If the handler requires skipping the master message handling, this variable + * shall be written 1, otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_pre_handle)(int vid, void *msg, + uint32_t *require_reply, uint32_t *skip_master); + +/** + * function prototype for the vhost backend to handler specific vhost user + * messages after the master message handling is done + * + * @param vid + * vhost device id + * @param msg + * Message pointer. + * @param require_reply + * If the handler requires sending a reply, this varaible shall be written 1, + * otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_post_handle)(int vid, void *msg, + uint32_t *require_reply); + +/** + * pre and post vhost user message handlers + */ +struct vhost_user_extern_ops { + vhost_msg_pre_handle pre_msg_handle; + vhost_msg_post_handle post_msg_handle; +}; + /** * Device structure contains all configuration information relating * to the device. 
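The vring_packed_desc_event layout declared above packs a ring offset and a wrap counter into the single 16-bit off_wrap field: bits 0-14 carry the descriptor index and bit 15 the wrap counter, which is how vhost_enable_notify_packed() and vhost_vring_call_packed() in this patch encode and decode it. A small stand-alone sketch of that encoding; the helper names are illustrative only:

	#include <stdbool.h>
	#include <stdint.h>

	/* index in bits 0-14, wrap counter in bit 15 */
	static inline uint16_t
	packed_event_encode(uint16_t idx, bool wrap)
	{
		return (uint16_t)((idx & 0x7fff) | ((uint16_t)wrap << 15));
	}

	static inline void
	packed_event_decode(uint16_t off_wrap, uint16_t *idx, bool *wrap)
	{
		*idx = off_wrap & 0x7fff;
		*wrap = (off_wrap >> 15) != 0;
	}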
@@ -241,8 +361,32 @@ struct virtio_net { struct guest_page *guest_pages; int slave_req_fd; + rte_spinlock_t slave_req_lock; + + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + */ + int vdpa_dev_id; + + /* private data for virtio device */ + void *extern_data; + /* pre and post vhost user message handlers for the device */ + struct vhost_user_extern_ops extern_ops; } __rte_cache_aligned; +static __rte_always_inline bool +vq_is_packed(struct virtio_net *dev) +{ + return dev->features & (1ull << VIRTIO_F_RING_PACKED); +} + +static inline bool +desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter) +{ + return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) && + wrap_counter != !!(desc->flags & VRING_DESC_F_USED); +} #define VHOST_LOG_PAGE 4096 @@ -252,7 +396,15 @@ struct virtio_net { static __rte_always_inline void vhost_set_bit(unsigned int nr, volatile uint8_t *addr) { - __sync_fetch_and_or_8(addr, (1U << nr)); +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * __sync_ built-ins are deprecated, but __atomic_ ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or_1(addr, (1U << nr)); +#else + __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); +#endif } static __rte_always_inline void @@ -283,6 +435,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) } } +static __rte_always_inline void +vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + unsigned long *log_base; + int i; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base)) + return; + + log_base = (unsigned long *)(uintptr_t)dev->log_base; + + /* + * It is expected a write memory barrier has been issued + * before this function is called. + */ + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * '__sync' builtins are deprecated, but '__atomic' ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or(log_base + elem->offset, elem->val); +#else + __atomic_fetch_or(log_base + elem->offset, elem->val, + __ATOMIC_RELAXED); +#endif + } + + rte_smp_wmb(); + + vq->log_cache_nb_elem = 0; +} + +static __rte_always_inline void +vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t page) +{ + uint32_t bit_nr = page % (sizeof(unsigned long) << 3); + uint32_t offset = page / (sizeof(unsigned long) << 3); + int i; + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + + if (elem->offset == offset) { + elem->val |= (1UL << bit_nr); + return; + } + } + + if (unlikely(i >= VHOST_LOG_CACHE_NR)) { + /* + * No more room for a new log cache entry, + * so write the dirty log map directly. 
+ */ + rte_smp_wmb(); + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + + return; + } + + vq->log_cache[i].offset = offset; + vq->log_cache[i].val = (1UL << bit_nr); + vq->log_cache_nb_elem++; +} + +static __rte_always_inline void +vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_cache_page(dev, vq, page); + page += 1; + } +} + +static __rte_always_inline void +vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); +} + static __rte_always_inline void vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t offset, uint64_t len) @@ -296,8 +545,8 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, #ifdef RTE_LIBRTE_VHOST_DEBUG #define VHOST_MAX_PRINT_BUFF 6072 -#define LOG_LEVEL RTE_LOG_DEBUG -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) \ + RTE_LOG(DEBUG, log_type, fmt, ##args) #define PRINT_PACKET(device, addr, size, header) do { \ char *pkt_addr = (char *)(addr); \ unsigned int index; \ @@ -313,11 +562,10 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, } \ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ + VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ } while (0) #else -#define LOG_LEVEL RTE_LOG_INFO -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) #define PRINT_PACKET(device, addr, size, header) do {} while (0) #endif @@ -345,18 +593,33 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) return 0; } -struct virtio_net *get_device(int vid); +static __rte_always_inline struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} int vhost_new_device(void); void cleanup_device(struct virtio_net *dev, int destroy); void reset_device(struct virtio_net *dev); void vhost_destroy_device(int); +void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); -void free_vq(struct vhost_virtqueue *vq); +void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); +void vhost_attach_vdpa_device(int vid, int did); +void vhost_detach_vdpa_device(int vid); + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); void vhost_enable_dequeue_zero_copy(int vid); void vhost_set_builtin_virtio_net(int vid, bool enable); @@ -371,18 +634,18 @@ struct vhost_device_ops const *vhost_driver_callback_get(const char *path); void vhost_backend_cleanup(struct virtio_net *dev); uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm); + uint64_t iova, uint64_t *len, uint8_t perm); int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq); void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq); static __rte_always_inline uint64_t vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm) + uint64_t iova, uint64_t *len, uint8_t perm) { if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) - return rte_vhost_gpa_to_vva(dev->mem, iova); + return rte_vhost_va_from_guest_pa(dev->mem, iova, len); - return __vhost_iova_to_vva(dev, vq, iova, size, perm); + return __vhost_iova_to_vva(dev, vq, iova, len, perm); } #define vhost_used_event(vr) \ @@ -401,17 +664,17 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) } static __rte_always_inline void -vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq) +vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { /* Flush used->idx update before we read avail->flags. */ - rte_mb(); + rte_smp_mb(); /* Don't kick guest if we don't reach index specified by guest. */ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { uint16_t old = vq->signalled_used; uint16_t new = vq->last_used_idx; - LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", __func__, vhost_used_event(vq), old, new); @@ -428,4 +691,55 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq) } } +static __rte_always_inline void +vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint16_t old, new, off, off_wrap; + bool signalled_used_valid, kick = false; + + /* Flush used desc update. 
*/ + rte_smp_mb(); + + if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) { + if (vq->driver_event->flags != + VRING_EVENT_F_DISABLE) + kick = true; + goto kick; + } + + old = vq->signalled_used; + new = vq->last_used_idx; + vq->signalled_used = new; + signalled_used_valid = vq->signalled_used_valid; + vq->signalled_used_valid = true; + + if (vq->driver_event->flags != VRING_EVENT_F_DESC) { + if (vq->driver_event->flags != VRING_EVENT_F_DISABLE) + kick = true; + goto kick; + } + + if (unlikely(!signalled_used_valid)) { + kick = true; + goto kick; + } + + rte_smp_rmb(); + + off_wrap = vq->driver_event->off_wrap; + off = off_wrap & ~(1 << 15); + + if (new <= old) + old -= vq->size; + + if (vq->used_wrap_counter != off_wrap >> 15) + off -= vq->size; + + if (vhost_need_event(off, new, old)) + kick = true; +kick: + if (kick) + eventfd_write(vq->callfd, (eventfd_t)1); +} + #endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c new file mode 100644 index 00000000..57341ef8 --- /dev/null +++ b/lib/librte_vhost/vhost_crypto.c @@ -0,0 +1,1372 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ +#include +#include +#include +#include +#include + +#include "rte_vhost_crypto.h" +#include "vhost.h" +#include "vhost_user.h" +#include "virtio_crypto.h" + +#define INHDR_LEN (sizeof(struct virtio_crypto_inhdr)) +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) + +#define VC_LOG_DBG(fmt, args...) \ + RTE_LOG(DEBUG, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#else +#define VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_DBG(fmt, args...) 
+#endif + +#define VIRTIO_CRYPTO_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1 << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1 << VIRTIO_RING_F_EVENT_IDX) | \ + (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \ + (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \ + (1 << VIRTIO_NET_F_CTRL_VQ)) + +#define IOVA_TO_VVA(t, r, a, l, p) \ + ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p)) + +static int +cipher_algo_transform(uint32_t virtio_cipher_algo) +{ + int ret; + + switch (virtio_cipher_algo) { + case VIRTIO_CRYPTO_CIPHER_AES_CBC: + ret = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CTR: + ret = RTE_CRYPTO_CIPHER_AES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_DES_ECB: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_CIPHER_DES_CBC: + ret = RTE_CRYPTO_CIPHER_DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_ECB: + ret = RTE_CRYPTO_CIPHER_3DES_ECB; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CBC: + ret = RTE_CRYPTO_CIPHER_3DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CTR: + ret = RTE_CRYPTO_CIPHER_3DES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_KASUMI_F8: + ret = RTE_CRYPTO_CIPHER_KASUMI_F8; + break; + case VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2: + ret = RTE_CRYPTO_CIPHER_SNOW3G_UEA2; + break; + case VIRTIO_CRYPTO_CIPHER_AES_F8: + ret = RTE_CRYPTO_CIPHER_AES_F8; + break; + case VIRTIO_CRYPTO_CIPHER_AES_XTS: + ret = RTE_CRYPTO_CIPHER_AES_XTS; + break; + case VIRTIO_CRYPTO_CIPHER_ZUC_EEA3: + ret = RTE_CRYPTO_CIPHER_ZUC_EEA3; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int +auth_algo_transform(uint32_t virtio_auth_algo) +{ + int ret; + + switch (virtio_auth_algo) { + + case VIRTIO_CRYPTO_NO_MAC: + ret = RTE_CRYPTO_AUTH_NULL; + break; + case VIRTIO_CRYPTO_MAC_HMAC_MD5: + ret = RTE_CRYPTO_AUTH_MD5_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA1: + ret = RTE_CRYPTO_AUTH_SHA1_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_224: + ret = RTE_CRYPTO_AUTH_SHA224_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_256: + ret = RTE_CRYPTO_AUTH_SHA256_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_384: + ret = RTE_CRYPTO_AUTH_SHA384_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_512: + ret = RTE_CRYPTO_AUTH_SHA512_HMAC; + break; + case VIRTIO_CRYPTO_MAC_CMAC_3DES: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CMAC; + break; + case VIRTIO_CRYPTO_MAC_KASUMI_F9: + ret = RTE_CRYPTO_AUTH_KASUMI_F9; + break; + case VIRTIO_CRYPTO_MAC_SNOW3G_UIA2: + ret = RTE_CRYPTO_AUTH_SNOW3G_UIA2; + break; + case VIRTIO_CRYPTO_MAC_GMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_GMAC; + break; + case VIRTIO_CRYPTO_MAC_GMAC_TWOFISH: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CBC_MAC; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_XCBC_AES: + ret = RTE_CRYPTO_AUTH_AES_XCBC_MAC; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int get_iv_len(enum rte_crypto_cipher_algorithm algo) +{ + int len; + + switch (algo) { + case RTE_CRYPTO_CIPHER_3DES_CBC: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_CTR: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_ECB: + len = 8; + break; + case RTE_CRYPTO_CIPHER_AES_CBC: + len = 16; + break; + + /* TODO: add common algos */ + + default: + len = -1; + break; + } + + return len; +} + +/** + * vhost_crypto struct is used to maintain a number of virtio_cryptos and + * one DPDK crypto device that deals with 
all crypto workloads. It is declared + * here and defined in vhost_crypto.c + */ +struct vhost_crypto { + /** Used to lookup DPDK Cryptodev Session based on VIRTIO crypto + * session ID. + */ + struct rte_hash *session_map; + struct rte_mempool *mbuf_pool; + struct rte_mempool *sess_pool; + + /** DPDK cryptodev ID */ + uint8_t cid; + uint16_t nb_qps; + + uint64_t last_session_id; + + uint64_t cache_session_id; + struct rte_cryptodev_sym_session *cache_session; + /** socket id for the device */ + int socket_id; + + struct virtio_net *dev; + + uint8_t option; +} __rte_cache_aligned; + +struct vhost_crypto_data_req { + struct vring_desc *head; + struct virtio_net *dev; + struct virtio_crypto_inhdr *inhdr; + struct vhost_virtqueue *vq; + struct vring_desc *wb_desc; + uint16_t wb_len; + uint16_t desc_idx; + uint16_t len; + uint16_t zero_copy; +}; + +static int +transform_cipher_param(struct rte_crypto_sym_xform *xform, + VhostUserCryptoSessionParam *param) +{ + int ret; + + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform->cipher.algo = (uint32_t)ret; + xform->cipher.key.length = param->cipher_key_len; + if (xform->cipher.key.length > 0) + xform->cipher.key.data = param->cipher_key_buf; + if (param->dir == VIRTIO_CRYPTO_OP_ENCRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + else if (param->dir == VIRTIO_CRYPTO_OP_DECRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + else { + VC_LOG_DBG("Bad operation type"); + return -VIRTIO_CRYPTO_BADMSG; + } + + ret = get_iv_len(xform->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform->cipher.iv.length = (uint16_t)ret; + xform->cipher.iv.offset = IV_OFFSET; + return 0; +} + +static int +transform_chain_param(struct rte_crypto_sym_xform *xforms, + VhostUserCryptoSessionParam *param) +{ + struct rte_crypto_sym_xform *xform_cipher, *xform_auth; + int ret; + + switch (param->chaining_dir) { + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER: + xform_auth = xforms; + xform_cipher = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + break; + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH: + xform_cipher = xforms; + xform_auth = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + break; + default: + return -VIRTIO_CRYPTO_BADMSG; + } + + /* cipher */ + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform_cipher->cipher.algo = (uint32_t)ret; + xform_cipher->cipher.key.length = param->cipher_key_len; + xform_cipher->cipher.key.data = param->cipher_key_buf; + ret = get_iv_len(xform_cipher->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->cipher.iv.length = (uint16_t)ret; + xform_cipher->cipher.iv.offset = IV_OFFSET; + + /* auth */ + xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH; + ret = auth_algo_transform(param->hash_algo); + if (unlikely(ret < 0)) + return ret; + xform_auth->auth.algo = (uint32_t)ret; + xform_auth->auth.digest_length = param->digest_len; + xform_auth->auth.key.length = param->auth_key_len; + xform_auth->auth.key.data = param->auth_key_buf; + + return 0; +} + +static void +vhost_crypto_create_sess(struct vhost_crypto *vcrypto, + VhostUserCryptoSessionParam *sess_param) +{ + struct rte_crypto_sym_xform xform1 = {0}, xform2 = {0}; + struct 
rte_cryptodev_sym_session *session; + int ret; + + switch (sess_param->op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + ret = transform_cipher_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform session msg (%i)", ret); + sess_param->session_id = ret; + return; + } + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + if (unlikely(sess_param->hash_mode != + VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH)) { + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + VC_LOG_ERR("Error transform session message (%i)", + -VIRTIO_CRYPTO_NOTSUPP); + return; + } + + xform1.next = &xform2; + + ret = transform_chain_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform session message (%i)", ret); + sess_param->session_id = ret; + return; + } + + break; + default: + VC_LOG_ERR("Algorithm not yet supported"); + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + return; + } + + session = rte_cryptodev_sym_session_create(vcrypto->sess_pool); + if (!session) { + VC_LOG_ERR("Failed to create session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + if (rte_cryptodev_sym_session_init(vcrypto->cid, session, &xform1, + vcrypto->sess_pool) < 0) { + VC_LOG_ERR("Failed to initialize session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + /* insert hash to map */ + if (rte_hash_add_key_data(vcrypto->session_map, + &vcrypto->last_session_id, session) < 0) { + VC_LOG_ERR("Failed to insert session to hash table"); + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) + VC_LOG_ERR("Failed to clear session"); + else { + if (rte_cryptodev_sym_session_free(session) < 0) + VC_LOG_ERR("Failed to free session"); + } + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + VC_LOG_INFO("Session %"PRIu64" created for vdev %i.", + vcrypto->last_session_id, vcrypto->dev->vid); + + sess_param->session_id = vcrypto->last_session_id; + vcrypto->last_session_id++; +} + +static int +vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id) +{ + struct rte_cryptodev_sym_session *session; + uint64_t sess_id = session_id; + int ret; + + ret = rte_hash_lookup_data(vcrypto->session_map, &sess_id, + (void **)&session); + + if (unlikely(ret < 0)) { + VC_LOG_ERR("Failed to delete session %"PRIu64".", session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) { + VC_LOG_DBG("Failed to clear session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_cryptodev_sym_session_free(session) < 0) { + VC_LOG_DBG("Failed to free session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_hash_del_key(vcrypto->session_map, &sess_id) < 0) { + VC_LOG_DBG("Failed to delete session from hash table."); + return -VIRTIO_CRYPTO_ERR; + } + + VC_LOG_INFO("Session %"PRIu64" deleted for vdev %i.", sess_id, + vcrypto->dev->vid); + + return 0; +} + +static int +vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + VhostUserMsg *vmsg = msg; + int ret = 0; + + if (dev == NULL || require_reply == NULL) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + vcrypto = dev->extern_data; + if (vcrypto == NULL) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + *require_reply = 0; + + if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) { + vhost_crypto_create_sess(vcrypto, + &vmsg->payload.crypto_session); + 
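+ /*
+  * vhost_crypto_create_sess() has written either the new session id
+  * or a negative virtio-crypto status into the payload; a reply is
+  * required so the result travels back to the vhost-user master.
+  */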
*require_reply = 1; + } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) + ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64); + else + ret = -EINVAL; + + return ret; +} + +static __rte_always_inline struct vring_desc * +find_write_desc(struct vring_desc *head, struct vring_desc *desc) +{ + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + + while (desc->flags & VRING_DESC_F_NEXT) { + desc = &head[desc->next]; + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + } + + return NULL; +} + +static struct virtio_crypto_inhdr * +reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc) +{ + uint64_t dlen; + struct virtio_crypto_inhdr *inhdr; + + while (desc->flags & VRING_DESC_F_NEXT) + desc = &vc_req->head[desc->next]; + + dlen = desc->len; + inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr, + &dlen, VHOST_ACCESS_WO); + if (unlikely(!inhdr || dlen != desc->len)) + return NULL; + + return inhdr; +} + +static __rte_always_inline int +move_desc(struct vring_desc *head, struct vring_desc **cur_desc, + uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + int left = size; + + rte_prefetch0(&head[desc->next]); + left -= desc->len; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + left -= desc->len; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &head[desc->next]; + return 0; +} + +static int +copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, + struct vring_desc **cur_desc, uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + uint64_t remain, addr, dlen, len; + uint32_t to_copy; + uint8_t *data = dst_data; + uint8_t *src; + int left = size; + + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = to_copy; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy((uint8_t *)data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &vc_req->head[desc->next]; + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &vc_req->head[desc->next]; + + return 0; +} + +static __rte_always_inline void * 
+get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc, + uint32_t size, uint8_t perm) +{ + void *data; + uint64_t dlen = (*cur_desc)->len; + + data = IOVA_TO_VVA(void *, vc_req, (*cur_desc)->addr, &dlen, perm); + if (unlikely(!data || dlen != (*cur_desc)->len)) { + VC_LOG_ERR("Failed to map object"); + return NULL; + } + + if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0)) + return NULL; + + return data; +} + +static int +write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req) +{ + struct rte_mbuf *mbuf = op->sym->m_dst; + struct vring_desc *head = vc_req->head; + struct vring_desc *desc = vc_req->wb_desc; + int left = vc_req->wb_len; + uint32_t to_write; + uint8_t *src_data = mbuf->buf_addr, *dst; + uint64_t dlen; + + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; + src_data += to_write; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; + src_data += to_write; + } + + if (unlikely(left < 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + return 0; +} + +static uint8_t +prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_cipher_data_req *cipher, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint8_t ret = 0; + + /* prepare */ + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, &desc, + cipher->para.iv_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + m_src->data_len = cipher->para.src_data_len; + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + cipher->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.src_data_len, VHOST_ACCESS_RO); + if (unlikely(m_src->buf_iova == 0 || + m_src->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(cipher->para.src_data_len > + RTE_MBUF_DEFAULT_BUF_SIZE)) { + VC_LOG_ERR("Not enough space to do data copy"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, cipher->para.src_data_len) + < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* dst */ + desc = find_write_desc(vc_req->head, desc); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->buf_iova = 
gpa_to_hpa(vcrypto->dev, + desc->addr, cipher->para.dst_data_len); + m_dst->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.dst_data_len, VHOST_ACCESS_RW); + if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + + m_dst->data_len = cipher->para.dst_data_len; + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_desc = desc; + vc_req->wb_len = cipher->para.dst_data_len; + if (unlikely(move_desc(vc_req->head, &desc, + vc_req->wb_len) < 0)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* src data */ + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + op->sym->cipher.data.offset = 0; + op->sym->cipher.data.length = cipher->para.src_data_len; + + vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + if (unlikely(vc_req->inhdr == NULL)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + vc_req->inhdr->status = VIRTIO_CRYPTO_OK; + vc_req->len = cipher->para.dst_data_len + INHDR_LEN; + + return 0; + +error_exit: + vc_req->len = INHDR_LEN; + return ret; +} + +static uint8_t +prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_alg_chain_data_req *chain, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint32_t digest_offset; + void *digest_addr; + uint8_t ret = 0; + + /* prepare */ + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, &desc, + chain->para.iv_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + m_src->data_len = chain->para.src_data_len; + m_dst->data_len = chain->para.dst_data_len; + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + chain->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, &desc, + chain->para.src_data_len, VHOST_ACCESS_RO); + if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(chain->para.src_data_len > + RTE_MBUF_DEFAULT_BUF_SIZE)) { + VC_LOG_ERR("Not enough space to do data copy"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, chain->para.src_data_len)) < 0) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* dst */ + desc = find_write_desc(vc_req->head, desc); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->buf_iova = gpa_to_hpa(vcrypto->dev, + desc->addr, chain->para.dst_data_len); + m_dst->buf_addr = get_data_ptr(vc_req, &desc, + chain->para.dst_data_len, VHOST_ACCESS_RW); + if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + + op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev, + 
desc->addr, chain->para.hash_result_len); + op->sym->auth.digest.data = get_data_ptr(vc_req, &desc, + chain->para.hash_result_len, VHOST_ACCESS_RW); + if (unlikely(op->sym->auth.digest.phys_addr == 0)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + digest_offset = m_dst->data_len; + digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *, + digest_offset); + + vc_req->wb_desc = desc; + vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len; + + if (unlikely(move_desc(vc_req->head, &desc, + chain->para.dst_data_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + if (unlikely(copy_data(digest_addr, vc_req, &desc, + chain->para.hash_result_len)) < 0) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + op->sym->auth.digest.data = digest_addr; + op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst, + digest_offset); + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* record inhdr */ + vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + if (unlikely(vc_req->inhdr == NULL)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + vc_req->inhdr->status = VIRTIO_CRYPTO_OK; + + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + op->sym->cipher.data.offset = chain->para.cipher_start_src_offset; + op->sym->cipher.data.length = chain->para.src_data_len - + chain->para.cipher_start_src_offset; + + op->sym->auth.data.offset = chain->para.hash_start_src_offset; + op->sym->auth.data.length = chain->para.len_to_hash; + + vc_req->len = chain->para.dst_data_len + chain->para.hash_result_len + + INHDR_LEN; + return 0; + +error_exit: + vc_req->len = INHDR_LEN; + return ret; +} + +/** + * Process on descriptor + */ +static __rte_always_inline int +vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + struct vhost_virtqueue *vq, struct rte_crypto_op *op, + struct vring_desc *head, uint16_t desc_idx) +{ + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src); + struct rte_cryptodev_sym_session *session; + struct virtio_crypto_op_data_req *req, tmp_req; + struct virtio_crypto_inhdr *inhdr; + struct vring_desc *desc = NULL; + uint64_t session_id; + uint64_t dlen; + int err = 0; + + vc_req->desc_idx = desc_idx; + vc_req->dev = vcrypto->dev; + vc_req->vq = vq; + + if (likely(head->flags & VRING_DESC_F_INDIRECT)) { + dlen = head->len; + desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr, + &dlen, VHOST_ACCESS_RO); + if (unlikely(!desc || dlen != head->len)) + return -1; + desc_idx = 0; + head = desc; + } else { + desc = head; + } + + vc_req->head = head; + vc_req->zero_copy = vcrypto->option; + + req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO); + if (unlikely(req == NULL)) { + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + err = VIRTIO_CRYPTO_BADMSG; + VC_LOG_ERR("Invalid descriptor"); + goto error_exit; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + req = &tmp_req; + if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req)) + < 0)) { + err = VIRTIO_CRYPTO_BADMSG; + VC_LOG_ERR("Invalid descriptor"); + goto error_exit; + } + break; + default: + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Invalid option"); + goto error_exit; + } + } + + switch (req->header.opcode) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + case VIRTIO_CRYPTO_CIPHER_DECRYPT: + session_id = req->header.session_id; + + /* one branch to avoid 
unnecessary table lookup */ + if (vcrypto->cache_session_id != session_id) { + err = rte_hash_lookup_data(vcrypto->session_map, + &session_id, (void **)&session); + if (unlikely(err < 0)) { + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Failed to find session %"PRIu64, + session_id); + goto error_exit; + } + + vcrypto->cache_session = session; + vcrypto->cache_session_id = session_id; + } + + session = vcrypto->cache_session; + + err = rte_crypto_op_attach_sym_session(op, session); + if (unlikely(err < 0)) { + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Failed to attach session to op"); + goto error_exit; + } + + switch (req->u.sym_req.op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + err = VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + err = prepare_sym_cipher_op(vcrypto, op, vc_req, + &req->u.sym_req.u.cipher, desc); + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + err = prepare_sym_chain_op(vcrypto, op, vc_req, + &req->u.sym_req.u.chain, desc); + break; + } + if (unlikely(err != 0)) { + VC_LOG_ERR("Failed to process sym request"); + goto error_exit; + } + break; + default: + VC_LOG_ERR("Unsupported symmetric crypto request type %u", + req->header.opcode); + goto error_exit; + } + + return 0; + +error_exit: + + inhdr = reach_inhdr(vc_req, desc); + if (likely(inhdr != NULL)) + inhdr->status = (uint8_t)err; + + return -1; +} + +static __rte_always_inline struct vhost_virtqueue * +vhost_crypto_finalize_one_request(struct rte_crypto_op *op, + struct vhost_virtqueue *old_vq) +{ + struct rte_mbuf *m_src = op->sym->m_src; + struct rte_mbuf *m_dst = op->sym->m_dst; + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src); + uint16_t desc_idx; + int ret = 0; + + if (unlikely(!vc_req)) { + VC_LOG_ERR("Failed to retrieve vc_req"); + return NULL; + } + + if (old_vq && (vc_req->vq != old_vq)) + return vc_req->vq; + + desc_idx = vc_req->desc_idx; + + if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; + else { + if (vc_req->zero_copy == 0) { + ret = write_back_data(op, vc_req); + if (unlikely(ret != 0)) + vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; + } + } + + vc_req->vq->used->ring[desc_idx].id = desc_idx; + vc_req->vq->used->ring[desc_idx].len = vc_req->len; + + rte_mempool_put(m_dst->pool, (void *)m_dst); + rte_mempool_put(m_src->pool, (void *)m_src); + + return vc_req->vq; +} + +static __rte_always_inline uint16_t +vhost_crypto_complete_one_vm_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfd) +{ + uint16_t processed = 1; + struct vhost_virtqueue *vq, *tmp_vq; + + if (unlikely(nb_ops == 0)) + return 0; + + vq = vhost_crypto_finalize_one_request(ops[0], NULL); + if (unlikely(vq == NULL)) + return 0; + tmp_vq = vq; + + while ((processed < nb_ops)) { + tmp_vq = vhost_crypto_finalize_one_request(ops[processed], + tmp_vq); + + if (unlikely(vq != tmp_vq)) + break; + + processed++; + } + + *callfd = vq->callfd; + + *(volatile uint16_t *)&vq->used->idx += processed; + + return processed; +} + +int __rte_experimental +rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + struct rte_mempool *sess_pool, int socket_id) +{ + struct virtio_net *dev = get_device(vid); + struct rte_hash_parameters params = {0}; + struct vhost_crypto *vcrypto; + char name[128]; + int ret; + + if (!dev) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + ret = rte_vhost_driver_set_features(dev->ifname, + VIRTIO_CRYPTO_FEATURES); + if (ret < 0) { + VC_LOG_ERR("Error setting features"); + return -1; + } + + vcrypto = 
rte_zmalloc_socket(NULL, sizeof(*vcrypto), + RTE_CACHE_LINE_SIZE, socket_id); + if (!vcrypto) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } + + vcrypto->sess_pool = sess_pool; + vcrypto->cid = cryptodev_id; + vcrypto->cache_session_id = UINT64_MAX; + vcrypto->last_session_id = 1; + vcrypto->dev = dev; + vcrypto->option = RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE; + + snprintf(name, 127, "HASH_VHOST_CRYPT_%u", (uint32_t)vid); + params.name = name; + params.entries = VHOST_CRYPTO_SESSION_MAP_ENTRIES; + params.hash_func = rte_jhash; + params.key_len = sizeof(uint64_t); + params.socket_id = socket_id; + vcrypto->session_map = rte_hash_create(¶ms); + if (!vcrypto->session_map) { + VC_LOG_ERR("Failed to creath session map"); + ret = -ENOMEM; + goto error_exit; + } + + snprintf(name, 127, "MBUF_POOL_VM_%u", (uint32_t)vid); + vcrypto->mbuf_pool = rte_pktmbuf_pool_create(name, + VHOST_CRYPTO_MBUF_POOL_SIZE, 512, + sizeof(struct vhost_crypto_data_req), + RTE_MBUF_DEFAULT_DATAROOM * 2 + RTE_PKTMBUF_HEADROOM, + rte_socket_id()); + if (!vcrypto->mbuf_pool) { + VC_LOG_ERR("Failed to creath mbuf pool"); + ret = -ENOMEM; + goto error_exit; + } + + dev->extern_data = vcrypto; + dev->extern_ops.pre_msg_handle = NULL; + dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler; + + return 0; + +error_exit: + if (vcrypto->session_map) + rte_hash_free(vcrypto->session_map); + if (vcrypto->mbuf_pool) + rte_mempool_free(vcrypto->mbuf_pool); + + rte_free(vcrypto); + + return ret; +} + +int __rte_experimental +rte_vhost_crypto_free(int vid) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + vcrypto = dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + rte_hash_free(vcrypto->session_map); + rte_mempool_free(vcrypto->mbuf_pool); + rte_free(vcrypto); + + dev->extern_data = NULL; + dev->extern_ops.pre_msg_handle = NULL; + dev->extern_ops.post_msg_handle = NULL; + + return 0; +} + +int __rte_experimental +rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + if (unlikely((uint32_t)option >= + RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS)) { + VC_LOG_ERR("Invalid option %i", option); + return -EINVAL; + } + + vcrypto = (struct vhost_crypto *)dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + if (vcrypto->option == (uint8_t)option) + return 0; + + if (!(rte_mempool_full(vcrypto->mbuf_pool))) { + VC_LOG_ERR("Cannot update zero copy as mempool is not full"); + return -EINVAL; + } + + vcrypto->option = (uint8_t)option; + + return 0; +} + +uint16_t __rte_experimental +rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + struct rte_mbuf *mbufs[VHOST_CRYPTO_MAX_BURST_SIZE * 2]; + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + struct vhost_virtqueue *vq; + uint16_t avail_idx; + uint16_t start_idx; + uint16_t required; + uint16_t count; + uint16_t i; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + if (unlikely(qid >= VHOST_MAX_QUEUE_PAIRS)) { + VC_LOG_ERR("Invalid qid %u", qid); + return -EINVAL; 
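/*
 * Illustrative usage sketch, not part of this patch: a minimal polling loop
 * driving the rte_vhost_crypto_* API introduced above. It assumes a crypto op
 * mempool "cop_pool", a configured cryptodev "cid" with queue pair 0 set up,
 * a session pool "sess_pool", and the headers <rte_cryptodev.h>,
 * <rte_vhost_crypto.h> and <sys/eventfd.h>. Error handling is omitted.
 */
static void
example_vhost_crypto_poll(int vid, uint8_t cid, struct rte_mempool *cop_pool,
		struct rte_mempool *sess_pool)
{
	struct rte_crypto_op *ops[32];
	int callfds[32];
	uint16_t nb_callfds, nb, i;

	/* register this vid as a vhost-user crypto backend */
	rte_vhost_crypto_create(vid, cid, sess_pool, rte_socket_id());
	rte_vhost_crypto_set_zero_copy(vid, RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE);

	for (;;) {
		/* turn virtio-crypto descriptors from data queue 0 into crypto ops */
		rte_crypto_op_bulk_alloc(cop_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC,
				ops, 32);
		nb = rte_vhost_crypto_fetch_requests(vid, 0, ops, 32);
		if (nb < 32)	/* return the ops the fetch did not consume */
			rte_mempool_put_bulk(cop_pool, (void **)&ops[nb], 32 - nb);

		nb = rte_cryptodev_enqueue_burst(cid, 0, ops, nb);
		/* a real loop would keep dequeuing until every enqueued op completes */
		nb = rte_cryptodev_dequeue_burst(cid, 0, ops, nb);

		/* write results back to the guest and kick the collected callfds */
		nb = rte_vhost_crypto_finalize_requests(ops, nb, callfds,
				&nb_callfds);
		for (i = 0; i < nb_callfds; i++)
			eventfd_write(callfds[i], (eventfd_t)1);
	}
}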
+ } + + vcrypto = (struct vhost_crypto *)dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + vq = dev->virtqueue[qid]; + + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + start_idx = vq->last_used_idx; + count = avail_idx - start_idx; + count = RTE_MIN(count, VHOST_CRYPTO_MAX_BURST_SIZE); + count = RTE_MIN(count, nb_ops); + + if (unlikely(count == 0)) + return 0; + + /* for zero copy, we need 2 empty mbufs for src and dst, otherwise + * we need only 1 mbuf as src and dst + */ + required = count * 2; + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs, + required) < 0)) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + uint16_t used_idx = (start_idx + i) & (vq->size - 1); + uint16_t desc_idx = vq->avail->ring[used_idx]; + struct vring_desc *head = &vq->desc[desc_idx]; + struct rte_crypto_op *op = ops[i]; + + op->sym->m_src = mbufs[i * 2]; + op->sym->m_dst = mbufs[i * 2 + 1]; + op->sym->m_src->data_off = 0; + op->sym->m_dst->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head, + desc_idx)) < 0) + break; + } + + vq->last_used_idx += i; + + return i; +} + +uint16_t __rte_experimental +rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds) +{ + struct rte_crypto_op **tmp_ops = ops; + uint16_t count = 0, left = nb_ops; + int callfd; + uint16_t idx = 0; + + while (left) { + count = vhost_crypto_complete_one_vm_requests(tmp_ops, left, + &callfd); + if (unlikely(count == 0)) + break; + + tmp_ops = &tmp_ops[count]; + left -= count; + + callfds[idx++] = callfd; + + if (unlikely(idx >= VIRTIO_CRYPTO_MAX_NUM_BURST_VQS)) { + VC_LOG_ERR("Too many vqs"); + break; + } + } + + *nb_callfds = idx; + + return nb_ops - left; +} diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 90ed2112..a2d4c9ff 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -1,5 +1,22 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation + */ + +/* Security model + * -------------- + * The vhost-user protocol connection is an external interface, so it must be + * robust against invalid inputs. + * + * This is important because the vhost-user master is only one step removed + * from the guest. Malicious guests that have escaped will then launch further + * attacks from the vhost-user master. + * + * Even in deployments where guests are trusted, a bug in the vhost-user master + * can still cause invalid messages to be sent. Such messages must not + * compromise the stability of the DPDK application by causing crashes, memory + * corruption, or other problematic behavior. + * + * Do not assume received VhostUserMsg fields contain sensible values! 
*/ #include @@ -50,6 +67,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD", [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", + [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS", + [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS", }; static uint64_t @@ -116,10 +135,7 @@ vhost_user_set_owner(void) static int vhost_user_reset_owner(struct virtio_net *dev) { - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } + vhost_destroy_device_notify(dev); cleanup_device(dev, 0); reset_device(dev); @@ -138,6 +154,18 @@ vhost_user_get_features(struct virtio_net *dev) return features; } +/* + * The queue number that we support are requested. + */ +static uint32_t +vhost_user_get_queue_num(struct virtio_net *dev) +{ + uint32_t queue_num = 0; + + rte_vhost_driver_get_queue_num(dev->ifname, &queue_num); + return queue_num; +} + /* * We receive the negotiated features supported by us and the virtio device. */ @@ -145,6 +173,8 @@ static int vhost_user_set_features(struct virtio_net *dev, uint64_t features) { uint64_t vhost_features = 0; + struct rte_vdpa_device *vdpa_dev; + int did = -1; rte_vhost_driver_get_features(dev->ifname, &vhost_features); if (features & ~vhost_features) { @@ -181,7 +211,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) } else { dev->vhost_hlen = sizeof(struct virtio_net_hdr); } - LOG_DEBUG(VHOST_CONFIG, + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mergeable RX buffers %s, virtio 1 %s\n", dev->vid, (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", @@ -203,10 +233,15 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); - free_vq(vq); + free_vq(dev, vq); } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_features) + vdpa_dev->ops->set_features(dev->vid); + return 0; } @@ -221,6 +256,17 @@ vhost_user_set_vring_num(struct virtio_net *dev, vq->size = msg->payload.state.num; + /* VIRTIO 1.0, 2.4 Virtqueues says: + * + * Queue Size value is always a power of 2. The maximum Queue Size + * value is 32768. 
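 * (Illustrative aside, not part of the patch: the bit test below exploits the
 * fact that a power of two has exactly one bit set, so a size of 256 passes
 * because 256 & 255 == 0, while 3000 is rejected because 3000 & 2999 != 0 and
 * 65536 is rejected by the > 32768 bound.)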
+ */ + if ((vq->size & (vq->size - 1)) || vq->size > 32768) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid virtqueue size %u\n", vq->size); + return -1; + } + if (dev->dequeue_zero_copy) { vq->nr_zmbuf = 0; vq->last_zmbuf_idx = 0; @@ -236,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev, TAILQ_INIT(&vq->zmbuf_list); } - vq->shadow_used_ring = rte_malloc(NULL, + if (vq_is_packed(dev)) { + vq->shadow_used_packed = rte_malloc(NULL, + vq->size * + sizeof(struct vring_used_elem_packed), + RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_packed) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } + + } else { + vq->shadow_used_split = rte_malloc(NULL, vq->size * sizeof(struct vring_used_elem), RTE_CACHE_LINE_SIZE); - if (!vq->shadow_used_ring) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to allocate memory for shadow used ring.\n"); - return -1; + if (!vq->shadow_used_split) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } } vq->batch_copy_elems = rte_malloc(NULL, @@ -269,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index) struct virtio_net *old_dev; struct vhost_virtqueue *old_vq, *vq; struct zcopy_mbuf *new_zmbuf; - struct vring_used_elem *new_shadow_used_ring; + struct vring_used_elem *new_shadow_used_split; + struct vring_used_elem_packed *new_shadow_used_packed; struct batch_copy_elem *new_batch_copy_elems; int ret; @@ -304,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index) vq->zmbufs = new_zmbuf; } - new_shadow_used_ring = rte_malloc_socket(NULL, - vq->size * sizeof(struct vring_used_elem), - RTE_CACHE_LINE_SIZE, - newnode); - if (new_shadow_used_ring) { - rte_free(vq->shadow_used_ring); - vq->shadow_used_ring = new_shadow_used_ring; + if (vq_is_packed(dev)) { + new_shadow_used_packed = rte_malloc_socket(NULL, + vq->size * + sizeof(struct vring_used_elem_packed), + RTE_CACHE_LINE_SIZE, + newnode); + if (new_shadow_used_packed) { + rte_free(vq->shadow_used_packed); + vq->shadow_used_packed = new_shadow_used_packed; + } + } else { + new_shadow_used_split = rte_malloc_socket(NULL, + vq->size * + sizeof(struct vring_used_elem), + RTE_CACHE_LINE_SIZE, + newnode); + if (new_shadow_used_split) { + rte_free(vq->shadow_used_split); + vq->shadow_used_split = new_shadow_used_split; + } } new_batch_copy_elems = rte_malloc_socket(NULL, @@ -366,21 +439,26 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused) /* Converts QEMU virtual address to Vhost virtual address. */ static uint64_t -qva_to_vva(struct virtio_net *dev, uint64_t qva) +qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) { - struct rte_vhost_mem_region *reg; + struct rte_vhost_mem_region *r; uint32_t i; /* Find the region where the address lives. 
*/ for (i = 0; i < dev->mem->nregions; i++) { - reg = &dev->mem->regions[i]; + r = &dev->mem->regions[i]; + + if (qva >= r->guest_user_addr && + qva < r->guest_user_addr + r->size) { - if (qva >= reg->guest_user_addr && - qva < reg->guest_user_addr + reg->size) { - return qva - reg->guest_user_addr + - reg->host_user_addr; + if (unlikely(*len > r->guest_user_addr + r->size - qva)) + *len = r->guest_user_addr + r->size - qva; + + return qva - r->guest_user_addr + + r->host_user_addr; } } + *len = 0; return 0; } @@ -393,20 +471,20 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva) */ static uint64_t ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t ra, uint64_t size) + uint64_t ra, uint64_t *size) { if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { uint64_t vva; vva = vhost_user_iotlb_cache_find(vq, ra, - &size, VHOST_ACCESS_RW); + size, VHOST_ACCESS_RW); if (!vva) vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW); return vva; } - return qva_to_vva(dev, ra); + return qva_to_vva(dev, ra, size); } static struct virtio_net * @@ -414,16 +492,63 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) { struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; struct vhost_vring_addr *addr = &vq->ring_addrs; + uint64_t len; + + if (vq_is_packed(dev)) { + len = sizeof(struct vring_packed_desc) * vq->size; + vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) + ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len); + vq->log_guest_addr = 0; + if (vq->desc_packed == NULL || + len != sizeof(struct vring_packed_desc) * + vq->size) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to map desc_packed ring.\n", + dev->vid); + return dev; + } + + dev = numa_realloc(dev, vq_index); + vq = dev->virtqueue[vq_index]; + addr = &vq->ring_addrs; + + len = sizeof(struct vring_packed_desc_event); + vq->driver_event = (struct vring_packed_desc_event *) + (uintptr_t)ring_addr_to_vva(dev, + vq, addr->avail_user_addr, &len); + if (vq->driver_event == NULL || + len != sizeof(struct vring_packed_desc_event)) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to find driver area address.\n", + dev->vid); + return dev; + } + + len = sizeof(struct vring_packed_desc_event); + vq->device_event = (struct vring_packed_desc_event *) + (uintptr_t)ring_addr_to_vva(dev, + vq, addr->used_user_addr, &len); + if (vq->device_event == NULL || + len != sizeof(struct vring_packed_desc_event)) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to find device area address.\n", + dev->vid); + return dev; + } + + return dev; + } /* The addresses are converted from QEMU virtual to Vhost virtual. 
*/ if (vq->desc && vq->avail && vq->used) return dev; + len = sizeof(struct vring_desc) * vq->size; vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->desc_user_addr, sizeof(struct vring_desc)); - if (vq->desc == 0) { + vq, addr->desc_user_addr, &len); + if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find desc ring address.\n", + "(%d) failed to map desc ring.\n", dev->vid); return dev; } @@ -432,20 +557,26 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) vq = dev->virtqueue[vq_index]; addr = &vq->ring_addrs; + len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->avail_user_addr, sizeof(struct vring_avail)); - if (vq->avail == 0) { + vq, addr->avail_user_addr, &len); + if (vq->avail == 0 || + len != sizeof(struct vring_avail) + + sizeof(uint16_t) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find avail ring address.\n", + "(%d) failed to map avail ring.\n", dev->vid); return dev; } + len = sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size; vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->used_user_addr, sizeof(struct vring_used)); - if (vq->used == 0) { + vq, addr->used_user_addr, &len); + if (vq->used == 0 || len != sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find used ring address.\n", + "(%d) failed to map used ring.\n", dev->vid); return dev; } @@ -461,13 +592,13 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) vq->log_guest_addr = addr->log_guest_addr; - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", dev->vid, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", dev->vid, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", dev->vid, vq->log_guest_addr); return dev; @@ -500,7 +631,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) if (vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { - dev = translate_ring_addresses(dev, msg->payload.state.index); + dev = translate_ring_addresses(dev, msg->payload.addr.index); if (!dev) return -1; @@ -525,7 +656,7 @@ vhost_user_set_vring_base(struct virtio_net *dev, return 0; } -static void +static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t host_phys_addr, uint64_t size) { @@ -535,6 +666,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, dev->max_guest_pages *= 2; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); + if (!dev->guest_pages) { + RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + return -1; + } } if (dev->nr_guest_pages > 0) { @@ -543,7 +678,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (host_phys_addr == last_page->host_phys_addr + last_page->size) { last_page->size += size; - return; + return 0; } } @@ -551,9 +686,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, 
page->guest_phys_addr = guest_phys_addr; page->host_phys_addr = host_phys_addr; page->size = size; + + return 0; } -static void +static int add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t page_size) { @@ -567,7 +704,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) + return -1; + host_user_addr += size; guest_phys_addr += size; reg_size -= size; @@ -576,12 +715,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = RTE_MIN(reg_size, page_size); host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + size) < 0) + return -1; host_user_addr += size; guest_phys_addr += size; reg_size -= size; } + + return 0; } #ifdef RTE_LIBRTE_VHOST_DEBUG @@ -635,8 +778,9 @@ vhost_memory_changed(struct VhostUserMemory *new, } static int -vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) { + struct virtio_net *dev = *pdev; struct VhostUserMemory memory = pmsg->payload.memory; struct rte_vhost_mem_region *reg; void *mmap_addr; @@ -644,8 +788,15 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) uint64_t mmap_offset; uint64_t alignment; uint32_t i; + int populate; int fd; + if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) { + RTE_LOG(ERR, VHOST_CONFIG, + "too many memory regions (%u)\n", memory.nregions); + return -1; + } + if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) { RTE_LOG(INFO, VHOST_CONFIG, "(%d) memory regions not changed\n", dev->vid); @@ -662,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) dev->mem = NULL; } + /* Flush IOTLB cache as previous HVAs are now invalid */ + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + for (i = 0; i < dev->nr_vring; i++) + vhost_user_iotlb_flush_all(dev->virtqueue[i]); + dev->nr_guest_pages = 0; if (!dev->guest_pages) { dev->max_guest_pages = 8; @@ -696,7 +852,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) reg->fd = fd; mmap_offset = memory.regions[i].mmap_offset; - mmap_size = reg->size + mmap_offset; + + /* Check for memory_size + mmap_offset overflow */ + if (mmap_offset >= -reg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap_offset (%#"PRIx64") and memory_size " + "(%#"PRIx64") overflow\n", + mmap_offset, reg->size); + goto err_mmap; + } + + mmap_size = reg->size + mmap_offset; /* mmap() without flag of MAP_ANONYMOUS, should be called * with length argument aligned with hugepagesz at older @@ -714,8 +880,9 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) } mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + populate = (dev->dequeue_zero_copy) ? 
MAP_POPULATE : 0; mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, 0); + MAP_SHARED | populate, fd, 0); if (mmap_addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, @@ -729,7 +896,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset; if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); + if (add_guest_pages(dev, reg, alignment) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "adding guest pages to region %u failed.\n", + i); + goto err_mmap; + } RTE_LOG(INFO, VHOST_CONFIG, "guest memory region %u, size: 0x%" PRIx64 "\n" @@ -750,6 +922,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset); } + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (vq->desc || vq->avail || vq->used) { + /* + * If the memory table got updated, the ring addresses + * need to be translated again as virtual addresses have + * changed. + */ + vring_invalidate(dev, vq); + + dev = translate_ring_addresses(dev, i); + if (!dev) + return -1; + + *pdev = dev; + } + } + dump_guest_pages(dev); return 0; @@ -761,10 +952,20 @@ err_mmap: return -1; } -static int -vq_is_ready(struct vhost_virtqueue *vq) +static bool +vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq) { - return vq && vq->desc && vq->avail && vq->used && + bool rings_ok; + + if (!vq) + return false; + + if (vq_is_packed(dev)) + rings_ok = !!vq->desc_packed; + else + rings_ok = vq->desc && vq->avail && vq->used; + + return rings_ok && vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; } @@ -781,7 +982,7 @@ virtio_is_ready(struct virtio_net *dev) for (i = 0; i < dev->nr_vring; i++) { vq = dev->virtqueue[i]; - if (!vq_is_ready(vq)) + if (!vq_is_ready(dev, vq)) return 0; } @@ -874,15 +1075,13 @@ vhost_user_get_vring_base(struct virtio_net *dev, struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; /* We have to stop the queue (virtio) if it is running. 
*/ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } + vhost_destroy_device_notify(dev); dev->flags &= ~VIRTIO_DEV_READY; + dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; - /* Here we are safe to get the last used index */ - msg->payload.state.num = vq->last_used_idx; + /* Here we are safe to get the last avail index */ + msg->payload.state.num = vq->last_avail_idx; RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, @@ -897,10 +1096,20 @@ vhost_user_get_vring_base(struct virtio_net *dev, vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (dev->dequeue_zero_copy) free_zmbufs(vq); - rte_free(vq->shadow_used_ring); - vq->shadow_used_ring = NULL; + if (vq_is_packed(dev)) { + rte_free(vq->shadow_used_packed); + vq->shadow_used_packed = NULL; + } else { + rte_free(vq->shadow_used_split); + vq->shadow_used_split = NULL; + } rte_free(vq->batch_copy_elems); vq->batch_copy_elems = NULL; @@ -917,16 +1126,24 @@ vhost_user_set_vring_enable(struct virtio_net *dev, VhostUserMsg *msg) { int enable = (int)msg->payload.state.num; + int index = (int)msg->payload.state.index; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(INFO, VHOST_CONFIG, "set queue enable: %d to qp idx: %d\n", - enable, msg->payload.state.index); + enable, index); + + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_vring_state) + vdpa_dev->ops->set_vring_state(dev->vid, index, enable); if (dev->notify_ops->vring_state_changed) dev->notify_ops->vring_state_changed(dev->vid, - msg->payload.state.index, enable); + index, enable); - dev->virtqueue[msg->payload.state.index]->enabled = enable; + dev->virtqueue[index]->enabled = enable; return 0; } @@ -935,9 +1152,10 @@ static void vhost_user_get_protocol_features(struct virtio_net *dev, struct VhostUserMsg *msg) { - uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES; + uint64_t features, protocol_features; rte_vhost_driver_get_features(dev->ifname, &features); + rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features); /* * REPLY_ACK protocol feature is only mandatory for now @@ -983,6 +1201,15 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) size = msg->payload.log.mmap_size; off = msg->payload.log.mmap_offset; + + /* Don't allow mmap_offset to point outside the mmap region */ + if (off > size) { + RTE_LOG(ERR, VHOST_CONFIG, + "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", + off, size); + return -1; + } + RTE_LOG(INFO, VHOST_CONFIG, "log mmap size: %"PRId64", offset: %"PRId64"\n", size, off); @@ -991,7 +1218,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * mmap from 0 to workaround a hugepage mmap bug: mmap will * fail when offset is not page size aligned. 
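 * (Illustrative aside, not part of the patch: with mmap_size 0x10000 and
 * mmap_offset 0x100, the call below maps 0x10100 bytes starting at file
 * offset 0, and the dirty log area then begins 0x100 bytes into the
 * returned mapping.)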
*/ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); @@ -1024,6 +1251,8 @@ static int vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) { uint8_t *mac = (uint8_t *)&msg->payload.u64; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(DEBUG, VHOST_CONFIG, ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", @@ -1039,6 +1268,10 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) */ rte_smp_wmb(); rte_atomic16_set(&dev->broadcast_rarp, 1); + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->migration_done) + vdpa_dev->ops->migration_done(dev->vid); return 0; } @@ -1131,11 +1364,12 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) struct virtio_net *dev = *pdev; struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; uint16_t i; - uint64_t vva; + uint64_t vva, len; switch (imsg->type) { case VHOST_IOTLB_UPDATE: - vva = qva_to_vva(dev, imsg->uaddr); + len = imsg->size; + vva = qva_to_vva(dev, imsg->uaddr, &len); if (!vva) return -1; @@ -1143,7 +1377,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) struct vhost_virtqueue *vq = dev->virtqueue[i]; vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, - imsg->size, imsg->perm); + len, imsg->perm); if (is_vring_iotlb_update(vq, imsg)) *pdev = dev = translate_ring_addresses(dev, i); @@ -1200,13 +1434,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) } static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg) +send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num) { if (!msg) return 0; return send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + VHOST_USER_HDR_SIZE + msg->size, fds, fd_num); } static int @@ -1220,7 +1454,23 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg) msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; - return send_vhost_message(sockfd, msg); + return send_vhost_message(sockfd, msg, NULL, 0); +} + +static int +send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg, + int *fds, int fd_num) +{ + int ret; + + if (msg->flags & VHOST_USER_NEED_REPLY) + rte_spinlock_lock(&dev->slave_req_lock); + + ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num); + if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY)) + rte_spinlock_unlock(&dev->slave_req_lock); + + return ret; } /* @@ -1300,8 +1550,11 @@ vhost_user_msg_handler(int vid, int fd) { struct virtio_net *dev; struct VhostUserMsg msg; + struct rte_vdpa_device *vdpa_dev; + int did = -1; int ret; int unlock_required = 0; + uint32_t skip_master = 0; dev = get_device(vid); if (dev == NULL) @@ -1379,6 +1632,21 @@ vhost_user_msg_handler(int vid, int fd) } + if (dev->extern_ops.pre_msg_handle) { + uint32_t need_reply; + + ret = (*dev->extern_ops.pre_msg_handle)(dev->vid, + (void *)&msg, &need_reply, &skip_master); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); + + if (skip_master) + goto skip_to_post_handle; + } + switch (msg.request.master) { case VHOST_USER_GET_FEATURES: msg.payload.u64 = vhost_user_get_features(dev); @@ -1407,7 +1675,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_MEM_TABLE: - ret = vhost_user_set_mem_table(dev, &msg); + ret = vhost_user_set_mem_table(&dev, &msg); break; 
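/*
 * Illustrative sketch, not part of this patch: the shape of an external
 * message handler matching the pre_msg_handle call made earlier in this
 * function. A backend such as vhost-crypto registers callbacks like this
 * through dev->extern_ops; the names below are hypothetical.
 */
static int
example_pre_msg_handler(int vid, void *msg, uint32_t *need_reply,
		uint32_t *skip_master)
{
	/* default: no immediate reply, let the generic switch handle the msg */
	*need_reply = 0;
	*skip_master = 0;

	/* a real backend would inspect ((VhostUserMsg *)msg)->request.master
	 * here and set *skip_master = 1 for requests it consumes itself
	 */
	RTE_SET_USED(vid);
	RTE_SET_USED(msg);

	return 0;
}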
case VHOST_USER_SET_LOG_BASE: @@ -1452,7 +1720,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev); msg.size = sizeof(msg.payload.u64); send_vhost_reply(fd, &msg); break; @@ -1479,9 +1747,22 @@ vhost_user_msg_handler(int vid, int fd) default: ret = -1; break; + } + +skip_to_post_handle: + if (dev->extern_ops.post_msg_handle) { + uint32_t need_reply; + ret = (*dev->extern_ops.post_msg_handle)( + dev->vid, (void *)&msg, &need_reply); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); } +skip_to_reply: if (unlock_required) vhost_user_unlock_all_queue_pairs(dev); @@ -1505,9 +1786,53 @@ vhost_user_msg_handler(int vid, int fd) } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && virtio_is_ready(dev) && + !(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) && + msg.request.master == VHOST_USER_SET_VRING_ENABLE) { + if (vdpa_dev->ops->dev_conf) + vdpa_dev->ops->dev_conf(dev->vid); + dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "(%d) software relay is used for vDPA, performance may be low.\n", + dev->vid); + } + } + return 0; } +static int process_slave_message_reply(struct virtio_net *dev, + const VhostUserMsg *msg) +{ + VhostUserMsg msg_reply; + int ret; + + if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) + return 0; + + if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) { + ret = -1; + goto out; + } + + if (msg_reply.request.slave != msg->request.slave) { + RTE_LOG(ERR, VHOST_CONFIG, + "Received unexpected msg type (%u), expected %u\n", + msg_reply.request.slave, msg->request.slave); + ret = -1; + goto out; + } + + ret = msg_reply.payload.u64 ? 
-1 : 0; + +out: + rte_spinlock_unlock(&dev->slave_req_lock); + return ret; +} + int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) { @@ -1523,7 +1848,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) }, }; - ret = send_vhost_message(dev->slave_req_fd, &msg); + ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to send IOTLB miss message (%d)\n", @@ -1533,3 +1858,101 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) return 0; } + +static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, + int index, int fd, + uint64_t offset, + uint64_t size) +{ + int *fdp = NULL; + size_t fd_num = 0; + int ret; + struct VhostUserMsg msg = { + .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY, + .size = sizeof(msg.payload.area), + .payload.area = { + .u64 = index & VHOST_USER_VRING_IDX_MASK, + .size = size, + .offset = offset, + }, + }; + + if (fd < 0) + msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; + else { + fdp = &fd; + fd_num = 1; + } + + ret = send_vhost_slave_message(dev, &msg, fdp, fd_num); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to set host notifier (%d)\n", ret); + return ret; + } + + return process_slave_message_reply(dev, &msg); +} + +int vhost_user_host_notifier_ctrl(int vid, bool enable) +{ + struct virtio_net *dev; + struct rte_vdpa_device *vdpa_dev; + int vfio_device_fd, did, ret = 0; + uint64_t offset, size; + unsigned int i; + + dev = get_device(vid); + if (!dev) + return -ENODEV; + + did = dev->vdpa_dev_id; + if (did < 0) + return -EINVAL; + + if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) || + !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))) + return -ENOTSUP; + + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev) + return -ENODEV; + + RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP); + + vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid); + if (vfio_device_fd < 0) + return -ENOTSUP; + + if (enable) { + for (i = 0; i < dev->nr_vring; i++) { + if (vdpa_dev->ops->get_notify_area(vid, i, &offset, + &size) < 0) { + ret = -ENOTSUP; + goto disable; + } + + if (vhost_user_slave_set_vring_host_notifier(dev, i, + vfio_device_fd, offset, size) < 0) { + ret = -EFAULT; + goto disable; + } + } + } else { +disable: + for (i = 0; i < dev->nr_vring; i++) { + vhost_user_slave_set_vring_host_notifier(dev, i, -1, + 0, 0); + } + } + + return ret; +} diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index d4bd604b..42166adf 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_USER_H @@ -14,19 +14,15 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_USER_PROTOCOL_F_RARP 2 -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_NET_MTU 4 -#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 - #define 
VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ - (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \ + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \ + (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -52,12 +48,15 @@ typedef enum VhostUserRequest { VHOST_USER_NET_SET_MTU = 20, VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, - VHOST_USER_MAX + VHOST_USER_CRYPTO_CREATE_SESS = 26, + VHOST_USER_CRYPTO_CLOSE_SESS = 27, + VHOST_USER_MAX = 28 } VhostUserRequest; typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -79,10 +78,40 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; +/* Comply with Cryptodev-Linux */ +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH 512 +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH 64 + +/* Same structure as vhost-user backend session info */ +typedef struct VhostUserCryptoSessionParam { + int64_t session_id; + uint32_t op_code; + uint32_t cipher_algo; + uint32_t cipher_key_len; + uint32_t hash_algo; + uint32_t digest_len; + uint32_t auth_key_len; + uint32_t aad_len; + uint8_t op_type; + uint8_t dir; + uint8_t hash_mode; + uint8_t chaining_dir; + uint8_t *ciphe_key; + uint8_t *auth_key; + uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH]; + uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH]; +} VhostUserCryptoSessionParam; + +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + typedef struct VhostUserMsg { union { - VhostUserRequest master; - VhostUserSlaveRequest slave; + uint32_t master; /* a VhostUserRequest value */ + uint32_t slave; /* a VhostUserSlaveRequest value*/ } request; #define VHOST_USER_VERSION_MASK 0x3 @@ -99,6 +128,8 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserCryptoSessionParam crypto_session; + VhostUserVringArea area; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; } __attribute((packed)) VhostUserMsg; @@ -112,6 +143,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +int vhost_user_host_notifier_ctrl(int vid, bool enable); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); diff --git a/lib/librte_vhost/virtio_crypto.h b/lib/librte_vhost/virtio_crypto.h new file mode 100644 index 00000000..e3b93573 --- /dev/null +++ b/lib/librte_vhost/virtio_crypto.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 HUAWEI TECHNOLOGIES CO., LTD. 
+ */ + +#ifndef _VIRTIO_CRYPTO_H +#define _VIRTIO_CRYPTO_H + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + uint32_t opcode; + uint32_t algo; + uint32_t flag; + /* data virtqueue id */ + uint32_t queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + uint32_t algo; + /* length of key */ + uint32_t keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 +#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + uint64_t session_id; + uint32_t status; + uint32_t padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + uint8_t padding[8]; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define 
VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + /* length of authenticated key */ + uint32_t auth_key_len; + uint32_t padding; +}; + +struct virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + uint32_t algo; + /* length of key */ + uint32_t key_len; + /* hash result length */ + uint32_t hash_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + uint32_t alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + uint32_t hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + uint8_t padding[16]; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + uint32_t padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + uint8_t padding[48]; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* + * Chain any cipher with any hash or mac operation. 
The order + * depends on the value of alg_chain_order param + */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + uint64_t session_id; + uint8_t padding[48]; +}; + +/* The request of the control virtqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req + sym_create_session; + struct virtio_crypto_hash_create_session_req + hash_create_session; + struct virtio_crypto_mac_create_session_req + mac_create_session; + struct virtio_crypto_aead_create_session_req + aead_create_session; + struct virtio_crypto_destroy_session_req + destroy_session; + uint8_t padding[56]; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + uint32_t opcode; + /* algo should be service-specific algorithms */ + uint32_t algo; + /* session_id should be service-specific algorithms */ + uint64_t session_id; + /* control flag to control the request */ + uint32_t flag; + uint32_t padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + uint32_t iv_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; + uint32_t padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + uint32_t src_data_len; + /* hash result length */ + uint32_t hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. 
+ */ + uint32_t iv_len; + /* length of additional auth data */ + uint32_t aad_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; + uint8_t padding[24]; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_alg_chain_data_para { + uint32_t iv_len; + /* Length of source data */ + uint32_t src_data_len; + /* Length of destination data */ + uint32_t dst_data_len; + /* Starting point for cipher processing in source data */ + uint32_t cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + uint32_t len_to_cipher; + /* Starting point for hash processing in source data */ + uint32_t hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + uint32_t len_to_hash; + /* Length of the additional auth data */ + uint32_t aad_len; + /* Length of the hash result */ + uint32_t hash_result_len; + uint32_t reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + uint8_t padding[40]; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; + uint8_t padding[32]; +}; + +/* The request of the data virtqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + uint8_t padding[48]; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_OP_* above */ + uint32_t status; + + /* + * Maximum number of data queue + */ + uint32_t max_dataqueues; + + /* + * Specifies the services mask which the device support, + * see VIRTIO_CRYPTO_SERVICE_* above + */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + uint32_t reserve; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; +#endif /* _VIRTIO_CRYPTO_H */ diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 700aca7c..99c7afc8 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "iotlb.h" #include 
"vhost.h" @@ -24,60 +25,174 @@ #define MAX_BATCH_LEN 256 +static __rte_always_inline bool +rxvq_is_mergeable(struct virtio_net *dev) +{ + return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); +} + static bool is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; } +static __rte_always_inline void * +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t desc_addr, uint64_t desc_len) +{ + void *idesc; + uint64_t src, dst; + uint64_t len, remain = desc_len; + + idesc = rte_malloc(__func__, desc_len, 0); + if (unlikely(!idesc)) + return 0; + + dst = (uint64_t)(uintptr_t)idesc; + + while (remain) { + len = remain; + src = vhost_iova_to_vva(dev, vq, desc_addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + rte_free(idesc); + return 0; + } + + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); + + remain -= len; + dst += len; + desc_addr += len; + } + + return idesc; +} + static __rte_always_inline void -do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t to, uint16_t from, uint16_t size) +free_ind_table(void *idesc) +{ + rte_free(idesc); +} + +static __rte_always_inline void +do_flush_shadow_used_ring_split(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint16_t to, uint16_t from, uint16_t size) { rte_memcpy(&vq->used->ring[to], - &vq->shadow_used_ring[from], + &vq->shadow_used_split[from], size * sizeof(struct vring_used_elem)); - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[to]), size * sizeof(struct vring_used_elem)); } static __rte_always_inline void -flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) +flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { uint16_t used_idx = vq->last_used_idx & (vq->size - 1); if (used_idx + vq->shadow_used_idx <= vq->size) { - do_flush_shadow_used_ring(dev, vq, used_idx, 0, + do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, vq->shadow_used_idx); } else { uint16_t size; /* update used ring interval [used_idx, vq->size] */ size = vq->size - used_idx; - do_flush_shadow_used_ring(dev, vq, used_idx, 0, size); + do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); /* update the left half used ring interval [0, left_size] */ - do_flush_shadow_used_ring(dev, vq, 0, size, + do_flush_shadow_used_ring_split(dev, vq, 0, size, vq->shadow_used_idx - size); } vq->last_used_idx += vq->shadow_used_idx; rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; + vq->shadow_used_idx = 0; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); } static __rte_always_inline void -update_shadow_used_ring(struct vhost_virtqueue *vq, +update_shadow_used_ring_split(struct vhost_virtqueue *vq, uint16_t desc_idx, uint16_t len) { uint16_t i = vq->shadow_used_idx++; - vq->shadow_used_ring[i].id = desc_idx; - vq->shadow_used_ring[i].len = len; + vq->shadow_used_split[i].id = desc_idx; + vq->shadow_used_split[i].len = len; +} + +static __rte_always_inline void +flush_shadow_used_ring_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq) +{ + int i; + uint16_t used_idx = vq->last_used_idx; + + /* Split loop in two to save memory barriers */ + for (i = 0; i < vq->shadow_used_idx; i++) { + vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; + vq->desc_packed[used_idx].len = 
vq->shadow_used_packed[i].len; + + used_idx += vq->shadow_used_packed[i].count; + if (used_idx >= vq->size) + used_idx -= vq->size; + } + + rte_smp_wmb(); + + for (i = 0; i < vq->shadow_used_idx; i++) { + uint16_t flags; + + if (vq->shadow_used_packed[i].len) + flags = VRING_DESC_F_WRITE; + else + flags = 0; + + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; + } + + vq->desc_packed[vq->last_used_idx].flags = flags; + + vhost_log_cache_used_vring(dev, vq, + vq->last_used_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc)); + + vq->last_used_idx += vq->shadow_used_packed[i].count; + if (vq->last_used_idx >= vq->size) { + vq->used_wrap_counter ^= 1; + vq->last_used_idx -= vq->size; + } + } + + rte_smp_wmb(); + vq->shadow_used_idx = 0; + vhost_log_cache_sync(dev, vq); +} + +static __rte_always_inline void +update_shadow_used_ring_packed(struct vhost_virtqueue *vq, + uint16_t desc_idx, uint16_t len, uint16_t count) +{ + uint16_t i = vq->shadow_used_idx++; + + vq->shadow_used_packed[i].id = desc_idx; + vq->shadow_used_packed[i].len = len; + vq->shadow_used_packed[i].count = count; } static inline void @@ -89,9 +204,11 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) for (i = 0; i < count; i++) { rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); - vhost_log_write(dev, elem[i].log_addr, elem[i].len); + vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len); PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); } + + vq->batch_copy_nb_elems = 0; } static inline void @@ -103,6 +220,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq) for (i = 0; i < count; i++) rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + + vq->batch_copy_nb_elems = 0; } /* avoid write operation when necessary, to lessen cache issues */ @@ -111,7 +230,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq) (var) = (val); \ } while (0) -static void +static __rte_always_inline void virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) { uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; @@ -173,265 +292,93 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } static __rte_always_inline int -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct vring_desc *descs, struct rte_mbuf *m, - uint16_t desc_idx, uint32_t size) -{ - uint32_t desc_avail, desc_offset; - uint32_t mbuf_avail, mbuf_offset; - uint32_t cpy_len; - struct vring_desc *desc; - uint64_t desc_addr; - /* A counter to avoid desc dead loop chain */ - uint16_t nr_desc = 1; - struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; - int error = 0; - - desc = &descs[desc_idx]; - desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, - desc->len, VHOST_ACCESS_RW); - /* - * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid - * performance issue with some versions of gcc (4.8.4 and 5.3.0) which - * otherwise stores offset on the stack instead of in a register. 
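
flush_shadow_used_ring_packed() above deliberately writes the descriptor flags in a second pass, after an rte_smp_wmb(), because a packed-ring slot becomes visible to the guest the instant its AVAIL/USED bits agree with the ring's wrap counter; id and len must therefore already be in place. A minimal sketch of the flag value that second loop computes (illustrative only; the flag names are the ones this patch uses, with the virtio 1.1 bit values added elsewhere in this release):

    #include <stdbool.h>
    #include <stdint.h>

    /* Packed-ring flag bits (virtio 1.1 values). */
    #define VRING_DESC_F_WRITE  2
    #define VRING_DESC_F_AVAIL  (1 << 7)
    #define VRING_DESC_F_USED   (1 << 15)

    /*
     * A used descriptor is published by making both AVAIL and USED equal to
     * the ring's current used wrap counter; WRITE is kept only for buffers
     * the device actually wrote into (len != 0).
     */
    static inline uint16_t
    packed_used_flags(bool used_wrap_counter, bool device_wrote)
    {
        uint16_t flags = device_wrote ? VRING_DESC_F_WRITE : 0;

        if (used_wrap_counter)
            flags |= VRING_DESC_F_AVAIL | VRING_DESC_F_USED;

        return flags;
    }
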
- */ - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) { - error = -1; - goto out; - } - - rte_prefetch0((void *)(uintptr_t)desc_addr); - - virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr); - vhost_log_write(dev, desc->addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - - desc_offset = dev->vhost_hlen; - desc_avail = desc->len - dev->vhost_hlen; - - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current mbuf, fetch next */ - if (mbuf_avail == 0) { - m = m->next; - - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); - } - - /* done with current desc buf, fetch next */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) { - /* Room in vring buffer is not enough */ - error = -1; - goto out; - } - if (unlikely(desc->next >= size || ++nr_desc > size)) { - error = -1; - goto out; - } - - desc = &descs[desc->next]; - desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, - desc->len, - VHOST_ACCESS_RW); - if (unlikely(!desc_addr)) { - error = -1; - goto out; - } - - desc_offset = 0; - desc_avail = desc->len; - } - - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { - rte_memcpy((void *)((uintptr_t)(desc_addr + - desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, desc->addr + desc_offset, cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); - } else { - batch_copy[copy_nb].dst = - (void *)((uintptr_t)(desc_addr + desc_offset)); - batch_copy[copy_nb].src = - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); - batch_copy[copy_nb].log_addr = desc->addr + desc_offset; - batch_copy[copy_nb].len = cpy_len; - copy_nb++; - } - - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; - } - -out: - vq->batch_copy_nb_elems = copy_nb; - - return error; -} - -/** - * This function adds buffers to the virtio devices RX virtqueue. Buffers can - * be received from the physical port or from another virtio device. A packet - * count is returned to indicate the number of packets that are successfully - * added to the RX queue. This function works when the mbuf is scattered, but - * it doesn't support the mergeable feature. 
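
Both the code being removed here and its replacement keep the same batching trick around rte_memcpy(): copies no longer than MAX_BATCH_LEN are queued in vq->batch_copy_elems and executed in one pass by do_data_copy_enqueue()/do_data_copy_dequeue(), which amortizes per-copy overhead for small segments. The pattern in isolation, simplified and with the struct abbreviated (the real element also carries log_addr for dirty-page logging):

    #include <stdint.h>
    #include <string.h>

    #define MAX_BATCH_LEN 256          /* same threshold the patch uses */

    struct batch_copy_elem {           /* abbreviated: dst, src, len */
        void *dst;
        void *src;
        uint32_t len;
    };

    /* Copy immediately when large, or when the batch is full; else defer. */
    static void
    copy_or_defer(struct batch_copy_elem *batch, uint16_t *nb, uint16_t batch_sz,
                  void *dst, void *src, uint32_t len)
    {
        if (len > MAX_BATCH_LEN || *nb >= batch_sz) {
            memcpy(dst, src, len);
            return;
        }

        batch[*nb].dst = dst;
        batch[*nb].src = src;
        batch[*nb].len = len;
        (*nb)++;
    }

    /* Drain the deferred copies, as do_data_copy_enqueue/dequeue do. */
    static void
    flush_deferred(struct batch_copy_elem *batch, uint16_t *nb)
    {
        for (uint16_t i = 0; i < *nb; i++)
            memcpy(batch[i].dst, batch[i].src, batch[i].len);
        *nb = 0;
    }
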
- */ -static __rte_always_inline uint32_t -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint32_t count) +map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct buf_vector *buf_vec, uint16_t *vec_idx, + uint64_t desc_iova, uint64_t desc_len, uint8_t perm) { - struct vhost_virtqueue *vq; - uint16_t avail_idx, free_entries, start_idx; - uint16_t desc_indexes[MAX_PKT_BURST]; - struct vring_desc *descs; - uint16_t used_idx; - uint32_t i, sz; - - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - - rte_spinlock_lock(&vq->access_lock); - - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; - - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); - - if (unlikely(vq->access_ok == 0)) { - if (unlikely(vring_translate(dev, vq) < 0)) { - count = 0; - goto out; - } - } - - avail_idx = *((volatile uint16_t *)&vq->avail->idx); - start_idx = vq->last_used_idx; - free_entries = avail_idx - start_idx; - count = RTE_MIN(count, free_entries); - count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); - if (count == 0) - goto out; - - LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", - dev->vid, start_idx, start_idx + count); + uint16_t vec_id = *vec_idx; - vq->batch_copy_nb_elems = 0; - - /* Retrieve all of the desc indexes first to avoid caching issues. */ - rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); - for (i = 0; i < count; i++) { - used_idx = (start_idx + i) & (vq->size - 1); - desc_indexes[i] = vq->avail->ring[used_idx]; - vq->used->ring[used_idx].id = desc_indexes[i]; - vq->used->ring[used_idx].len = pkts[i]->pkt_len + - dev->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); - } + while (desc_len) { + uint64_t desc_addr; + uint64_t desc_chunck_len = desc_len; - rte_prefetch0(&vq->desc[desc_indexes[0]]); - for (i = 0; i < count; i++) { - uint16_t desc_idx = desc_indexes[i]; - int err; - - if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { - descs = (struct vring_desc *)(uintptr_t) - vhost_iova_to_vva(dev, - vq, vq->desc[desc_idx].addr, - vq->desc[desc_idx].len, - VHOST_ACCESS_RO); - if (unlikely(!descs)) { - count = i; - break; - } + if (unlikely(vec_id >= BUF_VECTOR_MAX)) + return -1; - desc_idx = 0; - sz = vq->desc[desc_idx].len / sizeof(*descs); - } else { - descs = vq->desc; - sz = vq->size; - } + desc_addr = vhost_iova_to_vva(dev, vq, + desc_iova, + &desc_chunck_len, + perm); + if (unlikely(!desc_addr)) + return -1; - err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz); - if (unlikely(err)) { - count = i; - break; - } + buf_vec[vec_id].buf_iova = desc_iova; + buf_vec[vec_id].buf_addr = desc_addr; + buf_vec[vec_id].buf_len = desc_chunck_len; - if (i + 1 < count) - rte_prefetch0(&vq->desc[desc_indexes[i+1]]); + desc_len -= desc_chunck_len; + desc_iova += desc_chunck_len; + vec_id++; } + *vec_idx = vec_id; - do_data_copy_enqueue(dev, vq); - - rte_smp_wmb(); - - *(volatile uint16_t *)&vq->used->idx += count; - vq->last_used_idx += count; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - - vhost_vring_call(dev, vq); -out: - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_unlock(vq); - 
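
map_one_desc() above may emit several buf_vector entries for a single guest descriptor, because a buffer that is contiguous in guest physical memory is not necessarily contiguous in the host process address space once translated through vhost_iova_to_vva(). Consumers therefore always walk the whole vector. A hypothetical helper, shown only to illustrate how buf_addr/buf_len are meant to be consumed (the real code uses rte_memcpy and also logs the written ranges via buf_iova):

    #include <stdint.h>
    #include <string.h>

    struct buf_vector {          /* abbreviated: the fields this patch fills in */
        uint64_t buf_iova;
        uint64_t buf_addr;
        uint32_t buf_len;
    };

    /* Scatter 'len' bytes from 'src' across the mapped chunks. */
    static int
    scatter_copy(const struct buf_vector *buf_vec, uint16_t nr_vec,
                 const uint8_t *src, uint32_t len)
    {
        uint16_t v;

        for (v = 0; v < nr_vec && len != 0; v++) {
            uint32_t chunk = len < buf_vec[v].buf_len ? len : buf_vec[v].buf_len;

            memcpy((void *)(uintptr_t)buf_vec[v].buf_addr, src, chunk);
            src += chunk;
            len -= chunk;
        }

        return len == 0 ? 0 : -1;   /* -1: ran out of descriptor space */
    }
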
-out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); - - return count; + return 0; } static __rte_always_inline int -fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t avail_idx, uint32_t *vec_idx, +fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t avail_idx, uint16_t *vec_idx, struct buf_vector *buf_vec, uint16_t *desc_chain_head, - uint16_t *desc_chain_len) + uint16_t *desc_chain_len, uint8_t perm) { uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; - uint32_t vec_id = *vec_idx; + uint16_t vec_id = *vec_idx; uint32_t len = 0; + uint64_t dlen; struct vring_desc *descs = vq->desc; + struct vring_desc *idesc = NULL; *desc_chain_head = idx; if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { + dlen = vq->desc[idx].len; descs = (struct vring_desc *)(uintptr_t) vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, - vq->desc[idx].len, + &dlen, VHOST_ACCESS_RO); if (unlikely(!descs)) return -1; + if (unlikely(dlen < vq->desc[idx].len)) { + /* + * The indirect desc table is not contiguous + * in process VA space, we have to copy it. + */ + idesc = alloc_copy_ind_table(dev, vq, + vq->desc[idx].addr, vq->desc[idx].len); + if (unlikely(!idesc)) + return -1; + + descs = idesc; + } + idx = 0; } while (1) { - if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) + if (unlikely(idx >= vq->size)) { + free_ind_table(idesc); return -1; + } len += descs[idx].len; - buf_vec[vec_id].buf_addr = descs[idx].addr; - buf_vec[vec_id].buf_len = descs[idx].len; - buf_vec[vec_id].desc_idx = idx; - vec_id++; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[idx].addr, descs[idx].len, + perm))) { + free_ind_table(idesc); + return -1; + } if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) break; @@ -442,6 +389,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, *desc_chain_len = len; *vec_idx = vec_id; + if (unlikely(!!idesc)) + free_ind_table(idesc); + return 0; } @@ -449,13 +399,14 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, * Returns -1 on fail, 0 on success */ static inline int -reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, +reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t size, struct buf_vector *buf_vec, - uint16_t *num_buffers, uint16_t avail_head) + uint16_t *num_buffers, uint16_t avail_head, + uint16_t *nr_vec) { uint16_t cur_idx; - uint32_t vec_idx = 0; - uint16_t tries = 0; + uint16_t vec_idx = 0; + uint16_t max_tries, tries = 0; uint16_t head_idx = 0; uint16_t len = 0; @@ -463,227 +414,511 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, *num_buffers = 0; cur_idx = vq->last_avail_idx; + if (rxvq_is_mergeable(dev)) + max_tries = vq->size - 1; + else + max_tries = 1; + while (size > 0) { if (unlikely(cur_idx == avail_head)) return -1; + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. 
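
rxvq_is_mergeable() keys the retry budget off the negotiated VIRTIO_NET_F_MRG_RXBUF bit: with mergeable RX buffers a packet may legitimately span up to vq->size - 1 descriptor chains, otherwise a single chain must hold it. An application can observe the same negotiation result through the public vhost API; a small sketch (the feature-bit value is the standard virtio one, the call is the long-standing rte_vhost getter):

    #include <stdint.h>
    #include <rte_vhost.h>

    #ifndef VIRTIO_NET_F_MRG_RXBUF
    #define VIRTIO_NET_F_MRG_RXBUF 15   /* virtio: mergeable RX buffers */
    #endif

    /* Returns 1 if the guest negotiated mergeable RX buffers, 0 if not, -1 on error. */
    static int
    rx_is_mergeable(int vid)
    {
        uint64_t features = 0;

        if (rte_vhost_get_negotiated_features(vid, &features) != 0)
            return -1;

        return !!(features & (1ULL << VIRTIO_NET_F_MRG_RXBUF));
    }
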
+ */ + if (unlikely(++tries > max_tries)) + return -1; - if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec, - &head_idx, &len) < 0)) + if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, + &vec_idx, buf_vec, + &head_idx, &len, + VHOST_ACCESS_RW) < 0)) return -1; len = RTE_MIN(len, size); - update_shadow_used_ring(vq, head_idx, len); + update_shadow_used_ring_split(vq, head_idx, len); size -= len; cur_idx++; - tries++; *num_buffers += 1; - - /* - * if we tried all available ring items, and still - * can't get enough buf, it means something abnormal - * happened. - */ - if (unlikely(tries >= vq->size)) - return -1; } + *nr_vec = vec_idx; + return 0; } static __rte_always_inline int -copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf *m, struct buf_vector *buf_vec, - uint16_t num_buffers) +fill_vec_buf_packed_indirect(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct vring_packed_desc *desc, uint16_t *vec_idx, + struct buf_vector *buf_vec, uint16_t *len, uint8_t perm) { - uint32_t vec_idx = 0; - uint64_t desc_addr; - uint32_t mbuf_offset, mbuf_avail; - uint32_t desc_offset, desc_avail; - uint32_t cpy_len; - uint64_t hdr_addr, hdr_phys_addr; - struct rte_mbuf *hdr_mbuf; - struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; - int error = 0; + uint16_t i; + uint32_t nr_descs; + uint16_t vec_id = *vec_idx; + uint64_t dlen; + struct vring_packed_desc *descs, *idescs = NULL; + + dlen = desc->len; + descs = (struct vring_packed_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); + if (unlikely(!descs)) + return -1; + + if (unlikely(dlen < desc->len)) { + /* + * The indirect desc table is not contiguous + * in process VA space, we have to copy it. + */ + idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len); + if (unlikely(!idescs)) + return -1; - if (unlikely(m == NULL)) { - error = -1; - goto out; + descs = idescs; } - desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr, - buf_vec[vec_idx].buf_len, - VHOST_ACCESS_RW); - if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) { - error = -1; - goto out; + nr_descs = desc->len / sizeof(struct vring_packed_desc); + if (unlikely(nr_descs >= vq->size)) { + free_ind_table(idescs); + return -1; } - hdr_mbuf = m; - hdr_addr = desc_addr; - hdr_phys_addr = buf_vec[vec_idx].buf_addr; - rte_prefetch0((void *)(uintptr_t)hdr_addr); + for (i = 0; i < nr_descs; i++) { + if (unlikely(vec_id >= BUF_VECTOR_MAX)) { + free_ind_table(idescs); + return -1; + } - LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", - dev->vid, num_buffers); + *len += descs[i].len; + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[i].addr, descs[i].len, + perm))) + return -1; + } + *vec_idx = vec_id; - desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; + if (unlikely(!!idescs)) + free_ind_table(idescs); - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current desc buf, get the next one */ - if (desc_avail == 0) { - vec_idx++; - desc_addr = - vhost_iova_to_vva(dev, vq, - buf_vec[vec_idx].buf_addr, - buf_vec[vec_idx].buf_len, - VHOST_ACCESS_RW); - if (unlikely(!desc_addr)) { - error = -1; - goto out; - } + return 0; +} - /* Prefetch buffer address. 
*/ - rte_prefetch0((void *)(uintptr_t)desc_addr); - desc_offset = 0; - desc_avail = buf_vec[vec_idx].buf_len; - } +static __rte_always_inline int +fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint16_t avail_idx, uint16_t *desc_count, + struct buf_vector *buf_vec, uint16_t *vec_idx, + uint16_t *buf_id, uint16_t *len, uint8_t perm) +{ + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t vec_id = *vec_idx; - /* done with current mbuf, get the next one */ - if (mbuf_avail == 0) { - m = m->next; + if (avail_idx < vq->last_avail_idx) + wrap_counter ^= 1; - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); - } + if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) + return -1; - if (hdr_addr) { - struct virtio_net_hdr_mrg_rxbuf *hdr; + *desc_count = 0; - hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t) - hdr_addr; - virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); - ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers); + while (1) { + if (unlikely(vec_id >= BUF_VECTOR_MAX)) + return -1; - vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)hdr_addr, - dev->vhost_hlen, 0); + *desc_count += 1; + *buf_id = descs[avail_idx].id; - hdr_addr = 0; + if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { + if (unlikely(fill_vec_buf_packed_indirect(dev, vq, + &descs[avail_idx], + &vec_id, buf_vec, + len, perm) < 0)) + return -1; + } else { + *len += descs[avail_idx].len; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[avail_idx].addr, + descs[avail_idx].len, + perm))) + return -1; } - cpy_len = RTE_MIN(desc_avail, mbuf_avail); + if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) + break; - if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { - rte_memcpy((void *)((uintptr_t)(desc_addr + - desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, - buf_vec[vec_idx].buf_addr + desc_offset, + if (++avail_idx >= vq->size) { + avail_idx -= vq->size; + wrap_counter ^= 1; + } + } + + *vec_idx = vec_id; + + return 0; +} + +/* + * Returns -1 on fail, 0 on success + */ +static inline int +reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t size, struct buf_vector *buf_vec, + uint16_t *nr_vec, uint16_t *num_buffers, + uint16_t *nr_descs) +{ + uint16_t avail_idx; + uint16_t vec_idx = 0; + uint16_t max_tries, tries = 0; + + uint16_t buf_id = 0; + uint16_t len = 0; + uint16_t desc_count; + + *num_buffers = 0; + avail_idx = vq->last_avail_idx; + + if (rxvq_is_mergeable(dev)) + max_tries = vq->size - 1; + else + max_tries = 1; + + while (size > 0) { + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. 
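
fill_vec_buf_packed() flips its local wrap_counter when avail_idx has wrapped past last_avail_idx, then defers to desc_is_avail(), which is defined elsewhere in this release. The check reduces to "AVAIL matches the expected wrap counter and USED does not". A sketch consistent with how the function is used here (illustrative, not this hunk's own definition):

    #include <stdbool.h>
    #include <stdint.h>

    #define VRING_DESC_F_AVAIL  (1 << 7)    /* virtio 1.1 packed-ring flag bits */
    #define VRING_DESC_F_USED   (1 << 15)

    struct vring_packed_desc {              /* layout used by the packed ring */
        uint64_t addr;
        uint32_t len;
        uint16_t id;
        uint16_t flags;
    };

    static inline bool
    desc_is_avail(const struct vring_packed_desc *desc, bool wrap_counter)
    {
        bool avail = !!(desc->flags & VRING_DESC_F_AVAIL);
        bool used  = !!(desc->flags & VRING_DESC_F_USED);

        /*
         * The driver makes a descriptor available by setting AVAIL to its
         * wrap counter and USED to the opposite value.
         */
        return avail == wrap_counter && used != wrap_counter;
    }
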
+ */ + if (unlikely(++tries > max_tries)) + return -1; + + if (unlikely(fill_vec_buf_packed(dev, vq, + avail_idx, &desc_count, + buf_vec, &vec_idx, + &buf_id, &len, + VHOST_ACCESS_RO) < 0)) + return -1; + + len = RTE_MIN(len, size); + update_shadow_used_ring_packed(vq, buf_id, len, desc_count); + size -= len; + + avail_idx += desc_count; + if (avail_idx >= vq->size) + avail_idx -= vq->size; + + *nr_descs += desc_count; + *num_buffers += 1; + } + + *nr_vec = vec_idx; + + return 0; +} + +static __rte_always_inline int +copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf *m, struct buf_vector *buf_vec, + uint16_t nr_vec, uint16_t num_buffers) +{ + uint32_t vec_idx = 0; + uint32_t mbuf_offset, mbuf_avail; + uint32_t buf_offset, buf_avail; + uint64_t buf_addr, buf_iova, buf_len; + uint32_t cpy_len; + uint64_t hdr_addr; + struct rte_mbuf *hdr_mbuf; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; + struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; + int error = 0; + + if (unlikely(m == NULL)) { + error = -1; + goto out; + } + + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + + if (nr_vec > 1) + rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); + + if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { + error = -1; + goto out; + } + + hdr_mbuf = m; + hdr_addr = buf_addr; + if (unlikely(buf_len < dev->vhost_hlen)) + hdr = &tmp_hdr; + else + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; + + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + dev->vid, num_buffers); + + if (unlikely(buf_len < dev->vhost_hlen)) { + buf_offset = dev->vhost_hlen - buf_len; + vec_idx++; + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + buf_avail = buf_len - buf_offset; + } else { + buf_offset = dev->vhost_hlen; + buf_avail = buf_len - dev->vhost_hlen; + } + + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current buf, get the next one */ + if (buf_avail == 0) { + vec_idx++; + if (unlikely(vec_idx >= nr_vec)) { + error = -1; + goto out; + } + + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + + /* Prefetch next buffer address. 
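
copy_mbuf_to_desc() now stages the virtio-net header in tmp_hdr whenever the first buffer is shorter than vhost_hlen and later copies it out piecewise; what it fills in is the standard header, with num_buffers only meaningful when mergeable RX buffers were negotiated. For context, the layout being written (per the virtio specification; the real definitions live in the virtio headers, this is only a reminder of the fields):

    #include <stdint.h>

    struct virtio_net_hdr {          /* per the virtio specification */
        uint8_t  flags;              /* e.g. VIRTIO_NET_HDR_F_NEEDS_CSUM */
        uint8_t  gso_type;           /* e.g. VIRTIO_NET_HDR_GSO_TCPV4 */
        uint16_t hdr_len;
        uint16_t gso_size;
        uint16_t csum_start;
        uint16_t csum_offset;
    };

    struct virtio_net_hdr_mrg_rxbuf {
        struct virtio_net_hdr hdr;
        uint16_t num_buffers;        /* descriptor chains holding this packet */
    };
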
*/ + if (vec_idx + 1 < nr_vec) + rte_prefetch0((void *)(uintptr_t) + buf_vec[vec_idx + 1].buf_addr); + buf_offset = 0; + buf_avail = buf_len; + } + + /* done with current mbuf, get the next one */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + if (hdr_addr) { + virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); + if (rxvq_is_mergeable(dev)) + ASSIGN_UNLESS_EQUAL(hdr->num_buffers, + num_buffers); + + if (unlikely(hdr == &tmp_hdr)) { + uint64_t len; + uint64_t remain = dev->vhost_hlen; + uint64_t src = (uint64_t)(uintptr_t)hdr, dst; + uint64_t iova = buf_vec[0].buf_iova; + uint16_t hdr_vec_idx = 0; + + while (remain) { + len = RTE_MIN(remain, + buf_vec[hdr_vec_idx].buf_len); + dst = buf_vec[hdr_vec_idx].buf_addr; + rte_memcpy((void *)(uintptr_t)dst, + (void *)(uintptr_t)src, + len); + + PRINT_PACKET(dev, (uintptr_t)dst, + (uint32_t)len, 0); + vhost_log_cache_write(dev, vq, + iova, len); + + remain -= len; + iova += len; + src += len; + hdr_vec_idx++; + } + } else { + PRINT_PACKET(dev, (uintptr_t)hdr_addr, + dev->vhost_hlen, 0); + vhost_log_cache_write(dev, vq, + buf_vec[0].buf_iova, + dev->vhost_hlen); + } + + hdr_addr = 0; + } + + cpy_len = RTE_MIN(buf_avail, mbuf_avail); + + if (likely(cpy_len > MAX_BATCH_LEN || + vq->batch_copy_nb_elems >= vq->size)) { + rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + vhost_log_cache_write(dev, vq, buf_iova + buf_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), cpy_len, 0); } else { - batch_copy[copy_nb].dst = - (void *)((uintptr_t)(desc_addr + desc_offset)); - batch_copy[copy_nb].src = + batch_copy[vq->batch_copy_nb_elems].dst = + (void *)((uintptr_t)(buf_addr + buf_offset)); + batch_copy[vq->batch_copy_nb_elems].src = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); - batch_copy[copy_nb].log_addr = - buf_vec[vec_idx].buf_addr + desc_offset; - batch_copy[copy_nb].len = cpy_len; - copy_nb++; + batch_copy[vq->batch_copy_nb_elems].log_addr = + buf_iova + buf_offset; + batch_copy[vq->batch_copy_nb_elems].len = cpy_len; + vq->batch_copy_nb_elems++; } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; + buf_avail -= cpy_len; + buf_offset += cpy_len; } out: - vq->batch_copy_nb_elems = copy_nb; return error; } static __rte_always_inline uint32_t -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) { - struct vhost_virtqueue *vq; uint32_t pkt_idx = 0; uint16_t num_buffers; struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t avail_head; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } + rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + avail_head = *((volatile uint16_t *)&vq->avail->idx); - vq = dev->virtqueue[queue_id]; + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + uint16_t nr_vec = 0; - rte_spinlock_lock(&vq->access_lock); + if (unlikely(reserve_avail_buf_split(dev, vq, + pkt_len, buf_vec, &num_buffers, + avail_head, &nr_vec) < 0)) { + VHOST_LOG_DEBUG(VHOST_DATA, + "(%d) failed to get enough desc from 
vring\n", + dev->vid); + vq->shadow_used_idx -= num_buffers; + break; + } - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + dev->vid, vq->last_avail_idx, + vq->last_avail_idx + num_buffers); - if (unlikely(vq->access_ok == 0)) - if (unlikely(vring_translate(dev, vq) < 0)) - goto out; + if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], + buf_vec, nr_vec, + num_buffers) < 0) { + vq->shadow_used_idx -= num_buffers; + break; + } - count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); - if (count == 0) - goto out; + vq->last_avail_idx += num_buffers; + } - vq->batch_copy_nb_elems = 0; + do_data_copy_enqueue(dev, vq); - rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } + + return pkt_idx; +} + +static __rte_always_inline uint32_t +virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint32_t count) +{ + uint32_t pkt_idx = 0; + uint16_t num_buffers; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; - vq->shadow_used_idx = 0; - avail_head = *((volatile uint16_t *)&vq->avail->idx); for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + uint16_t nr_vec = 0; + uint16_t nr_descs = 0; - if (unlikely(reserve_avail_buf_mergeable(dev, vq, - pkt_len, buf_vec, &num_buffers, - avail_head) < 0)) { - LOG_DEBUG(VHOST_DATA, + if (unlikely(reserve_avail_buf_packed(dev, vq, + pkt_len, buf_vec, &nr_vec, + &num_buffers, &nr_descs) < 0)) { + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx -= num_buffers; break; } - LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); - if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx], - buf_vec, num_buffers) < 0) { + if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], + buf_vec, nr_vec, + num_buffers) < 0) { vq->shadow_used_idx -= num_buffers; break; } - vq->last_avail_idx += num_buffers; + vq->last_avail_idx += nr_descs; + if (vq->last_avail_idx >= vq->size) { + vq->last_avail_idx -= vq->size; + vq->avail_wrap_counter ^= 1; + } } do_data_copy_enqueue(dev, vq); if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring(dev, vq); - vhost_vring_call(dev, vq); + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } + return pkt_idx; +} + +static __rte_always_inline uint32_t +virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + + rte_spinlock_lock(&vq->access_lock); + + if (unlikely(vq->enabled == 0)) + goto out_access_unlock; + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 
0)) + goto out; + + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); + if (count == 0) + goto out; + + if (vq_is_packed(dev)) + count = virtio_dev_rx_packed(dev, vq, pkts, count); + else + count = virtio_dev_rx_split(dev, vq, pkts, count); + out: if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) vhost_user_iotlb_rd_unlock(vq); @@ -691,7 +926,7 @@ out: out_access_unlock: rte_spinlock_unlock(&vq->access_lock); - return pkt_idx; + return count; } uint16_t @@ -710,10 +945,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; } - if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) - return virtio_dev_merge_rx(dev, queue_id, pkts, count); - else - return virtio_dev_rx(dev, queue_id, pkts, count); + return virtio_dev_rx(dev, queue_id, pkts, count); } static inline bool @@ -838,42 +1070,62 @@ put_zmbuf(struct zcopy_mbuf *zmbuf) static __rte_always_inline int copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct vring_desc *descs, uint16_t max_desc, - struct rte_mbuf *m, uint16_t desc_idx, - struct rte_mempool *mbuf_pool) + struct buf_vector *buf_vec, uint16_t nr_vec, + struct rte_mbuf *m, struct rte_mempool *mbuf_pool) { - struct vring_desc *desc; - uint64_t desc_addr; - uint32_t desc_avail, desc_offset; + uint32_t buf_avail, buf_offset; + uint64_t buf_addr, buf_iova, buf_len; uint32_t mbuf_avail, mbuf_offset; uint32_t cpy_len; struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr tmp_hdr; struct virtio_net_hdr *hdr = NULL; /* A counter to avoid desc dead loop chain */ - uint32_t nr_desc = 1; + uint16_t vec_idx = 0; struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; int error = 0; - desc = &descs[desc_idx]; - if (unlikely((desc->len < dev->vhost_hlen)) || - (desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; - goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { + if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { error = -1; goto out; } + if (likely(nr_vec > 1)) + rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); + if (virtio_net_with_host_offload(dev)) { - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); - rte_prefetch0(hdr); + if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { + uint64_t len; + uint64_t remain = sizeof(struct virtio_net_hdr); + uint64_t src; + uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr; + uint16_t hdr_vec_idx = 0; + + /* + * No luck, the virtio-net header doesn't fit + * in a contiguous virtual area. + */ + while (remain) { + len = RTE_MIN(remain, + buf_vec[hdr_vec_idx].buf_len); + src = buf_vec[hdr_vec_idx].buf_addr; + rte_memcpy((void *)(uintptr_t)dst, + (void *)(uintptr_t)src, len); + + remain -= len; + dst += len; + hdr_vec_idx++; + } + + hdr = &tmp_hdr; + } else { + hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); + rte_prefetch0(hdr); + } } /* @@ -881,41 +1133,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, * for Tx: the first for storing the header, and others * for storing the data. 
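
rte_vhost_enqueue_burst() is now a thin wrapper that locks the queue, translates the ring if needed, and dispatches into the split or packed datapath; its dequeue counterpart gets the same treatment further down. From the application's point of view nothing changes. A minimal polling sketch over one vhost device (queue indexes follow the usual 0 = guest RX, 1 = guest TX convention; the forwarding logic is left out):

    #include <rte_mbuf.h>
    #include <rte_vhost.h>

    #define BURST_SZ 32

    /* Poll one vhost device once: drain what the guest transmitted. */
    static void
    poll_vhost_once(int vid, struct rte_mempool *mbuf_pool)
    {
        struct rte_mbuf *pkts[BURST_SZ];
        uint16_t n, i;

        /* Packets the guest sent (its TX virtqueue has index 1). */
        n = rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts, BURST_SZ);
        for (i = 0; i < n; i++)
            rte_pktmbuf_free(pkts[i]);   /* a real app would forward these */

        /*
         * In the other direction, rte_vhost_enqueue_burst(vid, 0, pkts, n)
         * hands mbufs to the guest RX virtqueue (index 0) and returns how
         * many were accepted.
         */
    }
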
*/ - if (likely((desc->len == dev->vhost_hlen) && - (desc->flags & VRING_DESC_F_NEXT) != 0)) { - desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; - goto out; - } - - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { - error = -1; + if (unlikely(buf_len < dev->vhost_hlen)) { + buf_offset = dev->vhost_hlen - buf_len; + vec_idx++; + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + buf_avail = buf_len - buf_offset; + } else if (buf_len == dev->vhost_hlen) { + if (unlikely(++vec_idx >= nr_vec)) goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - desc_offset = 0; - desc_avail = desc->len; - nr_desc += 1; + buf_offset = 0; + buf_avail = buf_len; } else { - desc_avail = desc->len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; + buf_offset = dev->vhost_hlen; + buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; } - rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + rte_prefetch0((void *)(uintptr_t) + (buf_addr + buf_offset)); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); + PRINT_PACKET(dev, + (uintptr_t)(buf_addr + buf_offset), + (uint32_t)buf_avail, 0); mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; while (1) { uint64_t hpa; - cpy_len = RTE_MIN(desc_avail, mbuf_avail); + cpy_len = RTE_MIN(buf_avail, mbuf_avail); /* * A desc buf might across two host physical pages that are @@ -923,11 +1174,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, * will be copied even though zero copy is enabled. */ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, - desc->addr + desc_offset, cpy_len)))) { + buf_iova + buf_offset, cpy_len)))) { cur->data_len = cpy_len; cur->data_off = 0; - cur->buf_addr = (void *)(uintptr_t)(desc_addr - + desc_offset); + cur->buf_addr = + (void *)(uintptr_t)(buf_addr + buf_offset); cur->buf_iova = hpa; /* @@ -937,61 +1188,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, mbuf_avail = cpy_len; } else { if (likely(cpy_len > MAX_BATCH_LEN || - copy_nb >= vq->size || + vq->batch_copy_nb_elems >= vq->size || (hdr && cur == m))) { rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), - (void *)((uintptr_t)(desc_addr + - desc_offset)), + (void *)((uintptr_t)(buf_addr + + buf_offset)), cpy_len); } else { - batch_copy[copy_nb].dst = + batch_copy[vq->batch_copy_nb_elems].dst = rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset); - batch_copy[copy_nb].src = - (void *)((uintptr_t)(desc_addr + - desc_offset)); - batch_copy[copy_nb].len = cpy_len; - copy_nb++; + batch_copy[vq->batch_copy_nb_elems].src = + (void *)((uintptr_t)(buf_addr + + buf_offset)); + batch_copy[vq->batch_copy_nb_elems].len = + cpy_len; + vq->batch_copy_nb_elems++; } } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; + buf_avail -= cpy_len; + buf_offset += cpy_len; - /* This desc reaches to its end, get the next one */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) + /* This buf reaches to its end, get the next one */ + if (buf_avail == 0) { + if (++vec_idx >= nr_vec) break; - if (unlikely(desc->next >= max_desc || - ++nr_desc > max_desc)) { - error = -1; - goto out; - } - desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; 
- goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { - error = -1; - goto out; - } - - rte_prefetch0((void *)(uintptr_t)desc_addr); + /* + * Prefecth desc n + 1 buffer while + * desc n buffer is processed. + */ + if (vec_idx + 1 < nr_vec) + rte_prefetch0((void *)(uintptr_t) + buf_vec[vec_idx + 1].buf_addr); - desc_offset = 0; - desc_avail = desc->len; + buf_offset = 0; + buf_avail = buf_len; - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + PRINT_PACKET(dev, (uintptr_t)buf_addr, + (uint32_t)buf_avail, 0); } /* @@ -1027,38 +1270,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, vhost_dequeue_offload(hdr, m); out: - vq->batch_copy_nb_elems = copy_nb; return error; } -static __rte_always_inline void -update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t used_idx, uint32_t desc_idx) -{ - vq->used->ring[used_idx].id = desc_idx; - vq->used->ring[used_idx].len = 0; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); -} - -static __rte_always_inline void -update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t count) -{ - if (unlikely(count == 0)) - return; - - rte_smp_wmb(); - rte_smp_rmb(); - - vq->used->idx += count; - vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - vhost_vring_call(dev, vq); -} - static __rte_always_inline struct zcopy_mbuf * get_zmbuf(struct vhost_virtqueue *vq) { @@ -1118,66 +1333,137 @@ restore_mbuf(struct rte_mbuf *m) } } -uint16_t -rte_vhost_dequeue_burst(int vid, uint16_t queue_id, +static __rte_always_inline uint16_t +virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) { - struct virtio_net *dev; - struct rte_mbuf *rarp_mbuf = NULL; - struct vhost_virtqueue *vq; - uint32_t desc_indexes[MAX_PKT_BURST]; - uint32_t used_idx; - uint32_t i = 0; + uint16_t i; uint16_t free_entries; - uint16_t avail_idx; - dev = get_device(vid); - if (!dev) - return 0; + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf, *next; - if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, - "(%d) %s: built-in vhost net backend is disabled.\n", - dev->vid, __func__); - return 0; - } + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; + if (mbuf_is_consumed(zmbuf->mbuf)) { + update_shadow_used_ring_split(vq, + zmbuf->desc_idx, 0); + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + restore_mbuf(zmbuf->mbuf); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; + } + } + + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); } - vq = dev->virtqueue[queue_id]; + rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); - if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + free_entries = *((volatile uint16_t *)&vq->avail->idx) - + vq->last_avail_idx; + if (free_entries == 0) return 0; - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) 
%s\n", dev->vid, __func__); - vq->batch_copy_nb_elems = 0; + count = RTE_MIN(count, MAX_PKT_BURST); + count = RTE_MIN(count, free_entries); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + dev->vid, count); - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); + for (i = 0; i < count; i++) { + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t head_idx, dummy_len; + uint16_t nr_vec = 0; + int err; - if (unlikely(vq->access_ok == 0)) - if (unlikely(vring_translate(dev, vq) < 0)) - goto out; + if (unlikely(fill_vec_buf_split(dev, vq, + vq->last_avail_idx + i, + &nr_vec, buf_vec, + &head_idx, &dummy_len, + VHOST_ACCESS_RO) < 0)) + break; + + if (likely(dev->dequeue_zero_copy == 0)) + update_shadow_used_ring_split(vq, head_idx, 0); + + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + mbuf_pool); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); + break; + } + + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf; + + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(pkts[i]); + break; + } + zmbuf->mbuf = pkts[i]; + zmbuf->desc_idx = head_idx; + + /* + * Pin lock the mbuf; we will check later to see + * whether the mbuf is freed (when we are the last + * user) or not. If that's the case, we then could + * update the used ring safely. + */ + rte_mbuf_refcnt_update(pkts[i], 1); + + vq->nr_zmbuf += 1; + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + } + } + vq->last_avail_idx += i; + + if (likely(dev->dequeue_zero_copy == 0)) { + do_data_copy_dequeue(vq); + if (unlikely(i < count)) + vq->shadow_used_idx = i; + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } + + return i; +} + +static __rte_always_inline uint16_t +virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + uint16_t i; + + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; - int nr_updated = 0; for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); zmbuf != NULL; zmbuf = next) { next = TAILQ_NEXT(zmbuf, next); if (mbuf_is_consumed(zmbuf->mbuf)) { - used_idx = vq->last_used_idx++ & (vq->size - 1); - update_used_ring(dev, vq, used_idx, - zmbuf->desc_idx); - nr_updated += 1; + update_shadow_used_ring_packed(vq, + zmbuf->desc_idx, + 0, + zmbuf->desc_count); TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); restore_mbuf(zmbuf->mbuf); @@ -1187,93 +1473,34 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, } } - update_used_idx(dev, vq, nr_updated); + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } - /* - * Construct a RARP broadcast packet, and inject it to the "pkts" - * array, to looks like that guest actually send such packet. - * - * Check user_send_rarp() for more information. - * - * broadcast_rarp shares a cacheline in the virtio_net structure - * with some fields that are accessed during enqueue and - * rte_atomic16_cmpset() causes a write if using cmpxchg. This could - * result in false sharing between enqueue and dequeue. - * - * Prevent unnecessary false sharing by reading broadcast_rarp first - * and only performing cmpset if the read indicates it is likely to - * be set. 
- */ - - if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && - rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { - - rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); - if (rarp_mbuf == NULL) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to make RARP packet.\n"); - return 0; - } - count -= 1; - } - - free_entries = *((volatile uint16_t *)&vq->avail->idx) - - vq->last_avail_idx; - if (free_entries == 0) - goto out; - - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - - /* Prefetch available and used ring */ - avail_idx = vq->last_avail_idx & (vq->size - 1); - used_idx = vq->last_used_idx & (vq->size - 1); - rte_prefetch0(&vq->avail->ring[avail_idx]); - rte_prefetch0(&vq->used->ring[used_idx]); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); count = RTE_MIN(count, MAX_PKT_BURST); - count = RTE_MIN(count, free_entries); - LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", dev->vid, count); - /* Retrieve all of the head indexes first to avoid caching issues. */ - for (i = 0; i < count; i++) { - avail_idx = (vq->last_avail_idx + i) & (vq->size - 1); - used_idx = (vq->last_used_idx + i) & (vq->size - 1); - desc_indexes[i] = vq->avail->ring[avail_idx]; - - if (likely(dev->dequeue_zero_copy == 0)) - update_used_ring(dev, vq, used_idx, desc_indexes[i]); - } - - /* Prefetch descriptor index. */ - rte_prefetch0(&vq->desc[desc_indexes[0]]); for (i = 0; i < count; i++) { - struct vring_desc *desc; - uint16_t sz, idx; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t buf_id, dummy_len; + uint16_t desc_count, nr_vec = 0; int err; - if (likely(i + 1 < count)) - rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); + if (unlikely(fill_vec_buf_packed(dev, vq, + vq->last_avail_idx, &desc_count, + buf_vec, &nr_vec, + &buf_id, &dummy_len, + VHOST_ACCESS_RW) < 0)) + break; - if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { - desc = (struct vring_desc *)(uintptr_t) - vhost_iova_to_vva(dev, vq, - vq->desc[desc_indexes[i]].addr, - sizeof(*desc), - VHOST_ACCESS_RO); - if (unlikely(!desc)) - break; + if (likely(dev->dequeue_zero_copy == 0)) + update_shadow_used_ring_packed(vq, buf_id, 0, + desc_count); - rte_prefetch0(desc); - sz = vq->desc[desc_indexes[i]].len / sizeof(*desc); - idx = 0; - } else { - desc = vq->desc; - sz = vq->size; - idx = desc_indexes[i]; - } + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); pkts[i] = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(pkts[i] == NULL)) { @@ -1282,8 +1509,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, break; } - err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx, - mbuf_pool); + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + mbuf_pool); if (unlikely(err)) { rte_pktmbuf_free(pkts[i]); break; @@ -1298,7 +1525,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, break; } zmbuf->mbuf = pkts[i]; - zmbuf->desc_idx = desc_indexes[i]; + zmbuf->desc_idx = buf_id; + zmbuf->desc_count = desc_count; /* * Pin lock the mbuf; we will check later to see @@ -1311,15 +1539,103 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vq->nr_zmbuf += 1; TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); } + + vq->last_avail_idx += desc_count; + if (vq->last_avail_idx >= vq->size) { + vq->last_avail_idx -= vq->size; + vq->avail_wrap_counter ^= 1; + } } - vq->last_avail_idx += i; if (likely(dev->dequeue_zero_copy == 0)) { do_data_copy_dequeue(vq); - vq->last_used_idx += i; - update_used_idx(dev, vq, i); + if 
(unlikely(i < count)) + vq->shadow_used_idx = i; + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } + return i; +} + +uint16_t +rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + struct virtio_net *dev; + struct rte_mbuf *rarp_mbuf = NULL; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return 0; + } + + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + + if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + return 0; + + if (unlikely(vq->enabled == 0)) { + count = 0; + goto out_access_unlock; + } + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 0)) { + count = 0; + goto out; + } + + /* + * Construct a RARP broadcast packet, and inject it to the "pkts" + * array, to looks like that guest actually send such packet. + * + * Check user_send_rarp() for more information. + * + * broadcast_rarp shares a cacheline in the virtio_net structure + * with some fields that are accessed during enqueue and + * rte_atomic16_cmpset() causes a write if using cmpxchg. This could + * result in false sharing between enqueue and dequeue. + * + * Prevent unnecessary false sharing by reading broadcast_rarp first + * and only performing cmpset if the read indicates it is likely to + * be set. + */ + if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && + rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + + rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); + if (rarp_mbuf == NULL) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to make RARP packet.\n"); + count = 0; + goto out; + } + count -= 1; + } + + if (vq_is_packed(dev)) + count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count); + else + count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count); + out: if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) vhost_user_iotlb_rd_unlock(vq); @@ -1332,10 +1648,10 @@ out_access_unlock: * Inject it to the head of "pkts" array, so that switch's mac * learning table will get updated first. */ - memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *)); + memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *)); pkts[0] = rarp_mbuf; - i += 1; + count += 1; } - return i; + return count; } diff --git a/lib/meson.build b/lib/meson.build index ef615917..eb91f100 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -9,22 +9,27 @@ # given as a dep, no need to mention ring. This is especially true for the # core libs which are widely reused, so their deps are kept to a minimum. 
libraries = [ 'compat', # just a header, used for versioning - 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ether', 'pci', # core + 'kvargs', + 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core 'metrics', # bitrate/latency stats depends on this 'hash', # efd depends on this - 'kvargs', # cryptodev depends on this + 'timer', # eventdev depends on this 'acl', 'bbdev', 'bitratestats', 'cfgfile', - 'cmdline', 'cryptodev', + 'cmdline', 'compressdev', 'cryptodev', 'distributor', 'efd', 'eventdev', 'gro', 'gso', 'ip_frag', 'jobstats', 'kni', 'latencystats', 'lpm', 'member', - 'meter', 'power', 'pdump', - 'reorder', 'sched', 'security', 'timer', 'vhost', + 'meter', 'power', 'pdump', 'rawdev', + 'reorder', 'sched', 'security', 'vhost', # add pkt framework libs which use other libs from above 'port', 'table', 'pipeline', # flow_classify lib depends on pkt framework table lib - 'flow_classify'] + 'flow_classify', 'bpf'] +default_cflags = machine_args +if cc.has_argument('-Wno-format-truncation') + default_cflags += '-Wno-format-truncation' +endif foreach l:libraries build = true name = l @@ -33,7 +38,7 @@ foreach l:libraries sources = [] headers = [] includes = [] - cflags = machine_args + cflags = default_cflags objs = [] # other object files to link against, used e.g. for # instruction-set optimized versions of code @@ -41,9 +46,12 @@ foreach l:libraries # external package/library requirements ext_deps = [] deps = ['eal'] # eal is standard dependency except for itself - if l == 'eal' + if l == 'kvargs' deps = [] endif + if l == 'eal' + deps = ['kvargs'] + endif dir_name = 'librte_' + l subdir(dir_name) @@ -63,6 +71,10 @@ foreach l:libraries shared_deps = ext_deps static_deps = ext_deps foreach d:deps + if not is_variable('shared_rte_' + d) + error('Missing dependency ' + d + + ' for library ' + lib_name) + endif shared_deps += [get_variable('shared_rte_' + d)] static_deps += [get_variable('static_rte_' + d)] endforeach @@ -95,7 +107,7 @@ foreach l:libraries # then use pre-build objects to build shared lib sources = [] - objs += static_lib.extract_all_objects() + objs += static_lib.extract_all_objects(recursive: false) version_map = '@0@/@1@/rte_@2@_version.map'.format( meson.current_source_dir(), dir_name, name) shared_lib = shared_library(libname, -- cgit 1.2.3-korg